From 747bda7bb5b1644a06734900326847a5d353c448 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 19 Apr 2024 10:29:00 +0200
Subject: fbdev/deferred-io: Provide get_page hook in struct fb_deferred_io

Add a callback for drivers to provide framebuffer pages to fbdev's
deferred-I/O helpers. Implementations need to acquire a reference on
the page before returning it. Returning NULL generates a SIGBUS
signal.

This will be useful for DRM's fbdev emulation with GEM-shmem buffer
objects.

v2:
- fix typo in commit message (Javier)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240419083331.7761-8-tzimmermann@suse.de
---
 include/linux/fb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index 811e47f9d1c3..5358edbb9c0b 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -225,6 +225,7 @@ struct fb_deferred_io {
 	struct mutex lock; /* mutex that protects the pageref list */
 	struct list_head pagereflist; /* list of pagerefs for touched pages */
 	/* callback */
+	struct page *(*get_page)(struct fb_info *info, unsigned long offset);
 	void (*deferred_io)(struct fb_info *info, struct list_head *pagelist);
 };
 #endif
-- 
cgit v1.2.3


From 150f431a08317e0e0363a7f9147b6246d3b40ba6 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 19 Apr 2024 10:29:01 +0200
Subject: drm/fbdev: Add fbdev-shmem

Add an fbdev emulation for SHMEM-based memory managers. The code is
similar to fbdev-generic, but does not require an additional shadow
buffer for mmap(). Fbdev-shmem operates directly on the buffer object's
SHMEM pages. Fbdev's deferred-I/O mechanism updates the hardware state
on write operations.

The memory pages of GEM SHMEM cannot be detected by fbdefio. Therefore
fbdev-shmem implements the .get_page() hook in struct fb_deferred_io.
The fbdefio helpers call this hook to retrieve the page directly from
fbdev-shmem instead of trying to detect it internally.

v3:
- clarify on get_page mechanism in commit description (Javier)
v2:
- use drm_driver_legacy_fb_format() (Geert)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240419083331.7761-9-tzimmermann@suse.de
---
 include/drm/drm_fbdev_shmem.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 include/drm/drm_fbdev_shmem.h

(limited to 'include')

diff --git a/include/drm/drm_fbdev_shmem.h b/include/drm/drm_fbdev_shmem.h
new file mode 100644
index 000000000000..fb43cadd1950
--- /dev/null
+++ b/include/drm/drm_fbdev_shmem.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef DRM_FBDEV_SHMEM_H
+#define DRM_FBDEV_SHMEM_H
+
+struct drm_device;
+
+#ifdef CONFIG_DRM_FBDEV_EMULATION
+void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp);
+#else
+static inline void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp)
+{ }
+#endif
+
+#endif
-- 
cgit v1.2.3


From aae4682e5d66c1e1dc181fa341652e037237f144 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 19 Apr 2024 10:29:35 +0200
Subject: drm/fbdev-generic: Convert to fbdev-ttm

Only TTM-based drivers use fbdev-generic. Rename it to fbdev-ttm and
change the symbol infix from _generic_ to _ttm_. Link the source file
into TTM helpers, so that it is only build if TTM-based drivers have
been selected. Select DRM_TTM_HELPER for loongson.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240419083331.7761-43-tzimmermann@suse.de
---
 include/drm/drm_fbdev_generic.h | 15 ---------------
 include/drm/drm_fbdev_ttm.h     | 15 +++++++++++++++
 2 files changed, 15 insertions(+), 15 deletions(-)
 delete mode 100644 include/drm/drm_fbdev_generic.h
 create mode 100644 include/drm/drm_fbdev_ttm.h

(limited to 'include')

diff --git a/include/drm/drm_fbdev_generic.h b/include/drm/drm_fbdev_generic.h
deleted file mode 100644
index 75799342098d..000000000000
--- a/include/drm/drm_fbdev_generic.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-
-#ifndef DRM_FBDEV_GENERIC_H
-#define DRM_FBDEV_GENERIC_H
-
-struct drm_device;
-
-#ifdef CONFIG_DRM_FBDEV_EMULATION
-void drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp);
-#else
-static inline void drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp)
-{ }
-#endif
-
-#endif
diff --git a/include/drm/drm_fbdev_ttm.h b/include/drm/drm_fbdev_ttm.h
new file mode 100644
index 000000000000..9e6c3bdf3537
--- /dev/null
+++ b/include/drm/drm_fbdev_ttm.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef DRM_FBDEV_TTM_H
+#define DRM_FBDEV_TTM_H
+
+struct drm_device;
+
+#ifdef CONFIG_DRM_FBDEV_EMULATION
+void drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp);
+#else
+static inline void drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp)
+{ }
+#endif
+
+#endif
-- 
cgit v1.2.3


From 18bc074c226bfecd205bf031678f5e35ee55c3da Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 19 Apr 2024 10:29:36 +0200
Subject: drm/fbdev: Clean up fbdev documentation

Rewrite some docs that are not up-to-date any longer. Remove the TODO
item for fbdev-generic conversion, as the helper has been replaced. Make
documentation for DMA, SHMEM and TTM emulation available.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240419083331.7761-44-tzimmermann@suse.de
---
 include/drm/drm_mode_config.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
index 8de3c9a5f61b..ab0f167474b1 100644
--- a/include/drm/drm_mode_config.h
+++ b/include/drm/drm_mode_config.h
@@ -106,8 +106,8 @@ struct drm_mode_config_funcs {
 	 * Drivers implementing fbdev emulation use drm_kms_helper_hotplug_event()
 	 * to call this hook to inform the fbdev helper of output changes.
 	 *
-	 * This hook is deprecated, drivers should instead use
-	 * drm_fbdev_generic_setup() which takes care of any necessary
+	 * This hook is deprecated, drivers should instead implement fbdev
+	 * support with struct drm_client, which takes care of any necessary
 	 * hotplug event forwarding already without further involvement by
 	 * the driver.
 	 */
-- 
cgit v1.2.3


From ef283674a17e000bb6b2ff05dd2ac5cbf2e3ae0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 22 Apr 2024 11:58:57 +0300
Subject: drm/uapi: Move drm_color_ctm_3x4 out from drm_mode.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drm_color_ctm_3x4 is some undocumented amgdpu private
uapi and thus has no business being in drm_mode.h.
At least move it to some amdgpu specific header, albeit
with the wrong namespace as maybe something somewhere
is using this already?

Cc: Harry Wentland <harry.wentland@amd.com>
Cc: Joshua Ashton <joshua@froggi.es>
Cc: Alex Deucher <alexander.deucher@amd.com>
Fixes: 6872a189be50 ("drm/amd/display: Add 3x4 CTM support for plane CTM")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240422085857.17651-1-ville.syrjala@linux.intel.com
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 9 +++++++++
 include/uapi/drm/drm_mode.h   | 8 --------
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 96e32dafd4f0..d5ebafacdd70 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -1269,6 +1269,15 @@ struct drm_amdgpu_info_gpuvm_fault {
 #define AMDGPU_FAMILY_GC_10_3_7			151 /* GC 10.3.7 */
 #define AMDGPU_FAMILY_GC_11_5_0			150 /* GC 11.5.0 */
 
+/* FIXME wrong namespace! */
+struct drm_color_ctm_3x4 {
+	/*
+	 * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude
+	 * (not two's complement!) format.
+	 */
+	__u64 matrix[12];
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 1ca5c7e418fd..d390011b89b4 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -846,14 +846,6 @@ struct drm_color_ctm {
 	__u64 matrix[9];
 };
 
-struct drm_color_ctm_3x4 {
-	/*
-	 * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude
-	 * (not two's complement!) format.
-	 */
-	__u64 matrix[12];
-};
-
 struct drm_color_lut {
 	/*
 	 * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and
-- 
cgit v1.2.3


From 7fb8af6798e8d013017e4607505f58d9942fd671 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Mon, 29 Apr 2024 19:43:36 +0300
Subject: drm: deprecate driver date

The driver date serves no useful purpose, because it's hardly ever
updated. The information is misleading at best.

As described in Documentation/gpu/drm-internals.rst:

  The driver date, formatted as YYYYMMDD, is meant to identify the date
  of the latest modification to the driver. However, as most drivers
  fail to update it, its value is mostly useless. The DRM core prints it
  to the kernel log at initialization time and passes it to userspace
  through the DRM_IOCTL_VERSION ioctl.

Stop printing the driver date at init, and start returning the empty
string "" as driver date through the DRM_IOCTL_VERSION ioctl.

The driver date initialization in drivers and the struct drm_driver date
member can be removed in follow-up.

Reviewed-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Acked-by: Simon Ser <contact@emersion.fr>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240429164336.1406480-1-jani.nikula@intel.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 include/drm/drm_drv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index 8878260d7529..cd37936c3926 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -411,7 +411,7 @@ struct drm_driver {
 	char *name;
 	/** @desc: driver description */
 	char *desc;
-	/** @date: driver date */
+	/** @date: driver date, unused, to be removed */
 	char *date;
 
 	/**
-- 
cgit v1.2.3


From c72ceafbd12cf95e088681ae5e535ef1a78bf0ed Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Fri, 29 Mar 2024 16:24:42 -0500
Subject: mm: Introduce AS_INACCESSIBLE for encrypted/confidential memory

filemap users like guest_memfd may use page cache pages to
allocate/manage memory that is only intended to be accessed by guests
via hardware protections like encryption. Writes to memory of this sort
in common paths like truncation may cause unexpected behavior such as
writing garbage instead of zeros when attempting to zero pages, or
worse, triggering hardware protections that are considered fatal as far
as the kernel is concerned.

Introduce a new address_space flag, AS_INACCESSIBLE, and use this
initially to prevent zero'ing of pages during truncation, with the
understanding that it is up to the owner of the mapping to handle this
specially if needed.

This is admittedly a rather blunt solution, but it seems like
there are no other places that should take into account the
flag to keep its promise.

Link: https://lore.kernel.org/lkml/ZR9LYhpxTaTk6PJX@google.com/
Cc: Matthew Wilcox <willy@infradead.org>
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Message-ID: <20240329212444.395559-5-michael.roth@amd.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/pagemap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2df35e65557d..f879c1d54da7 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -207,6 +207,7 @@ enum mapping_flags {
 	AS_STABLE_WRITES,	/* must wait for writeback before modifying
 				   folio contents */
 	AS_UNMOVABLE,		/* The mapping cannot be moved, ever */
+	AS_INACCESSIBLE,	/* Do not attempt direct R/W access to the mapping */
 };
 
 /**
-- 
cgit v1.2.3


From 3bb2531e20bff99f9cd8719ee7aea8bd82687173 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 7 May 2024 12:54:03 -0400
Subject: KVM: guest_memfd: Add hook for initializing memory

guest_memfd pages are generally expected to be in some arch-defined
initial state prior to using them for guest memory. For SEV-SNP this
initial state is 'private', or 'guest-owned', and requires additional
operations to move these pages into a 'private' state by updating the
corresponding entries the RMP table.

Allow for an arch-defined hook to handle updates of this sort, and go
ahead and implement one for x86 so KVM implementations like AMD SVM can
register a kvm_x86_ops callback to handle these updates for SEV-SNP
guests.

The preparation callback is always called when allocating/grabbing
folios via gmem, and it is up to the architecture to keep track of
whether or not the pages are already in the expected state (e.g. the RMP
table in the case of SEV-SNP).

In some cases, it is necessary to defer the preparation of the pages to
handle things like in-place encryption of initial guest memory payloads
before marking these pages as 'private'/'guest-owned'.  Add an argument
(always true for now) to kvm_gmem_get_folio() that allows for the
preparation callback to be bypassed.  To detect possible issues in
the way userspace initializes memory, it is only possible to add an
unprepared page if it is not already included in the filemap.

Link: https://lore.kernel.org/lkml/ZLqVdvsF11Ddo7Dq@google.com/
Co-developed-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Message-Id: <20231230172351.574091-5-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index afbc99264ffa..1af069ab657c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2443,4 +2443,9 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
 }
 #endif /* CONFIG_KVM_PRIVATE_MEM */
 
+#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
+int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order);
+bool kvm_arch_gmem_prepare_needed(struct kvm *kvm);
+#endif
+
 #endif
-- 
cgit v1.2.3


From 1f6c06b177513e8a47c43e95d1985dbd9cff3ddd Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 14 Feb 2024 12:09:06 -0500
Subject: KVM: guest_memfd: Add interface for populating gmem pages with user
 data

During guest run-time, kvm_arch_gmem_prepare() is issued as needed to
prepare newly-allocated gmem pages prior to mapping them into the guest.
In the case of SEV-SNP, this mainly involves setting the pages to
private in the RMP table.

However, for the GPA ranges comprising the initial guest payload, which
are encrypted/measured prior to starting the guest, the gmem pages need
to be accessed prior to setting them to private in the RMP table so they
can be initialized with the userspace-provided data. Additionally, an
SNP firmware call is needed afterward to encrypt them in-place and
measure the contents into the guest's launch digest.

While it is possible to bypass the kvm_arch_gmem_prepare() hooks so that
this handling can be done in an open-coded/vendor-specific manner, this
may expose more gmem-internal state/dependencies to external callers
than necessary. Try to avoid this by implementing an interface that
tries to handle as much of the common functionality inside gmem as
possible, while also making it generic enough to potentially be
usable/extensible for TDX as well.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Co-developed-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1af069ab657c..1ae65774d9fa 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2448,4 +2448,31 @@ int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_ord
 bool kvm_arch_gmem_prepare_needed(struct kvm *kvm);
 #endif
 
+/**
+ * kvm_gmem_populate() - Populate/prepare a GPA range with guest data
+ *
+ * @kvm: KVM instance
+ * @gfn: starting GFN to be populated
+ * @src: userspace-provided buffer containing data to copy into GFN range
+ *       (passed to @post_populate, and incremented on each iteration
+ *       if not NULL)
+ * @npages: number of pages to copy from userspace-buffer
+ * @post_populate: callback to issue for each gmem page that backs the GPA
+ *                 range
+ * @opaque: opaque data to pass to @post_populate callback
+ *
+ * This is primarily intended for cases where a gmem-backed GPA range needs
+ * to be initialized with userspace-provided data prior to being mapped into
+ * the guest as a private page. This should be called with the slots->lock
+ * held so that caller-enforced invariants regarding the expected memory
+ * attributes of the GPA range do not race with KVM_SET_MEMORY_ATTRIBUTES.
+ *
+ * Returns the number of pages that were populated.
+ */
+typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
+				    void __user *src, int order, void *opaque);
+
+long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
+		       kvm_gmem_populate_cb post_populate, void *opaque);
+
 #endif
-- 
cgit v1.2.3


From a90764f0e4ed5350cc9caeb384e0fb356c032587 Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Sat, 30 Dec 2023 11:23:21 -0600
Subject: KVM: guest_memfd: Add hook for invalidating memory

In some cases, like with SEV-SNP, guest memory needs to be updated in a
platform-specific manner before it can be safely freed back to the host.
Wire up arch-defined hooks to the .free_folio kvm_gmem_aops callback to
allow for special handling of this sort when freeing memory in response
to FALLOC_FL_PUNCH_HOLE operations and when releasing the inode, and go
ahead and define an arch-specific hook for x86 since it will be needed
for handling memory used for SEV-SNP guests.

Signed-off-by: Michael Roth <michael.roth@amd.com>
Message-Id: <20231230172351.574091-6-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1ae65774d9fa..b43b96f876fe 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2475,4 +2475,8 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
 long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
 		       kvm_gmem_populate_cb post_populate, void *opaque);
 
+#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
+#endif
+
 #endif
-- 
cgit v1.2.3


From ad27ce155566f2b4400fa865859834592bd18777 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Wed, 1 May 2024 03:51:57 -0500
Subject: KVM: SEV: Add KVM_SEV_SNP_LAUNCH_FINISH command

Add a KVM_SEV_SNP_LAUNCH_FINISH command to finalize the cryptographic
launch digest which stores the measurement of the guest at launch time.
Also extend the existing SNP firmware data structures to support
disabling the use of Versioned Chip Endorsement Keys (VCEK) by guests as
part of this command.

While finalizing the launch flow, the code also issues the LAUNCH_UPDATE
SNP firmware commands to encrypt/measure the initial VMSA pages for each
configured vCPU, which requires setting the RMP entries for those pages
to private, so also add handling to clean up the RMP entries for these
pages whening freeing vCPUs during shutdown.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Co-developed-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Harald Hoyer <harald@profian.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
Message-ID: <20240501085210.2213060-8-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/psp-sev.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index 3705c2044fc0..903ddfea8585 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -658,6 +658,7 @@ struct sev_data_snp_launch_update {
  * @id_auth_paddr: system physical address of ID block authentication structure
  * @id_block_en: indicates whether ID block is present
  * @auth_key_en: indicates whether author key is present in authentication structure
+ * @vcek_disabled: indicates whether use of VCEK is allowed for attestation reports
  * @rsvd: reserved
  * @host_data: host-supplied data for guest, not interpreted by firmware
  */
@@ -667,7 +668,8 @@ struct sev_data_snp_launch_finish {
 	u64 id_auth_paddr;
 	u8 id_block_en:1;
 	u8 auth_key_en:1;
-	u64 rsvd:62;
+	u8 vcek_disabled:1;
+	u64 rsvd:61;
 	u8 host_data[32];
 } __packed;
 
-- 
cgit v1.2.3


From 3dbfbd101a5844f851da9ae6e90f59753c10ff42 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Mon, 13 May 2024 23:27:23 +0300
Subject: drm/edid: remove drm_do_get_edid()

All users of drm_do_get_edid() have been converted to
drm_edid_read_custom(). Remove the unused function to prevent new users
from creeping in.

Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20240513202723.261440-1-jani.nikula@intel.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 include/drm/drm_edid.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index b085525e53e2..6bdfa254a1c1 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -423,10 +423,6 @@ static inline void drm_edid_decode_panel_id(u32 panel_id, char vend[4], u16 *pro
 }
 
 bool drm_probe_ddc(struct i2c_adapter *adapter);
-struct edid *drm_do_get_edid(struct drm_connector *connector,
-	int (*get_edid_block)(void *data, u8 *buf, unsigned int block,
-			      size_t len),
-	void *data);
 struct edid *drm_get_edid(struct drm_connector *connector,
 			  struct i2c_adapter *adapter);
 struct edid *drm_get_edid_switcheroo(struct drm_connector *connector,
-- 
cgit v1.2.3


From cdfad4db7756563db7d458216d9e3c2651dddc7d Mon Sep 17 00:00:00 2001
From: Tomasz Rusinowicz <tomasz.rusinowicz@intel.com>
Date: Mon, 13 May 2024 14:04:27 +0200
Subject: accel/ivpu: Add NPU profiling support

Implement time based Metric Streamer profiling UAPI.

This is a generic mechanism allowing user mode tools to sample
NPU metrics. These metrics are defined by the FW and transparent to
the driver.

The user space can check for this feature by checking
DRM_IVPU_CAP_METRIC_STREAMER driver capability.

Signed-off-by: Tomasz Rusinowicz <tomasz.rusinowicz@intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240513120431.3187212-9-jacek.lawrynowicz@linux.intel.com
---
 include/uapi/drm/ivpu_accel.h | 69 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h
index 19a13468eca5..084fb529e1e9 100644
--- a/include/uapi/drm/ivpu_accel.h
+++ b/include/uapi/drm/ivpu_accel.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
  */
 
 #ifndef __UAPI_IVPU_DRM_H__
@@ -21,6 +21,10 @@ extern "C" {
 #define DRM_IVPU_BO_INFO		  0x03
 #define DRM_IVPU_SUBMIT			  0x05
 #define DRM_IVPU_BO_WAIT		  0x06
+#define DRM_IVPU_METRIC_STREAMER_START	  0x07
+#define DRM_IVPU_METRIC_STREAMER_STOP	  0x08
+#define DRM_IVPU_METRIC_STREAMER_GET_DATA 0x09
+#define DRM_IVPU_METRIC_STREAMER_GET_INFO 0x0a
 
 #define DRM_IOCTL_IVPU_GET_PARAM                                               \
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param)
@@ -40,6 +44,22 @@ extern "C" {
 #define DRM_IOCTL_IVPU_BO_WAIT                                                 \
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_WAIT, struct drm_ivpu_bo_wait)
 
+#define DRM_IOCTL_IVPU_METRIC_STREAMER_START                                   \
+	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_START,            \
+		 struct drm_ivpu_metric_streamer_start)
+
+#define DRM_IOCTL_IVPU_METRIC_STREAMER_STOP                                    \
+	DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_STOP,              \
+		struct drm_ivpu_metric_streamer_stop)
+
+#define DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA                                \
+	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_GET_DATA,         \
+		 struct drm_ivpu_metric_streamer_get_data)
+
+#define DRM_IOCTL_IVPU_METRIC_STREAMER_GET_INFO                                \
+	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_GET_INFO,         \
+		 struct drm_ivpu_metric_streamer_get_data)
+
 /**
  * DOC: contexts
  *
@@ -336,6 +356,53 @@ struct drm_ivpu_bo_wait {
 	__u32 pad;
 };
 
+/**
+ * struct drm_ivpu_metric_streamer_start - Start collecting metric data
+ */
+struct drm_ivpu_metric_streamer_start {
+	/** @metric_group_mask: Indicates metric streamer instance */
+	__u64 metric_group_mask;
+	/** @sampling_period_ns: Sampling period in nanoseconds */
+	__u64 sampling_period_ns;
+	/**
+	 * @read_period_samples:
+	 *
+	 * Number of samples after which user space will try to read the data.
+	 * Reading the data after significantly longer period may cause data loss.
+	 */
+	__u32 read_period_samples;
+	/** @sample_size: Returned size of a single sample in bytes */
+	__u32 sample_size;
+	/** @max_data_size: Returned max @data_size from %DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA */
+	__u32 max_data_size;
+};
+
+/**
+ * struct drm_ivpu_metric_streamer_get_data - Copy collected metric data
+ */
+struct drm_ivpu_metric_streamer_get_data {
+	/** @metric_group_mask: Indicates metric streamer instance */
+	__u64 metric_group_mask;
+	/** @buffer_ptr: A pointer to a destination for the copied data */
+	__u64 buffer_ptr;
+	/** @buffer_size: Size of the destination buffer */
+	__u64 buffer_size;
+	/**
+	 * @data_size: Returned size of copied metric data
+	 *
+	 * If the @buffer_size is zero, returns the amount of data ready to be copied.
+	 */
+	__u64 data_size;
+};
+
+/**
+ * struct drm_ivpu_metric_streamer_stop - Stop collecting metric data
+ */
+struct drm_ivpu_metric_streamer_stop {
+	/** @metric_group_mask: Indicates metric streamer instance */
+	__u64 metric_group_mask;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From 0b03829fdece47beba9ecb7dbcbde4585ee3663e Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Tue, 14 May 2024 10:20:51 -0700
Subject: drm/mipi-dsi: Fix theoretical int overflow in
 mipi_dsi_dcs_write_seq()

The mipi_dsi_dcs_write_seq() macro makes a call to
mipi_dsi_dcs_write_buffer() which returns a type ssize_t. The macro
then stores it in an int and checks to see if it's negative. This
could theoretically be a problem if "ssize_t" is larger than "int".

To see the issue, imagine that "ssize_t" is 32-bits and "int" is
16-bits, you could see a problem if there was some code out there that
looked like:

  mipi_dsi_dcs_write_seq(dsi, cmd, <32767 bytes as arguments>);

...since we'd get back that 32768 bytes were transferred and 32768
stored in a 16-bit int would look negative.

Though there are no callsites where we'd actually hit this (even if
"int" was only 16-bit), it's cleaner to make the types match so let's
fix it.

Fixes: 2a9e9daf7523 ("drm/mipi-dsi: Introduce mipi_dsi_dcs_write_seq macro")
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240514102056.v5.1.I30fa4c8348ea316c886ef8a522a52fed617f930d@changeid
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240514102056.v5.1.I30fa4c8348ea316c886ef8a522a52fed617f930d@changeid
---
 include/drm/drm_mipi_dsi.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index 82b1cc434ea3..70ce0b8cbc68 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -333,18 +333,18 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi,
  * @cmd: Command
  * @seq: buffer containing data to be transmitted
  */
-#define mipi_dsi_dcs_write_seq(dsi, cmd, seq...)                           \
-	do {                                                               \
-		static const u8 d[] = { cmd, seq };                        \
-		struct device *dev = &dsi->dev;                            \
-		int ret;                                                   \
-		ret = mipi_dsi_dcs_write_buffer(dsi, d, ARRAY_SIZE(d));    \
-		if (ret < 0) {                                             \
-			dev_err_ratelimited(                               \
-				dev, "sending command %#02x failed: %d\n", \
-				cmd, ret);                                 \
-			return ret;                                        \
-		}                                                          \
+#define mipi_dsi_dcs_write_seq(dsi, cmd, seq...)                            \
+	do {                                                                \
+		static const u8 d[] = { cmd, seq };                         \
+		struct device *dev = &dsi->dev;                             \
+		ssize_t ret;                                                \
+		ret = mipi_dsi_dcs_write_buffer(dsi, d, ARRAY_SIZE(d));     \
+		if (ret < 0) {                                              \
+			dev_err_ratelimited(                                \
+				dev, "sending command %#02x failed: %zd\n", \
+				cmd, ret);                                  \
+			return ret;                                         \
+		}                                                           \
 	} while (0)
 
 /**
-- 
cgit v1.2.3


From 24acbcce5cc673886c2f4f9b3f6f89a9c6a53b7e Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Tue, 14 May 2024 10:20:52 -0700
Subject: drm/mipi-dsi: Fix theoretical int overflow in
 mipi_dsi_generic_write_seq()

The mipi_dsi_generic_write_seq() macro makes a call to
mipi_dsi_generic_write() which returns a type ssize_t. The macro then
stores it in an int and checks to see if it's negative. This could
theoretically be a problem if "ssize_t" is larger than "int".

To see the issue, imagine that "ssize_t" is 32-bits and "int" is
16-bits, you could see a problem if there was some code out there that
looked like:

  mipi_dsi_generic_write_seq(dsi, <32768 bytes as arguments>);

...since we'd get back that 32768 bytes were transferred and 32768
stored in a 16-bit int would look negative.

Though there are no callsites where we'd actually hit this (even if
"int" was only 16-bit), it's cleaner to make the types match so let's
fix it.

Fixes: a9015ce59320 ("drm/mipi-dsi: Add a mipi_dsi_dcs_write_seq() macro")
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240514102056.v5.2.Iadb65b8add19ed3ae3ed6425011beb97e380a912@changeid
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240514102056.v5.2.Iadb65b8add19ed3ae3ed6425011beb97e380a912@changeid
---
 include/drm/drm_mipi_dsi.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index 70ce0b8cbc68..e0f56564bf97 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -314,17 +314,17 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi,
  * @dsi: DSI peripheral device
  * @seq: buffer containing the payload
  */
-#define mipi_dsi_generic_write_seq(dsi, seq...)                                \
-	do {                                                                   \
-		static const u8 d[] = { seq };                                 \
-		struct device *dev = &dsi->dev;                                \
-		int ret;                                                       \
-		ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d));           \
-		if (ret < 0) {                                                 \
-			dev_err_ratelimited(dev, "transmit data failed: %d\n", \
-					    ret);                              \
-			return ret;                                            \
-		}                                                              \
+#define mipi_dsi_generic_write_seq(dsi, seq...)                                 \
+	do {                                                                    \
+		static const u8 d[] = { seq };                                  \
+		struct device *dev = &dsi->dev;                                 \
+		ssize_t ret;                                                    \
+		ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d));            \
+		if (ret < 0) {                                                  \
+			dev_err_ratelimited(dev, "transmit data failed: %zd\n", \
+					    ret);                               \
+			return ret;                                             \
+		}                                                               \
 	} while (0)
 
 /**
-- 
cgit v1.2.3


From 7d3f6acaf87c7db6dcd868694a2f65e7040478dc Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Tue, 14 May 2024 10:20:53 -0700
Subject: drm/mipi-dsi: mipi_dsi_*_write functions don't need to ratelimit
 prints

We really don't expect these errors to be printed over and over
again. When a driver hits the error it should bail out. Just use a
normal error print.

This gives a nice space savings for users of these functions:

$ scripts/bloat-o-meter \
  .../before/panel-novatek-nt36672e.ko \
  .../after/panel-novatek-nt36672e.ko
add/remove: 0/1 grow/shrink: 0/1 up/down: 0/-16760 (-16760)
Function                                     old     new   delta
nt36672e_1080x2408_60hz_init               17080   10640   -6440
nt36672e_1080x2408_60hz_init._rs           10320       -  -10320
Total: Before=31815, After=15055, chg -52.68%

Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240514102056.v5.3.I9982cd5d8014de7a4513f5619f66f88da49ce4ec@changeid
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240514102056.v5.3.I9982cd5d8014de7a4513f5619f66f88da49ce4ec@changeid
---
 include/drm/drm_mipi_dsi.h | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index e0f56564bf97..67967be48dbd 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -314,17 +314,16 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi,
  * @dsi: DSI peripheral device
  * @seq: buffer containing the payload
  */
-#define mipi_dsi_generic_write_seq(dsi, seq...)                                 \
-	do {                                                                    \
-		static const u8 d[] = { seq };                                  \
-		struct device *dev = &dsi->dev;                                 \
-		ssize_t ret;                                                    \
-		ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d));            \
-		if (ret < 0) {                                                  \
-			dev_err_ratelimited(dev, "transmit data failed: %zd\n", \
-					    ret);                               \
-			return ret;                                             \
-		}                                                               \
+#define mipi_dsi_generic_write_seq(dsi, seq...)                           \
+	do {                                                              \
+		static const u8 d[] = { seq };                            \
+		struct device *dev = &dsi->dev;                           \
+		ssize_t ret;                                              \
+		ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d));      \
+		if (ret < 0) {                                            \
+			dev_err(dev, "transmit data failed: %zd\n", ret); \
+			return ret;                                       \
+		}                                                         \
 	} while (0)
 
 /**
@@ -340,8 +339,7 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi,
 		ssize_t ret;                                                \
 		ret = mipi_dsi_dcs_write_buffer(dsi, d, ARRAY_SIZE(d));     \
 		if (ret < 0) {                                              \
-			dev_err_ratelimited(                                \
-				dev, "sending command %#02x failed: %zd\n", \
+			dev_err(dev, "sending command %#02x failed: %zd\n", \
 				cmd, ret);                                  \
 			return ret;                                         \
 		}                                                           \
-- 
cgit v1.2.3


From 3b724909a380fddb44dfa0072fc459c698a52658 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Tue, 14 May 2024 10:20:54 -0700
Subject: drm/mipi-dsi: Reduce driver bloat of mipi_dsi_*_write_seq()

Through a cooperative effort between Hsin-Yi Wang and Dmitry
Baryshkov, we have realized the dev_err() in the
mipi_dsi_*_write_seq() macros was causing quite a bit of bloat to the
kernel. Let's hoist this call into drm_mipi_dsi.c by adding a "chatty"
version of the functions that includes the print. While doing this,
add a bit more comments to these macros making it clear that they
print errors and also that they return out of _the caller's_ function.

Without any changes to clients this gives a nice savings. Specifically
the macro was inlined and thus the error report call was inlined into
every call to mipi_dsi_dcs_write_seq() and
mipi_dsi_generic_write_seq(). By using a call to a "chatty" function,
the usage is reduced to one call in the chatty function and a function
call at the invoking site.

Building with my build system shows one example:

$ scripts/bloat-o-meter \
  .../before/panel-novatek-nt36672e.ko \
  .../after/panel-novatek-nt36672e.ko
add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-4404 (-4404)
Function                                     old     new   delta
nt36672e_1080x2408_60hz_init               10640    6236   -4404
Total: Before=15055, After=10651, chg -29.25%

Note that given the change in location of the print it's harder to
include the "cmd" in the printout for mipi_dsi_dcs_write_seq() since,
theoretically, someone could call the new chatty function with a
zero-size array and it would be illegal to dereference data[0].
There's a printk format to print the whole buffer and this is probably
more useful for debugging anyway. Given that we're doing this for
mipi_dsi_dcs_write_seq(), let's also print the buffer for
mipi_dsi_generic_write_seq() in the error case.

It should be noted that the current consensus of DRM folks is that the
mipi_dsi_*_write_seq() should be deprecated due to the non-intuitive
return behavior. A future patch will formally mark them as deprecated
and provide an alternative.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240514102056.v5.4.Id15fae80582bc74a0d4f1338987fa375738f45b9@changeid
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240514102056.v5.4.Id15fae80582bc74a0d4f1338987fa375738f45b9@changeid
---
 include/drm/drm_mipi_dsi.h | 47 +++++++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index 67967be48dbd..6d68d9927f46 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -256,6 +256,8 @@ int mipi_dsi_picture_parameter_set(struct mipi_dsi_device *dsi,
 
 ssize_t mipi_dsi_generic_write(struct mipi_dsi_device *dsi, const void *payload,
 			       size_t size);
+int mipi_dsi_generic_write_chatty(struct mipi_dsi_device *dsi,
+				  const void *payload, size_t size);
 ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params,
 			      size_t num_params, void *data, size_t size);
 
@@ -279,6 +281,8 @@ enum mipi_dsi_dcs_tear_mode {
 
 ssize_t mipi_dsi_dcs_write_buffer(struct mipi_dsi_device *dsi,
 				  const void *data, size_t len);
+int mipi_dsi_dcs_write_buffer_chatty(struct mipi_dsi_device *dsi,
+				     const void *data, size_t len);
 ssize_t mipi_dsi_dcs_write(struct mipi_dsi_device *dsi, u8 cmd,
 			   const void *data, size_t len);
 ssize_t mipi_dsi_dcs_read(struct mipi_dsi_device *dsi, u8 cmd, void *data,
@@ -311,38 +315,39 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi,
 
 /**
  * mipi_dsi_generic_write_seq - transmit data using a generic write packet
+ *
+ * This macro will print errors for you and will RETURN FROM THE CALLING
+ * FUNCTION (yes this is non-intuitive) upon error.
+ *
  * @dsi: DSI peripheral device
  * @seq: buffer containing the payload
  */
-#define mipi_dsi_generic_write_seq(dsi, seq...)                           \
-	do {                                                              \
-		static const u8 d[] = { seq };                            \
-		struct device *dev = &dsi->dev;                           \
-		ssize_t ret;                                              \
-		ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d));      \
-		if (ret < 0) {                                            \
-			dev_err(dev, "transmit data failed: %zd\n", ret); \
-			return ret;                                       \
-		}                                                         \
+#define mipi_dsi_generic_write_seq(dsi, seq...)                                \
+	do {                                                                   \
+		static const u8 d[] = { seq };                                 \
+		int ret;                                                       \
+		ret = mipi_dsi_generic_write_chatty(dsi, d, ARRAY_SIZE(d));    \
+		if (ret < 0)                                                   \
+			return ret;                                            \
 	} while (0)
 
 /**
  * mipi_dsi_dcs_write_seq - transmit a DCS command with payload
+ *
+ * This macro will print errors for you and will RETURN FROM THE CALLING
+ * FUNCTION (yes this is non-intuitive) upon error.
+ *
  * @dsi: DSI peripheral device
  * @cmd: Command
  * @seq: buffer containing data to be transmitted
  */
-#define mipi_dsi_dcs_write_seq(dsi, cmd, seq...)                            \
-	do {                                                                \
-		static const u8 d[] = { cmd, seq };                         \
-		struct device *dev = &dsi->dev;                             \
-		ssize_t ret;                                                \
-		ret = mipi_dsi_dcs_write_buffer(dsi, d, ARRAY_SIZE(d));     \
-		if (ret < 0) {                                              \
-			dev_err(dev, "sending command %#02x failed: %zd\n", \
-				cmd, ret);                                  \
-			return ret;                                         \
-		}                                                           \
+#define mipi_dsi_dcs_write_seq(dsi, cmd, seq...)                               \
+	do {                                                                   \
+		static const u8 d[] = { cmd, seq };                            \
+		int ret;                                                       \
+		ret = mipi_dsi_dcs_write_buffer_chatty(dsi, d, ARRAY_SIZE(d)); \
+		if (ret < 0)                                                   \
+			return ret;                                            \
 	} while (0)
 
 /**
-- 
cgit v1.2.3


From 966e397e4f6032b73438f8d775756541513e7daf Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Tue, 14 May 2024 10:20:55 -0700
Subject: drm/mipi-dsi: Introduce mipi_dsi_*_write_seq_multi()

The current mipi_dsi_*_write_seq() macros are non-intutitive because
they contain a hidden "return" statement that will return out of the
_caller_ of the macro. Let's mark them as deprecated and instead
introduce some new macros that are more intuitive.

These new macros are less optimal when an error occurs but should
behave more optimally when there is no error. Specifically these new
macros cause smaller code to get generated and the code size savings
(less to fetch from RAM, less cache space used, less RAM used) are
important. Since the error case isn't something we need to optimize
for and these new macros are easier to understand and more flexible,
they should be used.

After converting to use these new functions, one example shows some
nice savings while also being easier to understand.

$ scripts/bloat-o-meter \
  ...after/panel-novatek-nt36672e.ko \
  ...ctx/panel-novatek-nt36672e.ko
add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-988 (-988)
Function                                     old     new   delta
nt36672e_1080x2408_60hz_init                6236    5248    -988
Total: Before=10651, After=9663, chg -9.28%

Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240514102056.v5.5.Ie94246c30fe95101e0e26dd5f96e976dbeb8f242@changeid
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240514102056.v5.5.Ie94246c30fe95101e0e26dd5f96e976dbeb8f242@changeid
---
 include/drm/drm_mipi_dsi.h | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index 6d68d9927f46..5e9cad541bd6 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -197,6 +197,27 @@ struct mipi_dsi_device {
 	struct drm_dsc_config *dsc;
 };
 
+/**
+ * struct mipi_dsi_multi_context - Context to call multiple MIPI DSI funcs in a row
+ */
+struct mipi_dsi_multi_context {
+	/**
+	 * @dsi: Pointer to the MIPI DSI device
+	 */
+	struct mipi_dsi_device *dsi;
+
+	/**
+	 * @accum_err: Storage for the accumulated error over the multiple calls
+	 *
+	 * Init to 0. If a function encounters an error then the error code
+	 * will be stored here. If you call a function and this points to a
+	 * non-zero value then the function will be a noop. This allows calling
+	 * a function many times in a row and just checking the error at the
+	 * end to see if any of them failed.
+	 */
+	int accum_err;
+};
+
 #define MIPI_DSI_MODULE_PREFIX "mipi-dsi:"
 
 #define to_mipi_dsi_device(__dev)	container_of_const(__dev, struct mipi_dsi_device, dev)
@@ -258,6 +279,8 @@ ssize_t mipi_dsi_generic_write(struct mipi_dsi_device *dsi, const void *payload,
 			       size_t size);
 int mipi_dsi_generic_write_chatty(struct mipi_dsi_device *dsi,
 				  const void *payload, size_t size);
+void mipi_dsi_generic_write_multi(struct mipi_dsi_multi_context *ctx,
+				  const void *payload, size_t size);
 ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params,
 			      size_t num_params, void *data, size_t size);
 
@@ -283,6 +306,8 @@ ssize_t mipi_dsi_dcs_write_buffer(struct mipi_dsi_device *dsi,
 				  const void *data, size_t len);
 int mipi_dsi_dcs_write_buffer_chatty(struct mipi_dsi_device *dsi,
 				     const void *data, size_t len);
+void mipi_dsi_dcs_write_buffer_multi(struct mipi_dsi_multi_context *ctx,
+				     const void *data, size_t len);
 ssize_t mipi_dsi_dcs_write(struct mipi_dsi_device *dsi, u8 cmd,
 			   const void *data, size_t len);
 ssize_t mipi_dsi_dcs_read(struct mipi_dsi_device *dsi, u8 cmd, void *data,
@@ -319,6 +344,9 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi,
  * This macro will print errors for you and will RETURN FROM THE CALLING
  * FUNCTION (yes this is non-intuitive) upon error.
  *
+ * Because of the non-intuitive return behavior, THIS MACRO IS DEPRECATED.
+ * Please replace calls of it with mipi_dsi_generic_write_seq_multi().
+ *
  * @dsi: DSI peripheral device
  * @seq: buffer containing the payload
  */
@@ -331,12 +359,30 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi,
 			return ret;                                            \
 	} while (0)
 
+/**
+ * mipi_dsi_generic_write_seq_multi - transmit data using a generic write packet
+ *
+ * This macro will print errors for you and error handling is optimized for
+ * callers that call this multiple times in a row.
+ *
+ * @ctx: Context for multiple DSI transactions
+ * @seq: buffer containing the payload
+ */
+#define mipi_dsi_generic_write_seq_multi(ctx, seq...)                \
+	do {                                                         \
+		static const u8 d[] = { seq };                       \
+		mipi_dsi_generic_write_multi(ctx, d, ARRAY_SIZE(d)); \
+	} while (0)
+
 /**
  * mipi_dsi_dcs_write_seq - transmit a DCS command with payload
  *
  * This macro will print errors for you and will RETURN FROM THE CALLING
  * FUNCTION (yes this is non-intuitive) upon error.
  *
+ * Because of the non-intuitive return behavior, THIS MACRO IS DEPRECATED.
+ * Please replace calls of it with mipi_dsi_dcs_write_seq_multi().
+ *
  * @dsi: DSI peripheral device
  * @cmd: Command
  * @seq: buffer containing data to be transmitted
@@ -350,6 +396,22 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi,
 			return ret;                                            \
 	} while (0)
 
+/**
+ * mipi_dsi_dcs_write_seq_multi - transmit a DCS command with payload
+ *
+ * This macro will print errors for you and error handling is optimized for
+ * callers that call this multiple times in a row.
+ *
+ * @ctx: Context for multiple DSI transactions
+ * @cmd: Command
+ * @seq: buffer containing data to be transmitted
+ */
+#define mipi_dsi_dcs_write_seq_multi(ctx, cmd, seq...)                  \
+	do {                                                            \
+		static const u8 d[] = { cmd, seq };                     \
+		mipi_dsi_dcs_write_buffer_multi(ctx, d, ARRAY_SIZE(d)); \
+	} while (0)
+
 /**
  * struct mipi_dsi_driver - DSI driver
  * @driver: device driver model driver
-- 
cgit v1.2.3


From 0f1bb41bf39695c84c83ce6f69e125b562d1d7ab Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Tue, 14 May 2024 15:59:39 +0100
Subject: drm/i915: Support replaying GPU hangs with captured context image

When debugging GPU hangs Mesa developers are finding it useful to replay
the captured error state against the simulator. But due various simulator
limitations which prevent replicating all hangs, one step further is being
able to replay against a real GPU.

This is almost doable today with the missing part being able to upload the
captured context image into the driver state prior to executing the
uploaded hanging batch and all the buffers.

To enable this last part we add a new context parameter called
I915_CONTEXT_PARAM_CONTEXT_IMAGE. It follows the existing SSEU
configuration pattern of being able to select which context to apply
against, paired with the actual image and its size.

Since this is adding a new concept of debug only uapi, we hide it behind
a new kconfig option and also require activation with a module parameter.
Together with a warning banner printed at driver load, all those combined
should be sufficient to guard against inadvertently enabling the feature.

In terms of implementation we allow the legacy context set param to be
used since that removes the need to record the per context data in the
proto context, while still allowing flexibility of specifying context
images for any context.

Mesa MR using the uapi can be seen at:
  https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27594

v2:
 * Fix whitespace alignment as per checkpatch.
 * Added warning on userspace misuse.
 * Rebase for extracting ce->default_state shadowing.

v3:
 * Rebase for I915_CONTEXT_PARAM_LOW_LATENCY.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Carlos Santa <carlos.santa@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Tested-by: Carlos Santa <carlos.santa@intel.com>
Signed-off-by: Tvrtko Ursulin <tursulin@igalia.com>
Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20240514145939.87427-2-tursulin@igalia.com
---
 include/uapi/drm/i915_drm.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index d4d86e566e07..535cb68fdb5c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -2163,6 +2163,15 @@ struct drm_i915_gem_context_param {
  * supports this per context flag.
  */
 #define I915_CONTEXT_PARAM_LOW_LATENCY		0xe
+
+/*
+ * I915_CONTEXT_PARAM_CONTEXT_IMAGE:
+ *
+ * Allows userspace to provide own context images.
+ *
+ * Note that this is a debug API not available on production kernel builds.
+ */
+#define I915_CONTEXT_PARAM_CONTEXT_IMAGE	0xf
 /* Must be kept compact -- no holes and well documented */
 
 	/** @value: Context parameter value to be set or queried */
@@ -2564,6 +2573,24 @@ struct i915_context_param_engines {
 	struct i915_engine_class_instance engines[N__]; \
 } __attribute__((packed)) name__
 
+struct i915_gem_context_param_context_image {
+	/** @engine: Engine class & instance to be configured. */
+	struct i915_engine_class_instance engine;
+
+	/** @flags: One of the supported flags or zero. */
+	__u32 flags;
+#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0)
+
+	/** @size: Size of the image blob pointed to by @image. */
+	__u32 size;
+
+	/** @mbz: Must be zero. */
+	__u32 mbz;
+
+	/** @image: Userspace memory containing the context image. */
+	__u64 image;
+} __attribute__((packed));
+
 /**
  * struct drm_i915_gem_context_create_ext_setparam - Context parameter
  * to set or query during context creation.
-- 
cgit v1.2.3


From f79d6d28d8fe77b14beeaebe5393d9f294f8d09d Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sun, 12 May 2024 02:00:19 +0300
Subject: drm/mipi-dsi: wrap more functions for streamline handling

Follow the pattern of mipi_dsi_dcs_*_multi() and wrap several existing
MIPI DSI functions to use the context for processing. This simplifies
and streamlines driver code to use simpler code pattern.

Note, msleep function is also wrapped in this way as it is frequently
called inbetween other mipi_dsi_dcs_*() functions.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20240512-dsi-panels-upd-api-v2-2-e31ca14d102e@linaro.org
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240512-dsi-panels-upd-api-v2-2-e31ca14d102e@linaro.org
---
 include/drm/drm_mipi_dsi.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index 5e9cad541bd6..bd5a0b6d0711 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -275,6 +275,13 @@ int mipi_dsi_compression_mode_ext(struct mipi_dsi_device *dsi, bool enable,
 int mipi_dsi_picture_parameter_set(struct mipi_dsi_device *dsi,
 				   const struct drm_dsc_picture_parameter_set *pps);
 
+void mipi_dsi_compression_mode_ext_multi(struct mipi_dsi_multi_context *ctx,
+					 bool enable,
+					 enum mipi_dsi_compression_algo algo,
+					 unsigned int pps_selector);
+void mipi_dsi_picture_parameter_set_multi(struct mipi_dsi_multi_context *ctx,
+					  const struct drm_dsc_picture_parameter_set *pps);
+
 ssize_t mipi_dsi_generic_write(struct mipi_dsi_device *dsi, const void *payload,
 			       size_t size);
 int mipi_dsi_generic_write_chatty(struct mipi_dsi_device *dsi,
@@ -284,6 +291,12 @@ void mipi_dsi_generic_write_multi(struct mipi_dsi_multi_context *ctx,
 ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params,
 			      size_t num_params, void *data, size_t size);
 
+#define mipi_dsi_msleep(ctx, delay)	\
+	do {				\
+		if (!ctx.accum_err)	\
+			msleep(delay);	\
+	} while (0)
+
 /**
  * enum mipi_dsi_dcs_tear_mode - Tearing Effect Output Line mode
  * @MIPI_DSI_DCS_TEAR_MODE_VBLANK: the TE output line consists of V-Blanking
@@ -338,6 +351,14 @@ int mipi_dsi_dcs_set_display_brightness_large(struct mipi_dsi_device *dsi,
 int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi,
 					     u16 *brightness);
 
+void mipi_dsi_dcs_nop_multi(struct mipi_dsi_multi_context *ctx);
+void mipi_dsi_dcs_enter_sleep_mode_multi(struct mipi_dsi_multi_context *ctx);
+void mipi_dsi_dcs_exit_sleep_mode_multi(struct mipi_dsi_multi_context *ctx);
+void mipi_dsi_dcs_set_display_off_multi(struct mipi_dsi_multi_context *ctx);
+void mipi_dsi_dcs_set_display_on_multi(struct mipi_dsi_multi_context *ctx);
+void mipi_dsi_dcs_set_tear_on_multi(struct mipi_dsi_multi_context *ctx,
+				    enum mipi_dsi_dcs_tear_mode mode);
+
 /**
  * mipi_dsi_generic_write_seq - transmit data using a generic write packet
  *
-- 
cgit v1.2.3


From 36b75080e68b4a27ae1c40beffb3d6131f8eeeff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Sun, 12 May 2024 19:23:26 -0300
Subject: drm/v3d: Create a new V3D parameter for the maximum number of perfcnt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The maximum number of performance counters can change from version to
version and it's important for userspace to know this value, as it needs
to use the counters for performance queries. Therefore, expose the
maximum number of performance counters to userspace as a parameter.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240512222655.2792754-4-mcanal@igalia.com
---
 include/uapi/drm/v3d_drm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index dce1835eced4..215b01bb69c3 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -286,6 +286,7 @@ enum drm_v3d_param {
 	DRM_V3D_PARAM_SUPPORTS_PERFMON,
 	DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT,
 	DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE,
+	DRM_V3D_PARAM_MAX_PERF_COUNTERS,
 };
 
 struct drm_v3d_get_param {
-- 
cgit v1.2.3


From f33fe58298e686e7cc2d24f747c980457812b566 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Sun, 12 May 2024 19:23:27 -0300
Subject: drm/v3d: Create new IOCTL to expose performance counters information
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Userspace usually needs some information about the performance counters
available. Although we could replicate this information in the kernel
and user-space, let's use the kernel as the "single source of truth" to
avoid issues in the future (e.g. list of performance counters is updated
in user-space, but not in the kernel, generating invalid requests).

Therefore, create a new IOCTL to expose the performance counters
information, that is name, category, and description.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240512222655.2792754-5-mcanal@igalia.com
---
 include/uapi/drm/v3d_drm.h | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 215b01bb69c3..0860ddb3d0b6 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -42,6 +42,7 @@ extern "C" {
 #define DRM_V3D_PERFMON_DESTROY                   0x09
 #define DRM_V3D_PERFMON_GET_VALUES                0x0a
 #define DRM_V3D_SUBMIT_CPU                        0x0b
+#define DRM_V3D_PERFMON_GET_COUNTER               0x0c
 
 #define DRM_IOCTL_V3D_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
 #define DRM_IOCTL_V3D_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
@@ -58,6 +59,8 @@ extern "C" {
 #define DRM_IOCTL_V3D_PERFMON_GET_VALUES  DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_VALUES, \
 						   struct drm_v3d_perfmon_get_values)
 #define DRM_IOCTL_V3D_SUBMIT_CPU          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu)
+#define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \
+						   struct drm_v3d_perfmon_get_counter)
 
 #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE             0x01
 #define DRM_V3D_SUBMIT_EXTENSION		  0x02
@@ -718,6 +721,40 @@ struct drm_v3d_perfmon_get_values {
 	__u64 values_ptr;
 };
 
+#define DRM_V3D_PERFCNT_MAX_NAME 64
+#define DRM_V3D_PERFCNT_MAX_CATEGORY 32
+#define DRM_V3D_PERFCNT_MAX_DESCRIPTION 256
+
+/**
+ * struct drm_v3d_perfmon_get_counter - ioctl to get the description of a
+ * performance counter
+ *
+ * As userspace needs to retrieve information about the performance counters
+ * available, this IOCTL allows users to get information about a performance
+ * counter (name, category and description).
+ */
+struct drm_v3d_perfmon_get_counter {
+	/*
+	 * Counter ID
+	 *
+	 * Must be smaller than the maximum number of performance counters, which
+	 * can be retrieve through DRM_V3D_PARAM_MAX_PERF_COUNTERS.
+	 */
+	__u8 counter;
+
+	/* Name of the counter */
+	__u8 name[DRM_V3D_PERFCNT_MAX_NAME];
+
+	/* Category of the counter */
+	__u8 category[DRM_V3D_PERFCNT_MAX_CATEGORY];
+
+	/* Description of the counter */
+	__u8 description[DRM_V3D_PERFCNT_MAX_DESCRIPTION];
+
+	/* mbz */
+	__u8 reserved[7];
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From 673087d8b023faf34b84e8faf63bbeea3da87bab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?= <mcanal@igalia.com>
Date: Sun, 12 May 2024 19:23:29 -0300
Subject: drm/v3d: Deprecate the use of the Performance Counters enum
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Performance Counters enum used to identify the index of each
performance counter and provide the total number of performance
counters (V3D_PERFCNT_NUM). But, this enum is only valid for V3D 4.2,
not for V3D 7.1.

As we implemented a new flexible structure to retrieve performance
counters information, we can deprecate this enum.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240512222655.2792754-7-mcanal@igalia.com
---
 include/uapi/drm/v3d_drm.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 0860ddb3d0b6..87fc5bb0a61e 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -603,6 +603,16 @@ struct drm_v3d_submit_cpu {
 	__u64 extensions;
 };
 
+/* The performance counters index represented by this enum are deprecated and
+ * must no longer be used. These counters are only valid for V3D 4.2.
+ *
+ * In order to check for performance counter information,
+ * use DRM_IOCTL_V3D_PERFMON_GET_COUNTER.
+ *
+ * Don't use V3D_PERFCNT_NUM to retrieve the maximum number of performance
+ * counters. You should use DRM_IOCTL_V3D_GET_PARAM with the following
+ * parameter: DRM_V3D_PARAM_MAX_PERF_COUNTERS.
+ */
 enum {
 	V3D_PERFCNT_FEP_VALID_PRIMTS_NO_PIXELS,
 	V3D_PERFCNT_FEP_VALID_PRIMS,
-- 
cgit v1.2.3


From 8a0a7b98d4b6eeeab337ec25daa4bc0a5e710a15 Mon Sep 17 00:00:00 2001
From: Wayne Lin <Wayne.Lin@amd.com>
Date: Thu, 7 Mar 2024 14:29:57 +0800
Subject: drm/mst: Fix NULL pointer dereference at drm_dp_add_payload_part2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[Why]
Commit:
- commit 5aa1dfcdf0a4 ("drm/mst: Refactor the flow for payload allocation/removement")
accidently overwrite the commit
- commit 54d217406afe ("drm: use mgr->dev in drm_dbg_kms in drm_dp_add_payload_part2")
which cause regression.

[How]
Recover the original NULL fix and remove the unnecessary input parameter 'state' for
drm_dp_add_payload_part2().

Fixes: 5aa1dfcdf0a4 ("drm/mst: Refactor the flow for payload allocation/removement")
Reported-by: Leon Weiß <leon.weiss@ruhr-uni-bochum.de>
Link: https://lore.kernel.org/r/38c253ea42072cc825dc969ac4e6b9b600371cc8.camel@ruhr-uni-bochum.de/
Cc: lyude@redhat.com
Cc: imre.deak@intel.com
Cc: stable@vger.kernel.org
Cc: regressions@lists.linux.dev
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Wayne Lin <Wayne.Lin@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240307062957.2323620-1-Wayne.Lin@amd.com
---
 include/drm/display/drm_dp_mst_helper.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h
index 3546b58a121b..cfe096389d94 100644
--- a/include/drm/display/drm_dp_mst_helper.h
+++ b/include/drm/display/drm_dp_mst_helper.h
@@ -871,7 +871,6 @@ int drm_dp_add_payload_part1(struct drm_dp_mst_topology_mgr *mgr,
 			     struct drm_dp_mst_topology_state *mst_state,
 			     struct drm_dp_mst_atomic_payload *payload);
 int drm_dp_add_payload_part2(struct drm_dp_mst_topology_mgr *mgr,
-			     struct drm_atomic_state *state,
 			     struct drm_dp_mst_atomic_payload *payload);
 void drm_dp_remove_payload_part1(struct drm_dp_mst_topology_mgr *mgr,
 				 struct drm_dp_mst_topology_state *mst_state,
-- 
cgit v1.2.3


From 5716146295183651cfd89ae2ea46c5ef0d31faa8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 15 May 2024 09:38:35 +0200
Subject: wifi: regulatory: remove extra documentation

The struct member country_ie_checksum doesn't exist, so
don't document it.

Link: https://msgid.link/20240515093852.ebcc9673558b.Ie0b58c1249c6375c60859fa6474d7cdd8862b065@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/regulatory.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index ebf9e028d1ef..a103f4c8cf75 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -71,8 +71,6 @@ enum environment_cap {
  *	CRDA and can be used by other regulatory requests. When a
  *	the last request is not yet processed we must yield until it
  *	is processed before processing any new requests.
- * @country_ie_checksum: checksum of the last processed and accepted
- *	country IE
  * @country_ie_env: lets us know if the AP is telling us we are outdoor,
  *	indoor, or if it doesn't matter
  * @list: used to insert into the reg_requests_list linked list
-- 
cgit v1.2.3


From 3bb8dce41c023e39ec4d79a83742403b5064a276 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 15 May 2024 09:38:36 +0200
Subject: wifi: ieee80211: add missing doc short descriptions

Some structures erroneously don't have a short description,
add the missing descriptions.

Link: https://msgid.link/20240515093852.16f4355e918e.I940276a4fb006ada68ab1a3e6077e3229fff0f14@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index de2dce743ee2..713266ce48a7 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1101,7 +1101,7 @@ enum ieee80211_vht_opmode_bits {
 };
 
 /**
- * enum ieee80211_s1g_chanwidth
+ * enum ieee80211_s1g_chanwidth - S1G channel widths
  * These are defined in IEEE802.11-2016ah Table 10-20
  * as BSS Channel Width
  *
@@ -4145,7 +4145,7 @@ enum ieee80211_idle_options {
 };
 
 /**
- * struct ieee80211_bss_max_idle_period_ie
+ * struct ieee80211_bss_max_idle_period_ie - BSS max idle period element struct
  *
  * This structure refers to "BSS Max idle period element"
  *
@@ -4180,7 +4180,7 @@ enum ieee80211_sa_query_action {
 };
 
 /**
- * struct ieee80211_bssid_index
+ * struct ieee80211_bssid_index - multiple BSSID index element structure
  *
  * This structure refers to "Multiple BSSID-index element"
  *
@@ -4195,7 +4195,8 @@ struct ieee80211_bssid_index {
 };
 
 /**
- * struct ieee80211_multiple_bssid_configuration
+ * struct ieee80211_multiple_bssid_configuration - multiple BSSID configuration
+ *	element structure
  *
  * This structure refers to "Multiple BSSID Configuration element"
  *
-- 
cgit v1.2.3


From 1c26f09b20bafc92e0de80e84942be77d4d3c61e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 15 May 2024 09:38:37 +0200
Subject: wifi: radiotap: document ieee80211_get_radiotap_len() return value

Document the return value of ieee80211_get_radiotap_len() in
the proper kernel-doc format.

Link: https://msgid.link/20240515093852.143aadfdb094.I8795ec1e8cfd7106d58325fb514bae92625fb45c@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/ieee80211_radiotap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index 925bac726a92..91762faecc13 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -582,6 +582,7 @@ enum ieee80211_radiotap_eht_usig_tb {
 /**
  * ieee80211_get_radiotap_len - get radiotap header length
  * @data: pointer to the header
+ * Return: the radiotap header length
  */
 static inline u16 ieee80211_get_radiotap_len(const char *data)
 {
-- 
cgit v1.2.3


From 54856871298c9a8717450351699a8e6fe706101c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 15 May 2024 09:38:38 +0200
Subject: wifi: ieee80211: remove ieee80211_next_tbtt_present()

This is actually completely equivalent to the other function
ieee80211_is_s1g_short_beacon(), but open-codes the logic.
Implement the necessary logic in ieee80211_is_s1g_short_beacon()
and remove ieee80211_next_tbtt_present().

Link: https://msgid.link/20240515093852.774ced74dea8.I152525b4cff6e6a25be6c48fe6a4b89f17bab8a9@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 713266ce48a7..72b351abb4f6 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -604,25 +604,15 @@ static inline bool ieee80211_is_s1g_beacon(__le16 fc)
 }
 
 /**
- * ieee80211_next_tbtt_present - check if IEEE80211_FTYPE_EXT &&
- * IEEE80211_STYPE_S1G_BEACON && IEEE80211_S1G_BCN_NEXT_TBTT
- * @fc: frame control bytes in little-endian byteorder
- */
-static inline bool ieee80211_next_tbtt_present(__le16 fc)
-{
-	return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) ==
-	       cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON) &&
-	       fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT);
-}
-
-/**
- * ieee80211_is_s1g_short_beacon - check if next tbtt present bit is set. Only
- * true for S1G beacons when they're short.
+ * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is an S1G short beacon,
+ *	i.e. it is an S1G beacon with 'next TBTT' flag set
  */
 static inline bool ieee80211_is_s1g_short_beacon(__le16 fc)
 {
-	return ieee80211_is_s1g_beacon(fc) && ieee80211_next_tbtt_present(fc);
+	return ieee80211_is_s1g_beacon(fc) &&
+		(fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT));
 }
 
 /**
-- 
cgit v1.2.3


From 3c75e99c7036a87f159dbd526060554afb3482f5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 15 May 2024 09:38:39 +0200
Subject: wifi: ieee80211: document function return values

These are all missing, as pointed out when running kernel-doc.
Add return value documentation and fix some small things while
at it.

Link: https://msgid.link/20240515093852.1cd5ad8f354d.Idc16e9767fa42de80b659c32efc58aea38c26996@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 106 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 84 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 72b351abb4f6..595f83783f0e 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -373,6 +373,7 @@ struct ieee80211_trigger {
 /**
  * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame has to-DS set
  */
 static inline bool ieee80211_has_tods(__le16 fc)
 {
@@ -382,6 +383,7 @@ static inline bool ieee80211_has_tods(__le16 fc)
 /**
  * ieee80211_has_fromds - check if IEEE80211_FCTL_FROMDS is set
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame has from-DS set
  */
 static inline bool ieee80211_has_fromds(__le16 fc)
 {
@@ -391,6 +393,7 @@ static inline bool ieee80211_has_fromds(__le16 fc)
 /**
  * ieee80211_has_a4 - check if IEEE80211_FCTL_TODS and IEEE80211_FCTL_FROMDS are set
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not it's a 4-address frame (from-DS and to-DS set)
  */
 static inline bool ieee80211_has_a4(__le16 fc)
 {
@@ -401,6 +404,7 @@ static inline bool ieee80211_has_a4(__le16 fc)
 /**
  * ieee80211_has_morefrags - check if IEEE80211_FCTL_MOREFRAGS is set
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame has more fragments (more frags bit set)
  */
 static inline bool ieee80211_has_morefrags(__le16 fc)
 {
@@ -410,6 +414,7 @@ static inline bool ieee80211_has_morefrags(__le16 fc)
 /**
  * ieee80211_has_retry - check if IEEE80211_FCTL_RETRY is set
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the retry flag is set
  */
 static inline bool ieee80211_has_retry(__le16 fc)
 {
@@ -419,6 +424,7 @@ static inline bool ieee80211_has_retry(__le16 fc)
 /**
  * ieee80211_has_pm - check if IEEE80211_FCTL_PM is set
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the power management flag is set
  */
 static inline bool ieee80211_has_pm(__le16 fc)
 {
@@ -428,6 +434,7 @@ static inline bool ieee80211_has_pm(__le16 fc)
 /**
  * ieee80211_has_moredata - check if IEEE80211_FCTL_MOREDATA is set
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the more data flag is set
  */
 static inline bool ieee80211_has_moredata(__le16 fc)
 {
@@ -437,6 +444,7 @@ static inline bool ieee80211_has_moredata(__le16 fc)
 /**
  * ieee80211_has_protected - check if IEEE80211_FCTL_PROTECTED is set
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the protected flag is set
  */
 static inline bool ieee80211_has_protected(__le16 fc)
 {
@@ -446,6 +454,7 @@ static inline bool ieee80211_has_protected(__le16 fc)
 /**
  * ieee80211_has_order - check if IEEE80211_FCTL_ORDER is set
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the order flag is set
  */
 static inline bool ieee80211_has_order(__le16 fc)
 {
@@ -455,6 +464,7 @@ static inline bool ieee80211_has_order(__le16 fc)
 /**
  * ieee80211_is_mgmt - check if type is IEEE80211_FTYPE_MGMT
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame type is management
  */
 static inline bool ieee80211_is_mgmt(__le16 fc)
 {
@@ -465,6 +475,7 @@ static inline bool ieee80211_is_mgmt(__le16 fc)
 /**
  * ieee80211_is_ctl - check if type is IEEE80211_FTYPE_CTL
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame type is control
  */
 static inline bool ieee80211_is_ctl(__le16 fc)
 {
@@ -475,6 +486,7 @@ static inline bool ieee80211_is_ctl(__le16 fc)
 /**
  * ieee80211_is_data - check if type is IEEE80211_FTYPE_DATA
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a data frame
  */
 static inline bool ieee80211_is_data(__le16 fc)
 {
@@ -485,6 +497,7 @@ static inline bool ieee80211_is_data(__le16 fc)
 /**
  * ieee80211_is_ext - check if type is IEEE80211_FTYPE_EXT
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame type is extended
  */
 static inline bool ieee80211_is_ext(__le16 fc)
 {
@@ -496,6 +509,7 @@ static inline bool ieee80211_is_ext(__le16 fc)
 /**
  * ieee80211_is_data_qos - check if type is IEEE80211_FTYPE_DATA and IEEE80211_STYPE_QOS_DATA is set
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a QoS data frame
  */
 static inline bool ieee80211_is_data_qos(__le16 fc)
 {
@@ -510,6 +524,8 @@ static inline bool ieee80211_is_data_qos(__le16 fc)
 /**
  * ieee80211_is_data_present - check if type is IEEE80211_FTYPE_DATA and has data
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a QoS data frame that has data
+ *	(i.e. is not null data)
  */
 static inline bool ieee80211_is_data_present(__le16 fc)
 {
@@ -524,6 +540,7 @@ static inline bool ieee80211_is_data_present(__le16 fc)
 /**
  * ieee80211_is_assoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_REQ
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is an association request
  */
 static inline bool ieee80211_is_assoc_req(__le16 fc)
 {
@@ -534,6 +551,7 @@ static inline bool ieee80211_is_assoc_req(__le16 fc)
 /**
  * ieee80211_is_assoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_RESP
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is an association response
  */
 static inline bool ieee80211_is_assoc_resp(__le16 fc)
 {
@@ -544,6 +562,7 @@ static inline bool ieee80211_is_assoc_resp(__le16 fc)
 /**
  * ieee80211_is_reassoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_REQ
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a reassociation request
  */
 static inline bool ieee80211_is_reassoc_req(__le16 fc)
 {
@@ -554,6 +573,7 @@ static inline bool ieee80211_is_reassoc_req(__le16 fc)
 /**
  * ieee80211_is_reassoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_RESP
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a reassociation response
  */
 static inline bool ieee80211_is_reassoc_resp(__le16 fc)
 {
@@ -564,6 +584,7 @@ static inline bool ieee80211_is_reassoc_resp(__le16 fc)
 /**
  * ieee80211_is_probe_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_REQ
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a probe request
  */
 static inline bool ieee80211_is_probe_req(__le16 fc)
 {
@@ -574,6 +595,7 @@ static inline bool ieee80211_is_probe_req(__le16 fc)
 /**
  * ieee80211_is_probe_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_RESP
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a probe response
  */
 static inline bool ieee80211_is_probe_resp(__le16 fc)
 {
@@ -584,6 +606,7 @@ static inline bool ieee80211_is_probe_resp(__le16 fc)
 /**
  * ieee80211_is_beacon - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_BEACON
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a (regular, not S1G) beacon
  */
 static inline bool ieee80211_is_beacon(__le16 fc)
 {
@@ -595,6 +618,7 @@ static inline bool ieee80211_is_beacon(__le16 fc)
  * ieee80211_is_s1g_beacon - check if IEEE80211_FTYPE_EXT &&
  * IEEE80211_STYPE_S1G_BEACON
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is an S1G beacon
  */
 static inline bool ieee80211_is_s1g_beacon(__le16 fc)
 {
@@ -618,6 +642,7 @@ static inline bool ieee80211_is_s1g_short_beacon(__le16 fc)
 /**
  * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is an ATIM frame
  */
 static inline bool ieee80211_is_atim(__le16 fc)
 {
@@ -628,6 +653,7 @@ static inline bool ieee80211_is_atim(__le16 fc)
 /**
  * ieee80211_is_disassoc - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DISASSOC
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a disassociation frame
  */
 static inline bool ieee80211_is_disassoc(__le16 fc)
 {
@@ -638,6 +664,7 @@ static inline bool ieee80211_is_disassoc(__le16 fc)
 /**
  * ieee80211_is_auth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_AUTH
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is an authentication frame
  */
 static inline bool ieee80211_is_auth(__le16 fc)
 {
@@ -648,6 +675,7 @@ static inline bool ieee80211_is_auth(__le16 fc)
 /**
  * ieee80211_is_deauth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DEAUTH
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a deauthentication frame
  */
 static inline bool ieee80211_is_deauth(__le16 fc)
 {
@@ -658,6 +686,7 @@ static inline bool ieee80211_is_deauth(__le16 fc)
 /**
  * ieee80211_is_action - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ACTION
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is an action frame
  */
 static inline bool ieee80211_is_action(__le16 fc)
 {
@@ -668,6 +697,7 @@ static inline bool ieee80211_is_action(__le16 fc)
 /**
  * ieee80211_is_back_req - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK_REQ
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a block-ACK request frame
  */
 static inline bool ieee80211_is_back_req(__le16 fc)
 {
@@ -678,6 +708,7 @@ static inline bool ieee80211_is_back_req(__le16 fc)
 /**
  * ieee80211_is_back - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a block-ACK frame
  */
 static inline bool ieee80211_is_back(__le16 fc)
 {
@@ -688,6 +719,7 @@ static inline bool ieee80211_is_back(__le16 fc)
 /**
  * ieee80211_is_pspoll - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_PSPOLL
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a PS-poll frame
  */
 static inline bool ieee80211_is_pspoll(__le16 fc)
 {
@@ -698,6 +730,7 @@ static inline bool ieee80211_is_pspoll(__le16 fc)
 /**
  * ieee80211_is_rts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_RTS
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is an RTS frame
  */
 static inline bool ieee80211_is_rts(__le16 fc)
 {
@@ -708,6 +741,7 @@ static inline bool ieee80211_is_rts(__le16 fc)
 /**
  * ieee80211_is_cts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CTS
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a CTS frame
  */
 static inline bool ieee80211_is_cts(__le16 fc)
 {
@@ -718,6 +752,7 @@ static inline bool ieee80211_is_cts(__le16 fc)
 /**
  * ieee80211_is_ack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_ACK
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is an ACK frame
  */
 static inline bool ieee80211_is_ack(__le16 fc)
 {
@@ -728,6 +763,7 @@ static inline bool ieee80211_is_ack(__le16 fc)
 /**
  * ieee80211_is_cfend - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFEND
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a CF-end frame
  */
 static inline bool ieee80211_is_cfend(__le16 fc)
 {
@@ -738,6 +774,7 @@ static inline bool ieee80211_is_cfend(__le16 fc)
 /**
  * ieee80211_is_cfendack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFENDACK
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a CF-end-ack frame
  */
 static inline bool ieee80211_is_cfendack(__le16 fc)
 {
@@ -748,6 +785,7 @@ static inline bool ieee80211_is_cfendack(__le16 fc)
 /**
  * ieee80211_is_nullfunc - check if frame is a regular (non-QoS) nullfunc frame
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a nullfunc frame
  */
 static inline bool ieee80211_is_nullfunc(__le16 fc)
 {
@@ -758,6 +796,7 @@ static inline bool ieee80211_is_nullfunc(__le16 fc)
 /**
  * ieee80211_is_qos_nullfunc - check if frame is a QoS nullfunc frame
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a QoS nullfunc frame
  */
 static inline bool ieee80211_is_qos_nullfunc(__le16 fc)
 {
@@ -768,6 +807,7 @@ static inline bool ieee80211_is_qos_nullfunc(__le16 fc)
 /**
  * ieee80211_is_trigger - check if frame is trigger frame
  * @fc: frame control field in little-endian byteorder
+ * Return: whether or not the frame is a trigger frame
  */
 static inline bool ieee80211_is_trigger(__le16 fc)
 {
@@ -778,6 +818,7 @@ static inline bool ieee80211_is_trigger(__le16 fc)
 /**
  * ieee80211_is_any_nullfunc - check if frame is regular or QoS nullfunc frame
  * @fc: frame control bytes in little-endian byteorder
+ * Return: whether or not the frame is a nullfunc or QoS nullfunc frame
  */
 static inline bool ieee80211_is_any_nullfunc(__le16 fc)
 {
@@ -787,6 +828,8 @@ static inline bool ieee80211_is_any_nullfunc(__le16 fc)
 /**
  * ieee80211_is_first_frag - check if IEEE80211_SCTL_FRAG is not set
  * @seq_ctrl: frame sequence control bytes in little-endian byteorder
+ * Return: whether or not the frame is the first fragment (also true if
+ *	it's not fragmented at all)
  */
 static inline bool ieee80211_is_first_frag(__le16 seq_ctrl)
 {
@@ -796,6 +839,7 @@ static inline bool ieee80211_is_first_frag(__le16 seq_ctrl)
 /**
  * ieee80211_is_frag - check if a frame is a fragment
  * @hdr: 802.11 header of the frame
+ * Return: whether or not the frame is a fragment
  */
 static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr)
 {
@@ -2349,6 +2393,8 @@ struct ieee80211_eht_operation_info {
  * @max_vht_nss: current maximum NSS as advertised by the STA in
  *	operating mode notification, can be 0 in which case the
  *	capability data will be used to derive this (from MCS support)
+ * Return: The maximum NSS that can be used for the given bandwidth/MCS
+ *	combination
  *
  * Due to the VHT Extended NSS Bandwidth Support, the maximum NSS can
  * vary for a given BW/MCS. This function parses the data.
@@ -4317,6 +4363,7 @@ struct ieee80211_he_6ghz_capa {
 /**
  * ieee80211_get_qos_ctl - get pointer to qos control bytes
  * @hdr: the frame
+ * Return: a pointer to the QoS control field in the frame header
  *
  * The qos ctrl bytes come after the frame_control, duration, seq_num
  * and 3 or 4 addresses of length ETH_ALEN. Checks frame_control to choose
@@ -4339,6 +4386,7 @@ static inline u8 *ieee80211_get_qos_ctl(struct ieee80211_hdr *hdr)
 /**
  * ieee80211_get_tid - get qos TID
  * @hdr: the frame
+ * Return: the TID from the QoS control field
  */
 static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr)
 {
@@ -4350,6 +4398,7 @@ static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr)
 /**
  * ieee80211_get_SA - get pointer to SA
  * @hdr: the frame
+ * Return: a pointer to the source address (SA)
  *
  * Given an 802.11 frame, this function returns the offset
  * to the source address (SA). It does not verify that the
@@ -4369,6 +4418,7 @@ static inline u8 *ieee80211_get_SA(struct ieee80211_hdr *hdr)
 /**
  * ieee80211_get_DA - get pointer to DA
  * @hdr: the frame
+ * Return: a pointer to the destination address (DA)
  *
  * Given an 802.11 frame, this function returns the offset
  * to the destination address (DA). It does not verify that
@@ -4387,6 +4437,7 @@ static inline u8 *ieee80211_get_DA(struct ieee80211_hdr *hdr)
 /**
  * ieee80211_is_bufferable_mmpdu - check if frame is bufferable MMPDU
  * @skb: the skb to check, starting with the 802.11 header
+ * Return: whether or not the MMPDU is bufferable
  */
 static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb)
 {
@@ -4425,6 +4476,7 @@ static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb)
 /**
  * _ieee80211_is_robust_mgmt_frame - check if frame is a robust management frame
  * @hdr: the frame (buffer must include at least the first octet of payload)
+ * Return: whether or not the frame is a robust management frame
  */
 static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr)
 {
@@ -4461,6 +4513,7 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr)
 /**
  * ieee80211_is_robust_mgmt_frame - check if skb contains a robust mgmt frame
  * @skb: the skb containing the frame, length will be checked
+ * Return: whether or not the frame is a robust management frame
  */
 static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb)
 {
@@ -4473,6 +4526,7 @@ static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb)
  * ieee80211_is_public_action - check if frame is a public action frame
  * @hdr: the frame
  * @len: length of the frame
+ * Return: whether or not the frame is a public action frame
  */
 static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr,
 					      size_t len)
@@ -4518,8 +4572,9 @@ ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb)
 
 /**
  * _ieee80211_is_group_privacy_action - check if frame is a group addressed
- * privacy action frame
+ *	privacy action frame
  * @hdr: the frame
+ * Return: whether or not the frame is a group addressed privacy action frame
  */
 static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr)
 {
@@ -4535,8 +4590,9 @@ static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr)
 
 /**
  * ieee80211_is_group_privacy_action - check if frame is a group addressed
- * privacy action frame
+ *	privacy action frame
  * @skb: the skb containing the frame, length will be checked
+ * Return: whether or not the frame is a group addressed privacy action frame
  */
 static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb)
 {
@@ -4548,6 +4604,7 @@ static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb)
 /**
  * ieee80211_tu_to_usec - convert time units (TU) to microseconds
  * @tu: the TUs
+ * Return: the time value converted to microseconds
  */
 static inline unsigned long ieee80211_tu_to_usec(unsigned long tu)
 {
@@ -4559,6 +4616,7 @@ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu)
  * @tim: the TIM IE
  * @tim_len: length of the TIM IE
  * @aid: the AID to look for
+ * Return: whether or not traffic is indicated in the TIM for the given AID
  */
 static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim,
 				       u8 tim_len, u16 aid)
@@ -4585,8 +4643,10 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim,
 }
 
 /**
- * ieee80211_get_tdls_action - get tdls packet action (or -1, if not tdls packet)
+ * ieee80211_get_tdls_action - get TDLS action code
  * @skb: the skb containing the frame, length will not be checked
+ * Return: the TDLS action code, or -1 if it's not an encapsulated TDLS action
+ *	frame
  *
  * This function assumes the frame is a data frame, and that the network header
  * is in the correct place.
@@ -4626,6 +4686,7 @@ static inline int ieee80211_get_tdls_action(struct sk_buff *skb)
 /**
  * ieee80211_action_contains_tpc - checks if the frame contains TPC element
  * @skb: the skb containing the frame, length will be checked
+ * Return: %true if the frame contains a TPC element, %false otherwise
  *
  * This function checks if it's either TPC report action frame or Link
  * Measurement report action frame as defined in IEEE Std. 802.11-2012 8.5.2.5
@@ -4743,6 +4804,7 @@ struct element {
  * @element: element pointer after for_each_element() or friends
  * @data: same data pointer as passed to for_each_element() or friends
  * @datalen: same data length as passed to for_each_element() or friends
+ * Return: %true if all elements were iterated, %false otherwise; see notes
  *
  * This function returns %true if all the data was parsed or considered
  * while walking the elements. Only use this if your for_each_element()
@@ -4946,6 +5008,7 @@ struct ieee80211_mle_tdls_common_info {
  * ieee80211_mle_common_size - check multi-link element common size
  * @data: multi-link element, must already be checked for size using
  *	ieee80211_mle_size_ok()
+ * Return: the size of the multi-link element's "common" subfield 
  */
 static inline u8 ieee80211_mle_common_size(const u8 *data)
 {
@@ -4978,11 +5041,10 @@ static inline u8 ieee80211_mle_common_size(const u8 *data)
 /**
  * ieee80211_mle_get_link_id - returns the link ID
  * @data: the basic multi link element
+ * Return: the link ID, or -1 if not present
  *
  * The element is assumed to be of the correct type (BASIC) and big enough,
  * this must be checked using ieee80211_mle_type_ok().
- *
- * If the BSS link ID can't be found, -1 will be returned
  */
 static inline int ieee80211_mle_get_link_id(const u8 *data)
 {
@@ -5002,12 +5064,10 @@ static inline int ieee80211_mle_get_link_id(const u8 *data)
 /**
  * ieee80211_mle_get_bss_param_ch_cnt - returns the BSS parameter change count
  * @data: pointer to the basic multi link element
+ * Return: the BSS Parameter Change Count field value, or -1 if not present
  *
  * The element is assumed to be of the correct type (BASIC) and big enough,
  * this must be checked using ieee80211_mle_type_ok().
- *
- * If the BSS parameter change count value can't be found (the presence bit
- * for it is clear), -1 will be returned.
  */
 static inline int
 ieee80211_mle_get_bss_param_ch_cnt(const u8 *data)
@@ -5030,13 +5090,13 @@ ieee80211_mle_get_bss_param_ch_cnt(const u8 *data)
 
 /**
  * ieee80211_mle_get_eml_med_sync_delay - returns the medium sync delay
- * @data: pointer to the multi link EHT IE
+ * @data: pointer to the multi-link element
+ * Return: the medium synchronization delay field value from the multi-link
+ *	element, or the default value (%IEEE80211_MED_SYNC_DELAY_DEFAULT)
+ *	if not present
  *
  * The element is assumed to be of the correct type (BASIC) and big enough,
  * this must be checked using ieee80211_mle_type_ok().
- *
- * If the medium synchronization is not present, then the default value is
- * returned.
  */
 static inline u16 ieee80211_mle_get_eml_med_sync_delay(const u8 *data)
 {
@@ -5060,12 +5120,12 @@ static inline u16 ieee80211_mle_get_eml_med_sync_delay(const u8 *data)
 
 /**
  * ieee80211_mle_get_eml_cap - returns the EML capability
- * @data: pointer to the multi link EHT IE
+ * @data: pointer to the multi-link element
+ * Return: the EML capability field value from the multi-link element,
+ *	or 0 if not present
  *
  * The element is assumed to be of the correct type (BASIC) and big enough,
  * this must be checked using ieee80211_mle_type_ok().
- *
- * If the EML capability is not present, 0 will be returned.
  */
 static inline u16 ieee80211_mle_get_eml_cap(const u8 *data)
 {
@@ -5091,13 +5151,12 @@ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data)
 
 /**
  * ieee80211_mle_get_mld_capa_op - returns the MLD capabilities and operations.
- * @data: pointer to the multi link EHT IE
+ * @data: pointer to the multi-link element
+ * Return: the MLD capabilities and operations field value from the multi-link
+ *	element, or 0 if not present
  *
  * The element is assumed to be of the correct type (BASIC) and big enough,
  * this must be checked using ieee80211_mle_type_ok().
- *
- * If the MLD capabilities and operations field is not present, 0 will be
- * returned.
  */
 static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data)
 {
@@ -5128,12 +5187,11 @@ static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data)
 
 /**
  * ieee80211_mle_get_mld_id - returns the MLD ID
- * @data: pointer to the multi link element
+ * @data: pointer to the multi-link element
+ * Return: The MLD ID in the given multi-link element, or 0 if not present
  *
  * The element is assumed to be of the correct type (BASIC) and big enough,
  * this must be checked using ieee80211_mle_type_ok().
- *
- * If the MLD ID is not present, 0 will be returned.
  */
 static inline u8 ieee80211_mle_get_mld_id(const u8 *data)
 {
@@ -5168,6 +5226,7 @@ static inline u8 ieee80211_mle_get_mld_id(const u8 *data)
  * ieee80211_mle_size_ok - validate multi-link element size
  * @data: pointer to the element data
  * @len: length of the containing element
+ * Return: whether or not the multi-link element size is OK
  */
 static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len)
 {
@@ -5237,6 +5296,7 @@ static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len)
  * @data: pointer to the element data
  * @type: expected type of the element
  * @len: length of the containing element
+ * Return: whether or not the multi-link element type matches and size is OK
  */
 static inline bool ieee80211_mle_type_ok(const u8 *data, u8 type, size_t len)
 {
@@ -5280,6 +5340,7 @@ struct ieee80211_mle_per_sta_profile {
  *	profile size
  * @data: pointer to the sub element data
  * @len: length of the containing sub element
+ * Return: %true if the STA profile is large enough, %false otherwise
  */
 static inline bool ieee80211_mle_basic_sta_prof_size_ok(const u8 *data,
 							size_t len)
@@ -5364,6 +5425,7 @@ ieee80211_mle_basic_sta_prof_bss_param_ch_cnt(const struct ieee80211_mle_per_sta
  *	element sta profile size.
  * @data: pointer to the sub element data
  * @len: length of the containing sub element
+ * Return: %true if the STA profile is large enough, %false otherwise
  */
 static inline bool ieee80211_mle_reconf_sta_prof_size_ok(const u8 *data,
 							 size_t len)
-- 
cgit v1.2.3


From 9d222c12834d80849afbad7b42822b8ffbbc025f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 15 May 2024 09:38:40 +0200
Subject: wifi: ieee80211: document two FTM related functions

Add some documentation to ieee80211_is_timing_measurement()
and ieee80211_is_ftm().

Link: https://msgid.link/20240515093852.229aa69e972c.Ifae6762a698e79cd5a49a055fe4c32330e826200@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 595f83783f0e..fb50a99daa93 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -4731,6 +4731,11 @@ static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb)
 	return true;
 }
 
+/**
+ * ieee80211_is_timing_measurement - check if frame is timing measurement response
+ * @skb: the SKB to check
+ * Return: whether or not the frame is a valid timing measurement response
+ */
 static inline bool ieee80211_is_timing_measurement(struct sk_buff *skb)
 {
 	struct ieee80211_mgmt *mgmt = (void *)skb->data;
@@ -4750,6 +4755,11 @@ static inline bool ieee80211_is_timing_measurement(struct sk_buff *skb)
 	return false;
 }
 
+/**
+ * ieee80211_is_ftm - check if frame is FTM response
+ * @skb: the SKB to check
+ * Return: whether or not the frame is a valid FTM response action frame
+ */
 static inline bool ieee80211_is_ftm(struct sk_buff *skb)
 {
 	struct ieee80211_mgmt *mgmt = (void *)skb->data;
-- 
cgit v1.2.3


From 8592fd7ccc95a7cf222985e6297041463e8b4e9d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 6 May 2024 21:37:54 +0200
Subject: wifi: ieee80211/ath11k: remove IEEE80211_MAX_NUM_PWR_LEVEL

The define IEEE80211_MAX_NUM_PWR_LEVEL doesn't make much
sense. Yes, that table has a maximum value of 8, and the
table will actually remain that way, but EHT introduced
a way to encode more levels for 320 MHz channels.

Remove IEEE80211_MAX_NUM_PWR_LEVEL and, for ath11k being
the only user, add ATH11K_NUM_PWR_LEVELS, where it makes
sense since it cannot support 320 MHz channels.

Acked-by: Kalle Valo <kvalo@kernel.org>
Link: https://msgid.link/20240506214536.9818e5471055.Icece7e47e963d6b68e0d97ba13c102b37fbaa689@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index fb50a99daa93..1c3a683a3ee2 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2833,11 +2833,6 @@ struct ieee80211_he_6ghz_oper {
  * So it it totally max 8 Transmit Power Envelope element.
  */
 #define IEEE80211_TPE_MAX_IE_COUNT	8
-/*
- * In "Table 9-277—Meaning of Maximum Transmit Power Count subfield"
- * of "IEEE Std 802.11ax™‐2021", the max power level is 8.
- */
-#define IEEE80211_MAX_NUM_PWR_LEVEL	8
 
 #define IEEE80211_TPE_MAX_POWER_COUNT	8
 
-- 
cgit v1.2.3


From 39dc8b8ea387ce7f4fe2d2d6d550ed52aa9aa040 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 6 May 2024 21:37:56 +0200
Subject: wifi: mac80211: pass parsed TPE data to drivers

Instead of passing the full TPE elements, in all their glory
and mixed up data formats for HE backward compatibility, parse
them fully into the right values, and pass that to the drivers.

Also introduce proper validation already in mac80211, so that
drivers don't need to do it, and parse the EHT portions.

The code now passes the values in the right order according to
the channel used by an interface, which could also be a subset
of the data advertised by the AP, if we couldn't connect with
the full bandwidth (for whatever reason.)

Also add kunit tests for the more complicated bits of it.

Reviewed-by: Miriam Rachel Korenblit <miriam.rachel.korenblit@intel.com>
Acked-by: Kalle Valo <kvalo@kernel.org>
Link: https://msgid.link/20240506214536.2aa839969b60.I265b28209e0b29772b2f125f7f83de44a4da877b@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 102 +++++++++++++++++++++++++++++++++++++++-------
 include/net/mac80211.h    |  41 +++++++++++++++++--
 2 files changed, 124 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 1c3a683a3ee2..769008a51809 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2825,17 +2825,6 @@ struct ieee80211_he_6ghz_oper {
 	u8 minrate;
 } __packed;
 
-/*
- * In "9.4.2.161 Transmit Power Envelope element" of "IEEE Std 802.11ax-2021",
- * it show four types in "Table 9-275a-Maximum Transmit Power Interpretation
- * subfield encoding", and two category for each type in "Table E-12-Regulatory
- * Info subfield encoding in the United States".
- * So it it totally max 8 Transmit Power Envelope element.
- */
-#define IEEE80211_TPE_MAX_IE_COUNT	8
-
-#define IEEE80211_TPE_MAX_POWER_COUNT	8
-
 /* transmit power interpretation type of transmit power envelope element */
 enum ieee80211_tx_power_intrpt_type {
 	IEEE80211_TPE_LOCAL_EIRP,
@@ -2844,24 +2833,107 @@ enum ieee80211_tx_power_intrpt_type {
 	IEEE80211_TPE_REG_CLIENT_EIRP_PSD,
 };
 
+/* category type of transmit power envelope element */
+enum ieee80211_tx_power_category_6ghz {
+	IEEE80211_TPE_CAT_6GHZ_DEFAULT = 0,
+	IEEE80211_TPE_CAT_6GHZ_SUBORDINATE = 1,
+};
+
+/*
+ * For IEEE80211_TPE_LOCAL_EIRP / IEEE80211_TPE_REG_CLIENT_EIRP,
+ * setting to 63.5 dBm means no constraint.
+ */
+#define IEEE80211_TPE_MAX_TX_PWR_NO_CONSTRAINT	127
+
+/*
+ * For IEEE80211_TPE_LOCAL_EIRP_PSD / IEEE80211_TPE_REG_CLIENT_EIRP_PSD,
+ * setting to 127 indicates no PSD limit for the 20 MHz channel.
+ */
+#define IEEE80211_TPE_PSD_NO_LIMIT		127
+
 /**
  * struct ieee80211_tx_pwr_env - Transmit Power Envelope
- * @tx_power_info: Transmit Power Information field
- * @tx_power: Maximum Transmit Power field
+ * @info: Transmit Power Information field
+ * @variable: Maximum Transmit Power field
  *
  * This structure represents the payload of the "Transmit Power
  * Envelope element" as described in IEEE Std 802.11ax-2021 section
  * 9.4.2.161
  */
 struct ieee80211_tx_pwr_env {
-	u8 tx_power_info;
-	s8 tx_power[IEEE80211_TPE_MAX_POWER_COUNT];
+	u8 info;
+	u8 variable[];
 } __packed;
 
 #define IEEE80211_TX_PWR_ENV_INFO_COUNT 0x7
 #define IEEE80211_TX_PWR_ENV_INFO_INTERPRET 0x38
 #define IEEE80211_TX_PWR_ENV_INFO_CATEGORY 0xC0
 
+#define IEEE80211_TX_PWR_ENV_EXT_COUNT	0xF
+
+static inline bool ieee80211_valid_tpe_element(const u8 *data, u8 len)
+{
+	const struct ieee80211_tx_pwr_env *env = (const void *)data;
+	u8 count, interpret, category;
+	u8 needed = sizeof(*env);
+	u8 N; /* also called N in the spec */
+
+	if (len < needed)
+		return false;
+
+	count = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_COUNT);
+	interpret = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_INTERPRET);
+	category = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_CATEGORY);
+
+	switch (category) {
+	case IEEE80211_TPE_CAT_6GHZ_DEFAULT:
+	case IEEE80211_TPE_CAT_6GHZ_SUBORDINATE:
+		break;
+	default:
+		return false;
+	}
+
+	switch (interpret) {
+	case IEEE80211_TPE_LOCAL_EIRP:
+	case IEEE80211_TPE_REG_CLIENT_EIRP:
+		if (count > 3)
+			return false;
+
+		/* count == 0 encodes 1 value for 20 MHz, etc. */
+		needed += count + 1;
+
+		if (len < needed)
+			return false;
+
+		/* there can be extension fields not accounted for in 'count' */
+
+		return true;
+	case IEEE80211_TPE_LOCAL_EIRP_PSD:
+	case IEEE80211_TPE_REG_CLIENT_EIRP_PSD:
+		if (count > 4)
+			return false;
+
+		N = count ? 1 << (count - 1) : 1;
+		needed += N;
+
+		if (len < needed)
+			return false;
+
+		if (len > needed) {
+			u8 K = u8_get_bits(env->variable[N],
+					   IEEE80211_TX_PWR_ENV_EXT_COUNT);
+
+			needed += 1 + K;
+			if (len < needed)
+				return false;
+		}
+
+		return true;
+	}
+
+	return false;
+}
+
 /*
  * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size
  * @he_oper_ie: byte data of the He Operations IE, stating from the byte
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index cafc664ee531..a4efbfb8d796 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -550,6 +550,39 @@ struct ieee80211_fils_discovery {
 	u32 max_interval;
 };
 
+#define IEEE80211_TPE_EIRP_ENTRIES_320MHZ	5
+struct ieee80211_parsed_tpe_eirp {
+	bool valid;
+	s8 power[IEEE80211_TPE_EIRP_ENTRIES_320MHZ];
+	u8 count;
+};
+
+#define IEEE80211_TPE_PSD_ENTRIES_320MHZ	16
+struct ieee80211_parsed_tpe_psd {
+	bool valid;
+	s8 power[IEEE80211_TPE_PSD_ENTRIES_320MHZ];
+	u8 count, n;
+};
+
+/**
+ * struct ieee80211_parsed_tpe - parsed transmit power envelope information
+ * @max_local: maximum local EIRP, one value for 20, 40, 80, 160, 320 MHz each
+ *	(indexed by TX power category)
+ * @max_reg_client: maximum regulatory client EIRP, one value for 20, 40, 80,
+ *	160, 320 MHz each
+ *	(indexed by TX power category)
+ * @psd_local: maximum local power spectral density, one value for each 20 MHz
+ *	subchannel per bss_conf's chanreq.oper
+ *	(indexed by TX power category)
+ * @psd_reg_client: maximum regulatory power spectral density, one value for
+ *	each 20 MHz subchannel per bss_conf's chanreq.oper
+ *	(indexed by TX power category)
+ */
+struct ieee80211_parsed_tpe {
+	struct ieee80211_parsed_tpe_eirp max_local[2], max_reg_client[2];
+	struct ieee80211_parsed_tpe_psd psd_local[2], psd_reg_client[2];
+};
+
 /**
  * struct ieee80211_bss_conf - holds the BSS's changing parameters
  *
@@ -662,8 +695,7 @@ struct ieee80211_fils_discovery {
  * @beacon_tx_rate: The configured beacon transmit rate that needs to be passed
  *	to driver when rate control is offloaded to firmware.
  * @power_type: power type of BSS for 6 GHz
- * @tx_pwr_env: transmit power envelope array of BSS.
- * @tx_pwr_env_num: number of @tx_pwr_env.
+ * @tpe: transmit power envelope information
  * @pwr_reduction: power constraint of BSS.
  * @eht_support: does this BSS support EHT
  * @csa_active: marks whether a channel switch is going on.
@@ -766,8 +798,9 @@ struct ieee80211_bss_conf {
 	u32 unsol_bcast_probe_resp_interval;
 	struct cfg80211_bitrate_mask beacon_tx_rate;
 	enum ieee80211_ap_reg_power power_type;
-	struct ieee80211_tx_pwr_env tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT];
-	u8 tx_pwr_env_num;
+
+	struct ieee80211_parsed_tpe tpe;
+
 	u8 pwr_reduction;
 	bool eht_support;
 
-- 
cgit v1.2.3


From 5a009b42e0418d30b3ffaff2f46c534cd79b3f23 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 6 May 2024 21:37:57 +0200
Subject: wifi: mac80211: track changes in AP's TPE

If the TPE (transmit power envelope) is changed, detect and
report that to the driver.

Reviewed-by: Miriam Rachel Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240506214536.103dda923f45.I990877e409ab8eade9ed7c172272e0cae57256cf@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a4efbfb8d796..a59eacfe0480 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -362,6 +362,7 @@ struct ieee80211_vif_chanctx_switch {
  *	status changed.
  * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed.
  * @BSS_CHANGED_MLD_TTLM: negotiated TID to link mapping was changed
+ * @BSS_CHANGED_TPE: transmit power envelope changed
  */
 enum ieee80211_bss_change {
 	BSS_CHANGED_ASSOC		= 1<<0,
@@ -398,6 +399,7 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31,
 	BSS_CHANGED_MLD_VALID_LINKS	= BIT_ULL(33),
 	BSS_CHANGED_MLD_TTLM		= BIT_ULL(34),
+	BSS_CHANGED_TPE			= BIT_ULL(35),
 
 	/* when adding here, make sure to change ieee80211_reconfig */
 };
-- 
cgit v1.2.3


From f9a0757a4b2f5df4376963c25a3d7d7aeba78444 Mon Sep 17 00:00:00 2001
From: Pradeep Kumar Chitrapu <quic_pradeepc@quicinc.com>
Date: Wed, 15 May 2024 11:13:18 -0700
Subject: wifi: mac80211: Add EHT UL MU-MIMO flag in ieee80211_bss_conf

Add flag for Full Bandwidth UL MU-MIMO for EHT. This is utilized
to pass EHT MU-MIMO configurations from user space to driver
in AP mode.

Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1

Signed-off-by: Pradeep Kumar Chitrapu <quic_pradeepc@quicinc.com>
Link: https://msgid.link/20240515181327.12855-2-quic_pradeepc@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a59eacfe0480..ecfa65ade226 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -735,6 +735,9 @@ struct ieee80211_parsed_tpe {
  *	beamformee
  * @eht_mu_beamformer: in AP-mode, does this BSS enable operation as an EHT MU
  *	beamformer
+ * @eht_80mhz_full_bw_ul_mumimo: in AP-mode, does this BSS support the
+ *	reception of an EHT TB PPDU on an RU that spans the entire PPDU
+ *	bandwidth
  */
 struct ieee80211_bss_conf {
 	struct ieee80211_vif *vif;
@@ -828,6 +831,7 @@ struct ieee80211_bss_conf {
 	bool eht_su_beamformer;
 	bool eht_su_beamformee;
 	bool eht_mu_beamformer;
+	bool eht_80mhz_full_bw_ul_mumimo;
 };
 
 /**
-- 
cgit v1.2.3


From aa4ec06c455d0200eea0a4361cc58eb5b8bf68c4 Mon Sep 17 00:00:00 2001
From: Dmitry Antipov <dmantipov@yandex.ru>
Date: Fri, 17 May 2024 18:33:31 +0300
Subject: wifi: cfg80211: use __counted_by where appropriate

Annotate 'sub_specs' of 'struct cfg80211_sar_specs', 'channels'
of 'struct cfg80211_sched_scan_request', 'channels' of 'struct
cfg80211_wowlan_nd_match', and 'matches' of 'struct
cfg80211_wowlan_nd_info' with '__counted_by' attribute. Briefly
tested with clang 18.1.1 and CONFIG_UBSAN_BOUNDS running iwlwifi.

Signed-off-by: Dmitry Antipov <dmantipov@yandex.ru>
Link: https://msgid.link/20240517153332.18271-1-dmantipov@yandex.ru
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index cbf1664dc569..d79180bec7a1 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2200,7 +2200,7 @@ struct cfg80211_sar_sub_specs {
 struct cfg80211_sar_specs {
 	enum nl80211_sar_type type;
 	u32 num_sub_specs;
-	struct cfg80211_sar_sub_specs sub_specs[];
+	struct cfg80211_sar_sub_specs sub_specs[] __counted_by(num_sub_specs);
 };
 
 
@@ -2838,7 +2838,7 @@ struct cfg80211_sched_scan_request {
 	struct list_head list;
 
 	/* keep last */
-	struct ieee80211_channel *channels[];
+	struct ieee80211_channel *channels[] __counted_by(n_channels);
 };
 
 /**
@@ -3582,7 +3582,7 @@ struct cfg80211_coalesce {
 struct cfg80211_wowlan_nd_match {
 	struct cfg80211_ssid ssid;
 	int n_channels;
-	u32 channels[];
+	u32 channels[] __counted_by(n_channels);
 };
 
 /**
@@ -3596,7 +3596,7 @@ struct cfg80211_wowlan_nd_match {
  */
 struct cfg80211_wowlan_nd_info {
 	int n_matches;
-	struct cfg80211_wowlan_nd_match *matches[];
+	struct cfg80211_wowlan_nd_match *matches[] __counted_by(n_matches);
 };
 
 /**
-- 
cgit v1.2.3


From 3d9d313d518c5bc9e5ab6aeab86c9fa4bece095c Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Fri, 10 May 2024 18:08:13 +0300
Subject: drm/connector: update edid_blob_ptr documentation

Accessing the EDID via edid_blob_ptr causes chicken-and-egg
problems. Keep edid_blob_ptr as the userspace interface that should be
accessed via dedicated functions.

Reviewed-by: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/b6aa1ea30ae85ef9e9814315d3437e82f0ba6754.1715353572.git.jani.nikula@intel.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 include/drm/drm_connector.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index fe88d7fc6b8f..58ee9adf9091 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -1636,8 +1636,12 @@ struct drm_connector {
 
 	/**
 	 * @edid_blob_ptr: DRM property containing EDID if present. Protected by
-	 * &drm_mode_config.mutex. This should be updated only by calling
+	 * &drm_mode_config.mutex.
+	 *
+	 * This must be updated only by calling drm_edid_connector_update() or
 	 * drm_connector_update_edid_property().
+	 *
+	 * This must not be used by drivers directly.
 	 */
 	struct drm_property_blob *edid_blob_ptr;
 
-- 
cgit v1.2.3


From 4d25ca2d6801cfcf26f7f39c561611ba5be99bf8 Mon Sep 17 00:00:00 2001
From: Abhishek Chauhan <quic_abchauha@quicinc.com>
Date: Thu, 9 May 2024 14:18:32 -0700
Subject: net: Rename mono_delivery_time to tstamp_type for scalabilty

mono_delivery_time was added to check if skb->tstamp has delivery
time in mono clock base (i.e. EDT) otherwise skb->tstamp has
timestamp in ingress and delivery_time at egress.

Renaming the bitfield from mono_delivery_time to tstamp_type is for
extensibilty for other timestamps such as userspace timestamp
(i.e. SO_TXTIME) set via sock opts.

As we are renaming the mono_delivery_time to tstamp_type, it makes
sense to start assigning tstamp_type based on enum defined
in this commit.

Earlier we used bool arg flag to check if the tstamp is mono in
function skb_set_delivery_time, Now the signature of the functions
accepts tstamp_type to distinguish between mono and real time.

Also skb_set_delivery_type_by_clockid is a new function which accepts
clockid to determine the tstamp_type.

In future tstamp_type:1 can be extended to support userspace timestamp
by increasing the bitfield.

Signed-off-by: Abhishek Chauhan <quic_abchauha@quicinc.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20240509211834.3235191-2-quic_abchauha@quicinc.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/linux/skbuff.h  | 52 ++++++++++++++++++++++++++++++++++++-------------
 include/net/inet_frag.h |  4 ++--
 2 files changed, 41 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c0b97c93a6de..3a721cc3b644 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -706,6 +706,11 @@ typedef unsigned int sk_buff_data_t;
 typedef unsigned char *sk_buff_data_t;
 #endif
 
+enum skb_tstamp_type {
+	SKB_CLOCK_REALTIME,
+	SKB_CLOCK_MONOTONIC,
+};
+
 /**
  * DOC: Basic sk_buff geometry
  *
@@ -823,10 +828,8 @@ typedef unsigned char *sk_buff_data_t;
  *	@dst_pending_confirm: need to confirm neighbour
  *	@decrypted: Decrypted SKB
  *	@slow_gro: state present at GRO time, slower prepare step required
- *	@mono_delivery_time: When set, skb->tstamp has the
- *		delivery_time in mono clock base (i.e. EDT).  Otherwise, the
- *		skb->tstamp has the (rcv) timestamp at ingress and
- *		delivery_time at egress.
+ *	@tstamp_type: When set, skb->tstamp has the
+ *		delivery_time clock base of skb->tstamp.
  *	@napi_id: id of the NAPI struct this skb came from
  *	@sender_cpu: (aka @napi_id) source CPU in XPS
  *	@alloc_cpu: CPU which did the skb allocation.
@@ -954,7 +957,7 @@ struct sk_buff {
 	/* private: */
 	__u8			__mono_tc_offset[0];
 	/* public: */
-	__u8			mono_delivery_time:1;	/* See SKB_MONO_DELIVERY_TIME_MASK */
+	__u8			tstamp_type:1;	/* See skb_tstamp_type */
 #ifdef CONFIG_NET_XGRESS
 	__u8			tc_at_ingress:1;	/* See TC_AT_INGRESS_MASK */
 	__u8			tc_skip_classify:1;
@@ -4183,7 +4186,7 @@ static inline void skb_get_new_timestampns(const struct sk_buff *skb,
 static inline void __net_timestamp(struct sk_buff *skb)
 {
 	skb->tstamp = ktime_get_real();
-	skb->mono_delivery_time = 0;
+	skb->tstamp_type = SKB_CLOCK_REALTIME;
 }
 
 static inline ktime_t net_timedelta(ktime_t t)
@@ -4192,10 +4195,33 @@ static inline ktime_t net_timedelta(ktime_t t)
 }
 
 static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt,
-					 bool mono)
+					 u8 tstamp_type)
 {
 	skb->tstamp = kt;
-	skb->mono_delivery_time = kt && mono;
+
+	if (kt)
+		skb->tstamp_type = tstamp_type;
+	else
+		skb->tstamp_type = SKB_CLOCK_REALTIME;
+}
+
+static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb,
+						    ktime_t kt, clockid_t clockid)
+{
+	u8 tstamp_type = SKB_CLOCK_REALTIME;
+
+	switch (clockid) {
+	case CLOCK_REALTIME:
+		break;
+	case CLOCK_MONOTONIC:
+		tstamp_type = SKB_CLOCK_MONOTONIC;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		kt = 0;
+	}
+
+	skb_set_delivery_time(skb, kt, tstamp_type);
 }
 
 DECLARE_STATIC_KEY_FALSE(netstamp_needed_key);
@@ -4205,8 +4231,8 @@ DECLARE_STATIC_KEY_FALSE(netstamp_needed_key);
  */
 static inline void skb_clear_delivery_time(struct sk_buff *skb)
 {
-	if (skb->mono_delivery_time) {
-		skb->mono_delivery_time = 0;
+	if (skb->tstamp_type) {
+		skb->tstamp_type = SKB_CLOCK_REALTIME;
 		if (static_branch_unlikely(&netstamp_needed_key))
 			skb->tstamp = ktime_get_real();
 		else
@@ -4216,7 +4242,7 @@ static inline void skb_clear_delivery_time(struct sk_buff *skb)
 
 static inline void skb_clear_tstamp(struct sk_buff *skb)
 {
-	if (skb->mono_delivery_time)
+	if (skb->tstamp_type)
 		return;
 
 	skb->tstamp = 0;
@@ -4224,7 +4250,7 @@ static inline void skb_clear_tstamp(struct sk_buff *skb)
 
 static inline ktime_t skb_tstamp(const struct sk_buff *skb)
 {
-	if (skb->mono_delivery_time)
+	if (skb->tstamp_type)
 		return 0;
 
 	return skb->tstamp;
@@ -4232,7 +4258,7 @@ static inline ktime_t skb_tstamp(const struct sk_buff *skb)
 
 static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond)
 {
-	if (!skb->mono_delivery_time && skb->tstamp)
+	if (skb->tstamp_type != SKB_CLOCK_MONOTONIC && skb->tstamp)
 		return skb->tstamp;
 
 	if (static_branch_unlikely(&netstamp_needed_key) || cond)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 153960663ce4..5af6eb14c5db 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -76,7 +76,7 @@ struct frag_v6_compare_key {
  * @stamp: timestamp of the last received fragment
  * @len: total length of the original datagram
  * @meat: length of received fragments so far
- * @mono_delivery_time: stamp has a mono delivery time (EDT)
+ * @tstamp_type: stamp has a mono delivery time (EDT)
  * @flags: fragment queue flags
  * @max_size: maximum received fragment size
  * @fqdir: pointer to struct fqdir
@@ -97,7 +97,7 @@ struct inet_frag_queue {
 	ktime_t			stamp;
 	int			len;
 	int			meat;
-	u8			mono_delivery_time;
+	u8			tstamp_type;
 	__u8			flags;
 	u16			max_size;
 	struct fqdir		*fqdir;
-- 
cgit v1.2.3


From 1693c5db6ab8262e6f5263f9d211855959aa5acd Mon Sep 17 00:00:00 2001
From: Abhishek Chauhan <quic_abchauha@quicinc.com>
Date: Thu, 9 May 2024 14:18:33 -0700
Subject: net: Add additional bit to support clockid_t timestamp type

tstamp_type is now set based on actual clockid_t compressed
into 2 bits.

To make the design scalable for future needs this commit bring in
the change to extend the tstamp_type:1 to tstamp_type:2 to support
other clockid_t timestamp.

We now support CLOCK_TAI as part of tstamp_type as part of this
commit with existing support CLOCK_MONOTONIC and CLOCK_REALTIME.

Signed-off-by: Abhishek Chauhan <quic_abchauha@quicinc.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20240509211834.3235191-3-quic_abchauha@quicinc.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/linux/skbuff.h   | 18 ++++++++++++------
 include/uapi/linux/bpf.h | 15 ++++++++++-----
 2 files changed, 22 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3a721cc3b644..1e5c97daaa37 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -709,6 +709,8 @@ typedef unsigned char *sk_buff_data_t;
 enum skb_tstamp_type {
 	SKB_CLOCK_REALTIME,
 	SKB_CLOCK_MONOTONIC,
+	SKB_CLOCK_TAI,
+	__SKB_CLOCK_MAX = SKB_CLOCK_TAI,
 };
 
 /**
@@ -957,7 +959,7 @@ struct sk_buff {
 	/* private: */
 	__u8			__mono_tc_offset[0];
 	/* public: */
-	__u8			tstamp_type:1;	/* See skb_tstamp_type */
+	__u8			tstamp_type:2;	/* See skb_tstamp_type */
 #ifdef CONFIG_NET_XGRESS
 	__u8			tc_at_ingress:1;	/* See TC_AT_INGRESS_MASK */
 	__u8			tc_skip_classify:1;
@@ -1087,15 +1089,16 @@ struct sk_buff {
 #endif
 #define PKT_TYPE_OFFSET		offsetof(struct sk_buff, __pkt_type_offset)
 
-/* if you move tc_at_ingress or mono_delivery_time
+/* if you move tc_at_ingress or tstamp_type
  * around, you also must adapt these constants.
  */
 #ifdef __BIG_ENDIAN_BITFIELD
-#define SKB_MONO_DELIVERY_TIME_MASK	(1 << 7)
-#define TC_AT_INGRESS_MASK		(1 << 6)
+#define SKB_TSTAMP_TYPE_MASK		(3 << 6)
+#define SKB_TSTAMP_TYPE_RSHIFT		(6)
+#define TC_AT_INGRESS_MASK		(1 << 5)
 #else
-#define SKB_MONO_DELIVERY_TIME_MASK	(1 << 0)
-#define TC_AT_INGRESS_MASK		(1 << 1)
+#define SKB_TSTAMP_TYPE_MASK		(3)
+#define TC_AT_INGRESS_MASK		(1 << 2)
 #endif
 #define SKB_BF_MONO_TC_OFFSET		offsetof(struct sk_buff, __mono_tc_offset)
 
@@ -4216,6 +4219,9 @@ static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb,
 	case CLOCK_MONOTONIC:
 		tstamp_type = SKB_CLOCK_MONOTONIC;
 		break;
+	case CLOCK_TAI:
+		tstamp_type = SKB_CLOCK_TAI;
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		kt = 0;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 90706a47f6ff..25ea393cf084 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6207,12 +6207,17 @@ union {					\
 	__u64 :64;			\
 } __attribute__((aligned(8)))
 
+/* The enum used in skb->tstamp_type. It specifies the clock type
+ * of the time stored in the skb->tstamp.
+ */
 enum {
-	BPF_SKB_TSTAMP_UNSPEC,
-	BPF_SKB_TSTAMP_DELIVERY_MONO,	/* tstamp has mono delivery time */
-	/* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle,
-	 * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC
-	 * and try to deduce it by ingress, egress or skb->sk->sk_clockid.
+	BPF_SKB_TSTAMP_UNSPEC = 0,		/* DEPRECATED */
+	BPF_SKB_TSTAMP_DELIVERY_MONO = 1,	/* DEPRECATED */
+	BPF_SKB_CLOCK_REALTIME = 0,
+	BPF_SKB_CLOCK_MONOTONIC = 1,
+	BPF_SKB_CLOCK_TAI = 2,
+	/* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle,
+	 * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid.
 	 */
 };
 
-- 
cgit v1.2.3


From 2c1713a8f1c94033a6e00aae4693ab03e8a3b9f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sat, 18 May 2024 16:58:47 +0200
Subject: bpf: constify member bpf_sysctl_kern:: Table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The sysctl core is preparing to only expose instances of struct ctl_table
as "const". This will also affect the ctl_table argument of sysctl handlers,
for which bpf_sysctl_kern::table is also used.

As the function prototype of all sysctl handlers throughout the tree
needs to stay consistent that change will be done in one commit.

To reduce the size of that final commit, switch this utility type which
is not bound by "typedef proc_handler" to "const struct ctl_table".

No functional change.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Joel Granados <j.granados@samsung.com>
Link: https://lore.kernel.org/bpf/20240518-sysctl-const-handler-bpf-v1-1-f0d7186743c1@weissschuh.net
---
 include/linux/filter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0f12cf01070e..b02aea291b7e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1406,7 +1406,7 @@ struct bpf_sock_ops_kern {
 
 struct bpf_sysctl_kern {
 	struct ctl_table_header *head;
-	struct ctl_table *table;
+	const struct ctl_table *table;
 	void *cur_val;
 	size_t cur_len;
 	void *new_val;
-- 
cgit v1.2.3


From 73e75e6fc352bdca08f7e0893d5b6bb37171bdd2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Koutn=C3=BD?= <mkoutny@suse.com>
Date: Tue, 21 May 2024 11:21:26 +0200
Subject: cgroup/pids: Separate semantics of pids.events related to pids.max
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, when pids.max limit is breached in the hierarchy, the event
is counted and reported in the cgroup where the forking task resides.

This decouples the limit and the notification caused by the limit making
it hard to detect when the actual limit was effected.

Redefine the pids.events:max as: the number of times the limit of the
cgroup was hit.

(Implementation differentiates also "forkfail" event but this is
currently not exposed as it would better fit into pids.stat. It also
differs from pids.events:max only when pids.max is configured on
non-leaf cgroups.)

Since it changes semantics of the original "max" event, introduce this
change only in the v2 API of the controller and add a cgroup2 mount
option to revert to the legacy behavior.

Signed-off-by: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index ea48c861cd36..b36690ca0d3f 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -119,7 +119,12 @@ enum {
 	/*
 	 * Enable hugetlb accounting for the memory controller.
 	 */
-	 CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19),
+	CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19),
+
+	/*
+	 * Enable legacy local pids.events.
+	 */
+	CGRP_ROOT_PIDS_LOCAL_EVENTS = (1 << 20),
 };
 
 /* cftype->flags */
-- 
cgit v1.2.3


From 965cc040bf0698e81b0c0aef359ae650b42b428e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Mon, 20 May 2024 16:19:56 +0200
Subject: ASoC: Constify channel mapping array arguments in set_channel_map()

There is no need for implementations of DAI set_channel_map() to modify
contents of passed arrays with actual channel mapping.  Additionally,
the caller keeps full ownership of the array.

Constify these pointer arguments so the code will be safer and easier to
read (documenting the caller's ownership).

Acked-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://msgid.link/r/20240520-asoc-x1e80100-4-channel-mapping-v4-1-f657159b4aad@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/cs35l41.h | 4 ++--
 include/sound/soc-dai.h | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h
index bb70782d15d0..43c6a9ef8d9f 100644
--- a/include/sound/cs35l41.h
+++ b/include/sound/cs35l41.h
@@ -896,8 +896,8 @@ int cs35l41_test_key_lock(struct device *dev, struct regmap *regmap);
 int cs35l41_otp_unpack(struct device *dev, struct regmap *regmap);
 int cs35l41_register_errata_patch(struct device *dev, struct regmap *reg, unsigned int reg_revid);
 int cs35l41_set_channels(struct device *dev, struct regmap *reg,
-			 unsigned int tx_num, unsigned int *tx_slot,
-			 unsigned int rx_num, unsigned int *rx_slot);
+			 unsigned int tx_num, const unsigned int *tx_slot,
+			 unsigned int rx_num, const unsigned int *rx_slot);
 int cs35l41_gpio_config(struct regmap *regmap, struct cs35l41_hw_cfg *hw_cfg);
 void cs35l41_configure_cs_dsp(struct device *dev, struct regmap *reg, struct cs_dsp *dsp);
 int cs35l41_set_cspl_mbox_cmd(struct device *dev, struct regmap *regmap,
diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index adcd8719d343..15ef268c9845 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -188,8 +188,8 @@ int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai,
 	unsigned int tx_mask, unsigned int rx_mask, int slots, int slot_width);
 
 int snd_soc_dai_set_channel_map(struct snd_soc_dai *dai,
-	unsigned int tx_num, unsigned int *tx_slot,
-	unsigned int rx_num, unsigned int *rx_slot);
+	unsigned int tx_num, const unsigned int *tx_slot,
+	unsigned int rx_num, const unsigned int *rx_slot);
 
 int snd_soc_dai_set_tristate(struct snd_soc_dai *dai, int tristate);
 
@@ -305,8 +305,8 @@ struct snd_soc_dai_ops {
 		unsigned int tx_mask, unsigned int rx_mask,
 		int slots, int slot_width);
 	int (*set_channel_map)(struct snd_soc_dai *dai,
-		unsigned int tx_num, unsigned int *tx_slot,
-		unsigned int rx_num, unsigned int *rx_slot);
+		unsigned int tx_num, const unsigned int *tx_slot,
+		unsigned int rx_num, const unsigned int *rx_slot);
 	int (*get_channel_map)(struct snd_soc_dai *dai,
 			unsigned int *tx_num, unsigned int *tx_slot,
 			unsigned int *rx_num, unsigned int *rx_slot);
-- 
cgit v1.2.3


From 734447685ecc7c6328e40cb1bd4aaeeac03c1413 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 13 May 2024 19:37:20 +0200
Subject: ASoC: topology: Constify an argument of snd_soc_tplg_component_load()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

snd_soc_tplg_component_load() does not modify its "*ops" argument. It
only read some values and stores it in "soc_tplg.ops".

This argument and the ops field in "struct soc_tplg" can be made const.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
Link: https://msgid.link/r/f2f983e791d7f941a95556bb147f426a345d84d4.1715526069.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-topology.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/soc-topology.h b/include/sound/soc-topology.h
index f055c6917f6c..1eedd203ac29 100644
--- a/include/sound/soc-topology.h
+++ b/include/sound/soc-topology.h
@@ -178,7 +178,7 @@ static inline const void *snd_soc_tplg_get_data(struct snd_soc_tplg_hdr *hdr)
 
 /* Dynamic Object loading and removal for component drivers */
 int snd_soc_tplg_component_load(struct snd_soc_component *comp,
-	struct snd_soc_tplg_ops *ops, const struct firmware *fw);
+	const struct snd_soc_tplg_ops *ops, const struct firmware *fw);
 int snd_soc_tplg_component_remove(struct snd_soc_component *comp);
 
 /* Binds event handlers to dynamic widgets */
-- 
cgit v1.2.3


From 7a147670035ddec35f3fb2ace538ad56ad82c861 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 12 May 2024 13:11:21 +0200
Subject: regulator: consumer: Reorder fields in 'struct regulator_bulk_data'

Based on pahole, 2 holes can be combined in 'struct regulator_bulk_data'.

On x86_64 and allmodconfig, this shrinks the size of the structure from 32
to 24 bytes.

This is usually a win, because this structure is often used for static
global variables.

As an example:
Before:
   text	   data	    bss	    dec	    hex	filename
   3557	    162	      0	   3719	    e87	drivers/gpu/drm/msm/dsi/dsi_cfg.o

After:
   text	   data	    bss	    dec	    hex	filename
   3477	    162	      0	   3639	    e37	drivers/gpu/drm/msm/dsi/dsi_cfg.o

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://msgid.link/r/35c4edf2dbc6d4f24fb771341ded2989ae32f779.1715512259.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/consumer.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 59d0b9a79e6e..e6f81fc1fb17 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -128,11 +128,11 @@ struct regulator;
  *
  * @supply:       The name of the supply.  Initialised by the user before
  *                using the bulk regulator APIs.
+ * @consumer:     The regulator consumer for the supply.  This will be managed
+ *                by the bulk API.
  * @init_load_uA: After getting the regulator, regulator_set_load() will be
  *                called with this load.  Initialised by the user before
  *                using the bulk regulator APIs.
- * @consumer:     The regulator consumer for the supply.  This will be managed
- *                by the bulk API.
  *
  * The regulator APIs provide a series of regulator_bulk_() API calls as
  * a convenience to consumers which require multiple supplies.  This
@@ -140,8 +140,8 @@ struct regulator;
  */
 struct regulator_bulk_data {
 	const char *supply;
-	int init_load_uA;
 	struct regulator *consumer;
+	int init_load_uA;
 
 	/* private: Internal use */
 	int ret;
-- 
cgit v1.2.3


From f261172d39f358dcecce13c310690d3937e0cca6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 17 May 2024 22:40:20 +0300
Subject: spi: bitbang: Use typedef for txrx_*() callbacks

With a typedef for the txrx_*() callbacks the code looks neater.
Note that typedef for a function is okay to have.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://msgid.link/r/20240517194104.747328-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi_bitbang.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h
index b930eca2ef7b..7ca08b430ed5 100644
--- a/include/linux/spi/spi_bitbang.h
+++ b/include/linux/spi/spi_bitbang.h
@@ -4,6 +4,8 @@
 
 #include <linux/workqueue.h>
 
+typedef u32 (*spi_bb_txrx_word_fn)(struct spi_device *, unsigned int, u32, u8, unsigned int);
+
 struct spi_bitbang {
 	struct mutex		lock;
 	u8			busy;
@@ -28,9 +30,8 @@ struct spi_bitbang {
 	int	(*txrx_bufs)(struct spi_device *spi, struct spi_transfer *t);
 
 	/* txrx_word[SPI_MODE_*]() just looks like a shift register */
-	u32	(*txrx_word[4])(struct spi_device *spi,
-			unsigned nsecs,
-			u32 word, u8 bits, unsigned flags);
+	spi_bb_txrx_word_fn txrx_word[4];
+
 	int	(*set_line_direction)(struct spi_device *spi, bool output);
 };
 
-- 
cgit v1.2.3


From b90cc232e2ce8c959b19dc4b183e23e7aec137ab Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 17 May 2024 22:40:22 +0300
Subject: spi: bitbang: Replace hard coded number of SPI modes

Instead of using hard coded number of modes, replace it with
SPI_MODE_X_MASK + 1 to show relation to the SPI modes.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://msgid.link/r/20240517194104.747328-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi_bitbang.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h
index 7ca08b430ed5..d4cb83195f7a 100644
--- a/include/linux/spi/spi_bitbang.h
+++ b/include/linux/spi/spi_bitbang.h
@@ -30,7 +30,7 @@ struct spi_bitbang {
 	int	(*txrx_bufs)(struct spi_device *spi, struct spi_transfer *t);
 
 	/* txrx_word[SPI_MODE_*]() just looks like a shift register */
-	spi_bb_txrx_word_fn txrx_word[4];
+	spi_bb_txrx_word_fn txrx_word[SPI_MODE_X_MASK + 1];
 
 	int	(*set_line_direction)(struct spi_device *spi, bool output);
 };
-- 
cgit v1.2.3


From 983095eaf6c161ef73d96152bfc1a99ca051cd57 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 25 May 2024 18:00:31 +0200
Subject: dma-buf/fence-array: Add flex array to struct dma_fence_array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is an effort to get rid of all multiplications from allocation
functions in order to prevent integer overflows [1][2].

The "struct dma_fence_array" can be refactored to add a flex array in order
to have the "callback structures allocated behind the array" be more
explicit.

Do so:
   - makes the code more readable and safer.
   - allows using __counted_by() for additional checks
   - avoids some pointer arithmetic in dma_fence_array_enable_signaling()

Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [1]
Link: https://github.com/KSPP/linux/issues/160 [2]
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/8b4e556e07b5dd78bb8a39b67ea0a43b199083c8.1716652811.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 include/linux/dma-fence-array.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h
index ec7f25def392..29c5650c1038 100644
--- a/include/linux/dma-fence-array.h
+++ b/include/linux/dma-fence-array.h
@@ -33,6 +33,7 @@ struct dma_fence_array_cb {
  * @num_pending: fences in the array still pending
  * @fences: array of the fences
  * @work: internal irq_work function
+ * @callbacks: array of callback helpers
  */
 struct dma_fence_array {
 	struct dma_fence base;
@@ -43,6 +44,8 @@ struct dma_fence_array {
 	struct dma_fence **fences;
 
 	struct irq_work work;
+
+	struct dma_fence_array_cb callbacks[] __counted_by(num_fences);
 };
 
 /**
-- 
cgit v1.2.3


From 0edb555a65d1ef047a9805051c36922b52a38a9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Mon, 9 Oct 2023 12:37:26 +0200
Subject: platform: Make platform_driver::remove() return void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

struct platform_driver::remove returning an integer made driver authors
expect that returning an error code was proper error handling. However
the driver core ignores the error and continues to remove the device
because there is nothing the core could do anyhow and reentering the
remove callback again is only calling for trouble.

To prevent such wrong assumptions, change the return type of the remove
callback to void. This was prepared by introducing an alternative remove
callback returning void and converting all drivers to that. So .remove()
can be changed without further changes in drivers.

This corresponds to step b) of the plan outlined in commit
5c5a7680e67b ("platform: Provide a remove callback that returns no value").

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
 include/linux/platform_device.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 7a41c72c1959..d422db6eec63 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -237,15 +237,14 @@ struct platform_driver {
 	int (*probe)(struct platform_device *);
 
 	/*
-	 * Traditionally the remove callback returned an int which however is
-	 * ignored by the driver core. This led to wrong expectations by driver
-	 * authors who thought returning an error code was a valid error
-	 * handling strategy. To convert to a callback returning void, new
-	 * drivers should implement .remove_new() until the conversion it done
-	 * that eventually makes .remove() return void.
+	 * .remove_new() is a relic from a prototype conversion of .remove().
+	 * New drivers are supposed to implement .remove(). Once all drivers are
+	 * converted to not use .remove_new any more, it will be dropped.
 	 */
-	int (*remove)(struct platform_device *);
-	void (*remove_new)(struct platform_device *);
+	union {
+		void (*remove)(struct platform_device *);
+		void (*remove_new)(struct platform_device *);
+	};
 
 	void (*shutdown)(struct platform_device *);
 	int (*suspend)(struct platform_device *, pm_message_t state);
-- 
cgit v1.2.3


From c80c4490c280a1678e47d34d2a335a58f1318615 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 25 May 2024 12:45:31 +0000
Subject: cleanup: Standardize the header guard define's name

At some point during early development, the <linux/cleanup.h> header
must have been named <linux/guard.h>, as evidenced by the header
guard name:

  #ifndef __LINUX_GUARDS_H
  #define __LINUX_GUARDS_H

It ended up being <linux/cleanup.h>, but the old guard name for
a file name that was never upstream never changed.

Do that now - and while at it, also use the canonical _LINUX prefix,
instead of the less common __LINUX prefix.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/171664113181.10875.8784434350512348496.tip-bot2@tip-bot2
---
 include/linux/cleanup.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index c2d09bc4f976..cef68e8e09b6 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LINUX_GUARDS_H
-#define __LINUX_GUARDS_H
+#ifndef _LINUX_CLEANUP_H
+#define _LINUX_CLEANUP_H
 
 #include <linux/compiler.h>
 
@@ -247,4 +247,4 @@ __DEFINE_LOCK_GUARD_0(_name, _lock)
 	{ return class_##_name##_lock_ptr(_T); }
 
 
-#endif /* __LINUX_GUARDS_H */
+#endif /* _LINUX_CLEANUP_H */
-- 
cgit v1.2.3


From 5350f6ec55df381cd99db2a6bde283328fb3f75a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 3 May 2024 21:42:30 +0300
Subject: mtd: cfi: Get rid of redundant 'else'

In the snippets like the following

	if (...)
		return / goto / break / continue ...;
	else
		...

the 'else' is redundant. Get rid of it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Jeff Johnson <quic_jjohnson@quicinc.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20240503184230.2927283-1-andriy.shevchenko@linux.intel.com
---
 include/linux/mtd/cfi.h | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index 947410faf9e2..35ca19ae21ae 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -308,32 +308,32 @@ static inline uint8_t cfi_read_query(struct map_info *map, uint32_t addr)
 {
 	map_word val = map_read(map, addr);
 
-	if (map_bankwidth_is_1(map)) {
+	if (map_bankwidth_is_1(map))
 		return val.x[0];
-	} else if (map_bankwidth_is_2(map)) {
+	if (map_bankwidth_is_2(map))
 		return cfi16_to_cpu(map, val.x[0]);
-	} else {
-		/* No point in a 64-bit byteswap since that would just be
-		   swapping the responses from different chips, and we are
-		   only interested in one chip (a representative sample) */
-		return cfi32_to_cpu(map, val.x[0]);
-	}
+	/*
+	 * No point in a 64-bit byteswap since that would just be
+	 * swapping the responses from different chips, and we are
+	 * only interested in one chip (a representative sample)
+	 */
+	return cfi32_to_cpu(map, val.x[0]);
 }
 
 static inline uint16_t cfi_read_query16(struct map_info *map, uint32_t addr)
 {
 	map_word val = map_read(map, addr);
 
-	if (map_bankwidth_is_1(map)) {
+	if (map_bankwidth_is_1(map))
 		return val.x[0] & 0xff;
-	} else if (map_bankwidth_is_2(map)) {
+	if (map_bankwidth_is_2(map))
 		return cfi16_to_cpu(map, val.x[0]);
-	} else {
-		/* No point in a 64-bit byteswap since that would just be
-		   swapping the responses from different chips, and we are
-		   only interested in one chip (a representative sample) */
-		return cfi32_to_cpu(map, val.x[0]);
-	}
+	/*
+	 * No point in a 64-bit byteswap since that would just be
+	 * swapping the responses from different chips, and we are
+	 * only interested in one chip (a representative sample)
+	 */
+	return cfi32_to_cpu(map, val.x[0]);
 }
 
 void cfi_udelay(int us);
-- 
cgit v1.2.3


From 56813b244e5f2ace887f04c4a69a2a0041992297 Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@toblux.com>
Date: Mon, 27 May 2024 11:27:47 +0200
Subject: w1: Add missing newline and fix typos in w1_bus_master comment

- Add missing newline before @return
- s/bytes/byte/
- s/handles/handle/
- s/exists/exist/ in dev_info() message

Signed-off-by: Thorsten Blum <thorsten.blum@toblux.com>
Link: https://lore.kernel.org/r/20240527092746.263038-2-thorsten.blum@toblux.com
[krzysztof: squash "w1: Fix typo in dev_info() message"]
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
---
 include/linux/w1.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/w1.h b/include/linux/w1.h
index 9a2a0ef39018..064805bfae3f 100644
--- a/include/linux/w1.h
+++ b/include/linux/w1.h
@@ -85,7 +85,8 @@ typedef void (*w1_slave_found_callback)(struct w1_master *, u64);
  *
  * @data: the first parameter in all the functions below
  *
- * @read_bit: Sample the line level @return the level read (0 or 1)
+ * @read_bit: Sample the line level
+ * @return the level read (0 or 1)
  *
  * @write_bit: Sets the line level
  *
@@ -95,7 +96,7 @@ typedef void (*w1_slave_found_callback)(struct w1_master *, u64);
  * touch_bit(1) = write-1 / read cycle
  * @return the bit read (0 or 1)
  *
- * @read_byte: Reads a bytes. Same as 8 touch_bit(1) calls.
+ * @read_byte: Reads a byte. Same as 8 touch_bit(1) calls.
  * @return the byte read
  *
  * @write_byte: Writes a byte. Same as 8 touch_bit(x) calls.
@@ -114,7 +115,7 @@ typedef void (*w1_slave_found_callback)(struct w1_master *, u64);
  * @set_pullup: Put out a strong pull-up pulse of the specified duration.
  * @return -1=Error, 0=completed
  *
- * @search: Really nice hardware can handles the different types of ROM search
+ * @search: Really nice hardware can handle the different types of ROM search
  * w1_master* is passed to the slave found callback.
  * u8 is search_type, W1_SEARCH or W1_ALARM_SEARCH
  *
-- 
cgit v1.2.3


From 8cb3aeebcb86088e30232277c9bee9054837442b Mon Sep 17 00:00:00 2001
From: Mohan Kumar <mkumard@nvidia.com>
Date: Mon, 27 May 2024 12:56:06 +0000
Subject: ASoC: simple-card-utils: Split simple_fixup_sample_fmt func

Split the simple_fixup_sample_fmt() into two functions by adding
one more function named simple_util_get_sample_fmt() to return
the sample format value.

This is useful for drivers that wish to simply get the sample format
without setting the mask.

Signed-off-by: Mohan Kumar <mkumard@nvidia.com>
Signed-off-by: Sameer Pujar <spujar@nvidia.com>
Link: https://msgid.link/r/20240527125608.2461300-2-spujar@nvidia.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/simple_card_utils.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h
index ad67957b7b48..2c2279d082ec 100644
--- a/include/sound/simple_card_utils.h
+++ b/include/sound/simple_card_utils.h
@@ -174,6 +174,8 @@ void simple_util_parse_convert(struct device_node *np, char *prefix,
 			       struct simple_util_data *data);
 bool simple_util_is_convert_required(const struct simple_util_data *data);
 
+int simple_util_get_sample_fmt(struct simple_util_data *data);
+
 int simple_util_parse_routing(struct snd_soc_card *card,
 				      char *prefix);
 int simple_util_parse_widgets(struct snd_soc_card *card,
-- 
cgit v1.2.3


From 3aa63a569c64e708df547a8913c84e64a06e7853 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Fri, 17 May 2024 20:08:40 -0400
Subject: fs: switch timespec64 fields in inode to discrete integers

Adjacent struct timespec64's pack poorly. Switch them to discrete
integer fields for the seconds and nanoseconds. This shaves 8 bytes off
struct inode with a garden-variety Fedora Kconfig on x86_64, but that
also moves the i_lock into the previous cacheline, away from the fields
it protects.

To remedy that, move i_generation above the i_lock, which moves the new
4-byte hole to just after the i_fsnotify_mask in my setup. Amir has
plans to use that to expand the i_fsnotify_mask, so add a comment to
that effect as well.

Cc: Amir Goldstein <amir73il@gmail.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Link: https://lore.kernel.org/r/20240517-amtime-v1-1-7b804ca4be8f@kernel.org
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 48 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0283cf366c2a..639885621608 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -660,9 +660,13 @@ struct inode {
 	};
 	dev_t			i_rdev;
 	loff_t			i_size;
-	struct timespec64	__i_atime;
-	struct timespec64	__i_mtime;
-	struct timespec64	__i_ctime; /* use inode_*_ctime accessors! */
+	time64_t		i_atime_sec;
+	time64_t		i_mtime_sec;
+	time64_t		i_ctime_sec;
+	u32			i_atime_nsec;
+	u32			i_mtime_nsec;
+	u32			i_ctime_nsec;
+	u32			i_generation;
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
 	unsigned short          i_bytes;
 	u8			i_blkbits;
@@ -719,10 +723,10 @@ struct inode {
 		unsigned		i_dir_seq;
 	};
 
-	__u32			i_generation;
 
 #ifdef CONFIG_FSNOTIFY
 	__u32			i_fsnotify_mask; /* all events this inode cares about */
+	/* 32-bit hole reserved for expanding i_fsnotify_mask */
 	struct fsnotify_mark_connector __rcu	*i_fsnotify_marks;
 #endif
 
@@ -1538,23 +1542,27 @@ struct timespec64 inode_set_ctime_current(struct inode *inode);
 
 static inline time64_t inode_get_atime_sec(const struct inode *inode)
 {
-	return inode->__i_atime.tv_sec;
+	return inode->i_atime_sec;
 }
 
 static inline long inode_get_atime_nsec(const struct inode *inode)
 {
-	return inode->__i_atime.tv_nsec;
+	return inode->i_atime_nsec;
 }
 
 static inline struct timespec64 inode_get_atime(const struct inode *inode)
 {
-	return inode->__i_atime;
+	struct timespec64 ts = { .tv_sec  = inode_get_atime_sec(inode),
+				 .tv_nsec = inode_get_atime_nsec(inode) };
+
+	return ts;
 }
 
 static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode,
 						      struct timespec64 ts)
 {
-	inode->__i_atime = ts;
+	inode->i_atime_sec = ts.tv_sec;
+	inode->i_atime_nsec = ts.tv_nsec;
 	return ts;
 }
 
@@ -1563,28 +1571,32 @@ static inline struct timespec64 inode_set_atime(struct inode *inode,
 {
 	struct timespec64 ts = { .tv_sec  = sec,
 				 .tv_nsec = nsec };
+
 	return inode_set_atime_to_ts(inode, ts);
 }
 
 static inline time64_t inode_get_mtime_sec(const struct inode *inode)
 {
-	return inode->__i_mtime.tv_sec;
+	return inode->i_mtime_sec;
 }
 
 static inline long inode_get_mtime_nsec(const struct inode *inode)
 {
-	return inode->__i_mtime.tv_nsec;
+	return inode->i_mtime_nsec;
 }
 
 static inline struct timespec64 inode_get_mtime(const struct inode *inode)
 {
-	return inode->__i_mtime;
+	struct timespec64 ts = { .tv_sec  = inode_get_mtime_sec(inode),
+				 .tv_nsec = inode_get_mtime_nsec(inode) };
+	return ts;
 }
 
 static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode,
 						      struct timespec64 ts)
 {
-	inode->__i_mtime = ts;
+	inode->i_mtime_sec = ts.tv_sec;
+	inode->i_mtime_nsec = ts.tv_nsec;
 	return ts;
 }
 
@@ -1598,23 +1610,27 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode,
 
 static inline time64_t inode_get_ctime_sec(const struct inode *inode)
 {
-	return inode->__i_ctime.tv_sec;
+	return inode->i_ctime_sec;
 }
 
 static inline long inode_get_ctime_nsec(const struct inode *inode)
 {
-	return inode->__i_ctime.tv_nsec;
+	return inode->i_ctime_nsec;
 }
 
 static inline struct timespec64 inode_get_ctime(const struct inode *inode)
 {
-	return inode->__i_ctime;
+	struct timespec64 ts = { .tv_sec  = inode_get_ctime_sec(inode),
+				 .tv_nsec = inode_get_ctime_nsec(inode) };
+
+	return ts;
 }
 
 static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
 						      struct timespec64 ts)
 {
-	inode->__i_ctime = ts;
+	inode->i_ctime_sec = ts.tv_sec;
+	inode->i_ctime_nsec = ts.tv_nsec;
 	return ts;
 }
 
-- 
cgit v1.2.3


From 447e140e66fd226350b3ce86cffc965eaae4c856 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 8 May 2024 13:17:01 +0300
Subject: gpio: Remove legacy API documentation

In order to discourage people to use old and legacy GPIO APIs
remove the respective documentation completely. It also helps
further cleanups of the legacy GPIO API leftovers, which is
ongoing task.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Hu Haowen <2023002089@link.tyut.edu.cn>
Link: https://lore.kernel.org/r/20240508101703.830066-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 include/linux/gpio.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 56ac7e7a2889..063f71b18a7c 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * <linux/gpio.h>
+ * NOTE: This header *must not* be included.
  *
  * This is the LEGACY GPIO bulk include file, including legacy APIs. It is
  * used for GPIO drivers still referencing the global GPIO numberspace,
@@ -16,8 +16,6 @@
 
 struct device;
 
-/* see Documentation/driver-api/gpio/legacy.rst */
-
 /* make these flag values available regardless of GPIO kconfig options */
 #define GPIOF_DIR_OUT	(0 << 0)
 #define GPIOF_DIR_IN	(1 << 0)
@@ -121,8 +119,6 @@ static inline int gpio_to_irq(unsigned gpio)
 
 int gpio_request_one(unsigned gpio, unsigned long flags, const char *label);
 
-/* CONFIG_GPIOLIB: bindings for managed devices that want to request gpios */
-
 int devm_gpio_request(struct device *dev, unsigned gpio, const char *label);
 int devm_gpio_request_one(struct device *dev, unsigned gpio,
 			  unsigned long flags, const char *label);
-- 
cgit v1.2.3


From ca3a91063acc3abc0fb233591d8cda4b37dc39ac Mon Sep 17 00:00:00 2001
From: Danila Tikhonov <danila@jiaxyga.com>
Date: Sun, 5 May 2024 23:10:33 +0300
Subject: dt-bindings: clock: qcom: Add SM7150 DISPCC clocks

Add device tree bindings for the display clock controller on Qualcomm
SM7150 platform.

Co-developed-by: David Wronek <david@mainlining.org>
Signed-off-by: David Wronek <david@mainlining.org>
Signed-off-by: Danila Tikhonov <danila@jiaxyga.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20240505201038.276047-4-danila@jiaxyga.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,sm7150-dispcc.h | 59 ++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,sm7150-dispcc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,sm7150-dispcc.h b/include/dt-bindings/clock/qcom,sm7150-dispcc.h
new file mode 100644
index 000000000000..fc1fefe8fd72
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,sm7150-dispcc.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2024, Danila Tikhonov <danila@jiaxyga.com>
+ * Copyright (c) 2024, David Wronek <david@mainlining.org>
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_DISPCC_SM7150_H
+#define _DT_BINDINGS_CLK_QCOM_DISPCC_SM7150_H
+
+/* DISPCC clock registers */
+#define DISPCC_PLL0				0
+#define DISPCC_MDSS_AHB_CLK			1
+#define DISPCC_MDSS_AHB_CLK_SRC			2
+#define DISPCC_MDSS_BYTE0_CLK			3
+#define DISPCC_MDSS_BYTE0_CLK_SRC		4
+#define DISPCC_MDSS_BYTE0_DIV_CLK_SRC		5
+#define DISPCC_MDSS_BYTE0_INTF_CLK		6
+#define DISPCC_MDSS_BYTE1_CLK			7
+#define DISPCC_MDSS_BYTE1_CLK_SRC		8
+#define DISPCC_MDSS_BYTE1_DIV_CLK_SRC		9
+#define DISPCC_MDSS_BYTE1_INTF_CLK		10
+#define DISPCC_MDSS_DP_AUX_CLK			11
+#define DISPCC_MDSS_DP_AUX_CLK_SRC		12
+#define DISPCC_MDSS_DP_CRYPTO_CLK		13
+#define DISPCC_MDSS_DP_CRYPTO_CLK_SRC		14
+#define DISPCC_MDSS_DP_LINK_CLK			15
+#define DISPCC_MDSS_DP_LINK_CLK_SRC		16
+#define DISPCC_MDSS_DP_LINK_INTF_CLK		17
+#define DISPCC_MDSS_DP_PIXEL1_CLK		18
+#define DISPCC_MDSS_DP_PIXEL1_CLK_SRC		19
+#define DISPCC_MDSS_DP_PIXEL_CLK		20
+#define DISPCC_MDSS_DP_PIXEL_CLK_SRC		21
+#define DISPCC_MDSS_ESC0_CLK			22
+#define DISPCC_MDSS_ESC0_CLK_SRC		23
+#define DISPCC_MDSS_ESC1_CLK			24
+#define DISPCC_MDSS_ESC1_CLK_SRC		25
+#define DISPCC_MDSS_MDP_CLK			26
+#define DISPCC_MDSS_MDP_CLK_SRC			27
+#define DISPCC_MDSS_MDP_LUT_CLK			28
+#define DISPCC_MDSS_NON_GDSC_AHB_CLK		29
+#define DISPCC_MDSS_PCLK0_CLK			30
+#define DISPCC_MDSS_PCLK0_CLK_SRC		31
+#define DISPCC_MDSS_PCLK1_CLK			32
+#define DISPCC_MDSS_PCLK1_CLK_SRC		33
+#define DISPCC_MDSS_ROT_CLK			34
+#define DISPCC_MDSS_ROT_CLK_SRC			35
+#define DISPCC_MDSS_RSCC_AHB_CLK		36
+#define DISPCC_MDSS_RSCC_VSYNC_CLK		37
+#define DISPCC_MDSS_VSYNC_CLK			38
+#define DISPCC_MDSS_VSYNC_CLK_SRC		39
+#define DISPCC_XO_CLK_SRC			40
+#define DISPCC_SLEEP_CLK			41
+#define DISPCC_SLEEP_CLK_SRC			42
+
+/* DISPCC GDSCR */
+#define MDSS_GDSC				0
+
+#endif
-- 
cgit v1.2.3


From 0fd2a048368ea99feccd7dfd6a5f42f6d011f10f Mon Sep 17 00:00:00 2001
From: Danila Tikhonov <danila@jiaxyga.com>
Date: Sun, 5 May 2024 23:10:35 +0300
Subject: dt-bindings: clock: qcom: Add SM7150 CAMCC clocks

Add device tree bindings for the camera clock controller on Qualcomm
SM7150 platform.

Signed-off-by: Danila Tikhonov <danila@jiaxyga.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20240505201038.276047-6-danila@jiaxyga.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,sm7150-camcc.h | 113 ++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,sm7150-camcc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,sm7150-camcc.h b/include/dt-bindings/clock/qcom,sm7150-camcc.h
new file mode 100644
index 000000000000..ce73ef0fe95d
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,sm7150-camcc.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2024, Danila Tikhonov <danila@jiaxyga.com>
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_CAMCC_SM7150_H
+#define _DT_BINDINGS_CLK_QCOM_CAMCC_SM7150_H
+
+/* Hardware clocks */
+#define CAMCC_PLL0_OUT_EVEN					0
+#define CAMCC_PLL0_OUT_ODD					1
+#define CAMCC_PLL1_OUT_EVEN					2
+#define CAMCC_PLL2_OUT_EARLY					3
+#define CAMCC_PLL3_OUT_EVEN					4
+#define CAMCC_PLL4_OUT_EVEN					5
+
+/* CAMCC clock registers */
+#define CAMCC_PLL0						6
+#define CAMCC_PLL1						7
+#define CAMCC_PLL2						8
+#define CAMCC_PLL2_OUT_AUX					9
+#define CAMCC_PLL2_OUT_MAIN					10
+#define CAMCC_PLL3						11
+#define CAMCC_PLL4						12
+#define CAMCC_BPS_AHB_CLK					13
+#define CAMCC_BPS_AREG_CLK					14
+#define CAMCC_BPS_AXI_CLK					15
+#define CAMCC_BPS_CLK						16
+#define CAMCC_BPS_CLK_SRC					17
+#define CAMCC_CAMNOC_AXI_CLK					18
+#define CAMCC_CAMNOC_AXI_CLK_SRC				19
+#define CAMCC_CAMNOC_DCD_XO_CLK					20
+#define CAMCC_CCI_0_CLK						21
+#define CAMCC_CCI_0_CLK_SRC					22
+#define CAMCC_CCI_1_CLK						23
+#define CAMCC_CCI_1_CLK_SRC					24
+#define CAMCC_CORE_AHB_CLK					25
+#define CAMCC_CPAS_AHB_CLK					26
+#define CAMCC_CPHY_RX_CLK_SRC					27
+#define CAMCC_CSI0PHYTIMER_CLK					28
+#define CAMCC_CSI0PHYTIMER_CLK_SRC				29
+#define CAMCC_CSI1PHYTIMER_CLK					30
+#define CAMCC_CSI1PHYTIMER_CLK_SRC				31
+#define CAMCC_CSI2PHYTIMER_CLK					32
+#define CAMCC_CSI2PHYTIMER_CLK_SRC				33
+#define CAMCC_CSI3PHYTIMER_CLK					34
+#define CAMCC_CSI3PHYTIMER_CLK_SRC				35
+#define CAMCC_CSIPHY0_CLK					36
+#define CAMCC_CSIPHY1_CLK					37
+#define CAMCC_CSIPHY2_CLK					38
+#define CAMCC_CSIPHY3_CLK					39
+#define CAMCC_FAST_AHB_CLK_SRC					40
+#define CAMCC_FD_CORE_CLK					41
+#define CAMCC_FD_CORE_CLK_SRC					42
+#define CAMCC_FD_CORE_UAR_CLK					43
+#define CAMCC_ICP_AHB_CLK					44
+#define CAMCC_ICP_CLK						45
+#define CAMCC_ICP_CLK_SRC					46
+#define CAMCC_IFE_0_AXI_CLK					47
+#define CAMCC_IFE_0_CLK						48
+#define CAMCC_IFE_0_CLK_SRC					49
+#define CAMCC_IFE_0_CPHY_RX_CLK					50
+#define CAMCC_IFE_0_CSID_CLK					51
+#define CAMCC_IFE_0_CSID_CLK_SRC				52
+#define CAMCC_IFE_0_DSP_CLK					53
+#define CAMCC_IFE_1_AXI_CLK					54
+#define CAMCC_IFE_1_CLK						55
+#define CAMCC_IFE_1_CLK_SRC					56
+#define CAMCC_IFE_1_CPHY_RX_CLK					57
+#define CAMCC_IFE_1_CSID_CLK					58
+#define CAMCC_IFE_1_CSID_CLK_SRC				59
+#define CAMCC_IFE_1_DSP_CLK					60
+#define CAMCC_IFE_LITE_CLK					61
+#define CAMCC_IFE_LITE_CLK_SRC					62
+#define CAMCC_IFE_LITE_CPHY_RX_CLK				63
+#define CAMCC_IFE_LITE_CSID_CLK					64
+#define CAMCC_IFE_LITE_CSID_CLK_SRC				65
+#define CAMCC_IPE_0_AHB_CLK					66
+#define CAMCC_IPE_0_AREG_CLK					67
+#define CAMCC_IPE_0_AXI_CLK					68
+#define CAMCC_IPE_0_CLK						69
+#define CAMCC_IPE_0_CLK_SRC					70
+#define CAMCC_IPE_1_AHB_CLK					71
+#define CAMCC_IPE_1_AREG_CLK					72
+#define CAMCC_IPE_1_AXI_CLK					73
+#define CAMCC_IPE_1_CLK						74
+#define CAMCC_JPEG_CLK						75
+#define CAMCC_JPEG_CLK_SRC					76
+#define CAMCC_LRME_CLK						77
+#define CAMCC_LRME_CLK_SRC					78
+#define CAMCC_MCLK0_CLK						79
+#define CAMCC_MCLK0_CLK_SRC					80
+#define CAMCC_MCLK1_CLK						81
+#define CAMCC_MCLK1_CLK_SRC					82
+#define CAMCC_MCLK2_CLK						83
+#define CAMCC_MCLK2_CLK_SRC					84
+#define CAMCC_MCLK3_CLK						85
+#define CAMCC_MCLK3_CLK_SRC					86
+#define CAMCC_SLEEP_CLK						87
+#define CAMCC_SLEEP_CLK_SRC					88
+#define CAMCC_SLOW_AHB_CLK_SRC					89
+#define CAMCC_XO_CLK_SRC					90
+
+/* CAMCC GDSCRs */
+#define BPS_GDSC						0
+#define IFE_0_GDSC						1
+#define IFE_1_GDSC						2
+#define IPE_0_GDSC						3
+#define IPE_1_GDSC						4
+#define TITAN_TOP_GDSC						5
+
+#endif
-- 
cgit v1.2.3


From a4be1860b9319e9e55eaa9e28e35e7b19128060c Mon Sep 17 00:00:00 2001
From: Danila Tikhonov <danila@jiaxyga.com>
Date: Sun, 5 May 2024 23:10:37 +0300
Subject: dt-bindings: clock: qcom: Add SM7150 VIDEOCC clocks

Add device tree bindings for the video clock controller on Qualcomm
SM7150 platform.

Signed-off-by: Danila Tikhonov <danila@jiaxyga.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20240505201038.276047-8-danila@jiaxyga.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,sm7150-videocc.h | 28 +++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,sm7150-videocc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,sm7150-videocc.h b/include/dt-bindings/clock/qcom,sm7150-videocc.h
new file mode 100644
index 000000000000..d86e0fbb159a
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,sm7150-videocc.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2024, Danila Tikhonov <danila@jiaxyga.com>
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_VIDEOCC_SM7150_H
+#define _DT_BINDINGS_CLK_QCOM_VIDEOCC_SM7150_H
+
+#define VIDEOCC_PLL0			0
+#define VIDEOCC_IRIS_AHB_CLK		1
+#define VIDEOCC_IRIS_CLK_SRC		2
+#define VIDEOCC_MVS0_AXI_CLK		3
+#define VIDEOCC_MVS0_CORE_CLK		4
+#define VIDEOCC_MVS1_AXI_CLK		5
+#define VIDEOCC_MVS1_CORE_CLK		6
+#define VIDEOCC_MVSC_CORE_CLK		7
+#define VIDEOCC_MVSC_CTL_AXI_CLK	8
+#define VIDEOCC_VENUS_AHB_CLK		9
+#define VIDEOCC_XO_CLK			10
+#define VIDEOCC_XO_CLK_SRC		11
+
+/* VIDEOCC GDSCRs */
+#define VENUS_GDSC			0
+#define VCODEC0_GDSC			1
+#define VCODEC1_GDSC			2
+
+#endif
-- 
cgit v1.2.3


From 27c42e925323b975a64429e313b0cf5c0c02a411 Mon Sep 17 00:00:00 2001
From: Kathiravan Thirumoorthy <quic_kathirav@quicinc.com>
Date: Mon, 25 Mar 2024 21:19:48 +0530
Subject: dt-bindings: arm: qcom,ids: Add SoC ID for IPQ5321

Add the ID for the Qualcomm IPQ5321 SoC.

Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Mukesh Ojha <quic_mojha@quicinc.com>
Signed-off-by: Kathiravan Thirumoorthy <quic_kathirav@quicinc.com>
Link: https://lore.kernel.org/r/20240325-ipq5321-sku-support-v2-1-f30ce244732f@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/arm/qcom,ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h
index d040033dc8ee..a2958952a9ec 100644
--- a/include/dt-bindings/arm/qcom,ids.h
+++ b/include/dt-bindings/arm/qcom,ids.h
@@ -272,6 +272,7 @@
 #define QCOM_ID_QCS8550			603
 #define QCOM_ID_QCM8550			604
 #define QCOM_ID_IPQ5300			624
+#define QCOM_ID_IPQ5321			650
 
 /*
  * The board type and revision information, used by Qualcomm bootloaders and
-- 
cgit v1.2.3


From 3fefd9b594aa6be9f120733f5bb57b07d47871a9 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 17 Apr 2024 18:56:57 +0100
Subject: fs: Remove i_blocks_per_page

The last caller has been converted to i_blocks_per_folio() so we
can remove this wrapper.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
---
 include/linux/pagemap.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 3d69589c00a4..63f2f3602a7f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1536,10 +1536,4 @@ unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio)
 {
 	return folio_size(folio) >> inode->i_blkbits;
 }
-
-static inline
-unsigned int i_blocks_per_page(struct inode *inode, struct page *page)
-{
-	return i_blocks_per_folio(inode, page_folio(page));
-}
 #endif /* _LINUX_PAGEMAP_H */
-- 
cgit v1.2.3


From a0a44d9175b349df2462089140fb7f292100bd7c Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Mon, 27 May 2024 11:01:28 +0200
Subject: mm, slab: don't wrap internal functions with alloc_hooks()

The functions __kmalloc_noprof(), kmalloc_large_noprof(),
kmalloc_trace_noprof() and their _node variants are all internal to the
implementations of kmalloc_noprof() and kmalloc_node_noprof() and are
only declared in the "public" slab.h and exported so that those
implementations can be static inline and distinguish the build-time
constant size variants. The only other users for some of the internal
functions are slub_kunit and fortify_kunit tests which make very
short-lived allocations.

Therefore we can stop wrapping them with the alloc_hooks() macro.
Instead add a __ prefix to all of them and a comment documenting these
as internal. Also rename __kmalloc_trace() to __kmalloc_cache() which is
more descriptive - it is a variant of __kmalloc() where the exact
kmalloc cache has been already determined.

The usage in fortify_kunit can be removed completely, as the internal
functions should be tested already through kmalloc() tests in the
test variant that passes non-constant allocation size.

Reported-by: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Kees Cook <keescook@chromium.org>
Reviewed-by: Kent Overstreet <kent.overstreet@linux.dev>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/slab.h | 48 ++++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 7247e217e21b..ed6bee5ec2b6 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -528,9 +528,6 @@ static_assert(PAGE_SHIFT <= 20);
 
 #include <linux/alloc_tag.h>
 
-void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
-#define __kmalloc(...)				alloc_hooks(__kmalloc_noprof(__VA_ARGS__))
-
 /**
  * kmem_cache_alloc - Allocate an object
  * @cachep: The cache to allocate from.
@@ -568,31 +565,34 @@ static __always_inline void kfree_bulk(size_t size, void **p)
 	kmem_cache_free_bulk(NULL, size, p);
 }
 
-void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment
-							 __alloc_size(1);
-#define __kmalloc_node(...)			alloc_hooks(__kmalloc_node_noprof(__VA_ARGS__))
-
 void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags,
 				   int node) __assume_slab_alignment __malloc;
 #define kmem_cache_alloc_node(...)	alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__))
 
-void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
-		    __assume_kmalloc_alignment __alloc_size(3);
+/*
+ * The following functions are not to be used directly and are intended only
+ * for internal use from kmalloc() and kmalloc_node()
+ * with the exception of kunit tests
+ */
+
+void *__kmalloc_noprof(size_t size, gfp_t flags)
+				__assume_kmalloc_alignment __alloc_size(1);
+
+void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node)
+				__assume_kmalloc_alignment __alloc_size(1);
 
-void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
-		int node, size_t size) __assume_kmalloc_alignment
-						__alloc_size(4);
-#define kmalloc_trace(...)			alloc_hooks(kmalloc_trace_noprof(__VA_ARGS__))
+void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
+				__assume_kmalloc_alignment __alloc_size(3);
 
-#define kmalloc_node_trace(...)			alloc_hooks(kmalloc_node_trace_noprof(__VA_ARGS__))
+void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags,
+				  int node, size_t size)
+				__assume_kmalloc_alignment __alloc_size(4);
 
-void *kmalloc_large_noprof(size_t size, gfp_t flags) __assume_page_alignment
-					      __alloc_size(1);
-#define kmalloc_large(...)			alloc_hooks(kmalloc_large_noprof(__VA_ARGS__))
+void *__kmalloc_large_noprof(size_t size, gfp_t flags)
+				__assume_page_alignment __alloc_size(1);
 
-void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_page_alignment
-							     __alloc_size(1);
-#define kmalloc_large_node(...)			alloc_hooks(kmalloc_large_node_noprof(__VA_ARGS__))
+void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
+				__assume_page_alignment __alloc_size(1);
 
 /**
  * kmalloc - allocate kernel memory
@@ -654,10 +654,10 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
 		unsigned int index;
 
 		if (size > KMALLOC_MAX_CACHE_SIZE)
-			return kmalloc_large_noprof(size, flags);
+			return __kmalloc_large_noprof(size, flags);
 
 		index = kmalloc_index(size);
-		return kmalloc_trace_noprof(
+		return __kmalloc_cache_noprof(
 				kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
 				flags, size);
 	}
@@ -671,10 +671,10 @@ static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gf
 		unsigned int index;
 
 		if (size > KMALLOC_MAX_CACHE_SIZE)
-			return kmalloc_large_node_noprof(size, flags, node);
+			return __kmalloc_large_node_noprof(size, flags, node);
 
 		index = kmalloc_index(size);
-		return kmalloc_node_trace_noprof(
+		return __kmalloc_cache_node_noprof(
 				kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
 				flags, node, size);
 	}
-- 
cgit v1.2.3


From 582d79f34330db4ccee85620cf95ee7ccb9a0d01 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Mon, 27 May 2024 15:57:50 +0200
Subject: drm/connector: Introduce an HDMI connector initialization function

A lot of the various HDMI drivers duplicate some logic that depends on
the HDMI spec itself and not really a particular hardware
implementation.

Output BPC or format selection, infoframe generation are good examples
of such areas.

This creates a lot of boilerplate, with a lot of variations, which makes
it hard for userspace to rely on, and makes it difficult to get it right
for drivers.

In the next patches, we'll add a lot of infrastructure around the
drm_connector and drm_connector_state structures, which will allow to
abstract away the duplicated logic. This infrastructure comes with a few
requirements though, and thus we need a new initialization function.

Hopefully, this will make drivers simpler to handle, and their behaviour
more consistent.

Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Reviewed-by: Sui Jingfeng <sui.jingfeng@linux.dev>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-1-c5af16c3aae2@kernel.org
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 include/drm/drm_connector.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 58ee9adf9091..7ef1ff83d885 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -1908,6 +1908,11 @@ int drmm_connector_init(struct drm_device *dev,
 			const struct drm_connector_funcs *funcs,
 			int connector_type,
 			struct i2c_adapter *ddc);
+int drmm_connector_hdmi_init(struct drm_device *dev,
+			     struct drm_connector *connector,
+			     const struct drm_connector_funcs *funcs,
+			     int connector_type,
+			     struct i2c_adapter *ddc);
 void drm_connector_attach_edid_property(struct drm_connector *connector);
 int drm_connector_register(struct drm_connector *connector);
 void drm_connector_unregister(struct drm_connector *connector);
-- 
cgit v1.2.3


From 54cb39e2293b1e221708d3ac157ecc59086e1b46 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Mon, 27 May 2024 15:57:52 +0200
Subject: drm/connector: hdmi: Create an HDMI sub-state

The next features we will need to share across drivers will need to
store some parameters for drivers to use, such as the selected output
format.

Let's create a new connector sub-state dedicated to HDMI controllers,
that will eventually store everything we need.

Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Reviewed-by: Sui Jingfeng <sui.jingfeng@linux.dev>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-3-c5af16c3aae2@kernel.org
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 include/drm/display/drm_hdmi_state_helper.h | 16 ++++++++++++++++
 include/drm/drm_connector.h                 |  7 +++++++
 2 files changed, 23 insertions(+)
 create mode 100644 include/drm/display/drm_hdmi_state_helper.h

(limited to 'include')

diff --git a/include/drm/display/drm_hdmi_state_helper.h b/include/drm/display/drm_hdmi_state_helper.h
new file mode 100644
index 000000000000..837899db315a
--- /dev/null
+++ b/include/drm/display/drm_hdmi_state_helper.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef DRM_HDMI_STATE_HELPER_H_
+#define DRM_HDMI_STATE_HELPER_H_
+
+struct drm_atomic_state;
+struct drm_connector;
+struct drm_connector_state;
+
+void __drm_atomic_helper_connector_hdmi_reset(struct drm_connector *connector,
+					      struct drm_connector_state *new_conn_state);
+
+int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector,
+					   struct drm_atomic_state *state);
+
+#endif // DRM_HDMI_STATE_HELPER_H_
diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 7ef1ff83d885..5fe3f6a519cd 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -1031,6 +1031,13 @@ struct drm_connector_state {
 	 * DRM blob property for HDR output metadata
 	 */
 	struct drm_property_blob *hdr_output_metadata;
+
+	/**
+	 * @hdmi: HDMI-related variable and properties. Filled by
+	 * @drm_atomic_helper_connector_hdmi_check().
+	 */
+	struct {
+	} hdmi;
 };
 
 /**
-- 
cgit v1.2.3


From aadb3e16b8f30cc1c1efdfe162f400e026385bfb Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Mon, 27 May 2024 15:57:53 +0200
Subject: drm/connector: hdmi: Add output BPC to the connector state

We'll add automatic selection of the output BPC in a following patch,
but let's add it to the HDMI connector state already.

Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-4-c5af16c3aae2@kernel.org
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 include/drm/drm_connector.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 5fe3f6a519cd..4ddb438ea2dd 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -1037,6 +1037,10 @@ struct drm_connector_state {
 	 * @drm_atomic_helper_connector_hdmi_check().
 	 */
 	struct {
+		/**
+		 * @output_bpc: Bits per color channel to output.
+		 */
+		unsigned int output_bpc;
 	} hdmi;
 };
 
@@ -1686,6 +1690,11 @@ struct drm_connector {
 	 */
 	struct drm_property_blob *path_blob_ptr;
 
+	/**
+	 * @max_bpc: Maximum bits per color channel the connector supports.
+	 */
+	unsigned int max_bpc;
+
 	/**
 	 * @max_bpc_property: Default connector property for the max bpc to be
 	 * driven out of the connector.
@@ -1919,7 +1928,8 @@ int drmm_connector_hdmi_init(struct drm_device *dev,
 			     struct drm_connector *connector,
 			     const struct drm_connector_funcs *funcs,
 			     int connector_type,
-			     struct i2c_adapter *ddc);
+			     struct i2c_adapter *ddc,
+			     unsigned int max_bpc);
 void drm_connector_attach_edid_property(struct drm_connector *connector);
 int drm_connector_register(struct drm_connector *connector);
 void drm_connector_unregister(struct drm_connector *connector);
-- 
cgit v1.2.3


From 948f01d5e5595023c2e7cfc0184a322be00ef214 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Mon, 27 May 2024 15:57:56 +0200
Subject: drm/connector: hdmi: Add support for output format

Just like BPC, we'll add support for automatic selection of the output
format for HDMI connectors.

Let's add the needed defaults and fields for now.

Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-7-c5af16c3aae2@kernel.org
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 include/drm/drm_connector.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 4ddb438ea2dd..4b6b1117c7d0 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -368,6 +368,9 @@ enum drm_panel_orientation {
 	DRM_MODE_PANEL_ORIENTATION_RIGHT_UP,
 };
 
+const char *
+drm_hdmi_connector_get_output_format_name(enum hdmi_colorspace fmt);
+
 /**
  * struct drm_monitor_range_info - Panel's Monitor range in EDID for
  * &drm_display_info
@@ -1041,6 +1044,11 @@ struct drm_connector_state {
 		 * @output_bpc: Bits per color channel to output.
 		 */
 		unsigned int output_bpc;
+
+		/**
+		 * @output_format: Pixel format to output in.
+		 */
+		enum hdmi_colorspace output_format;
 	} hdmi;
 };
 
@@ -1906,6 +1914,17 @@ struct drm_connector {
 
 	/** @hdr_sink_metadata: HDR Metadata Information read from sink */
 	struct hdr_sink_metadata hdr_sink_metadata;
+
+	/**
+	 * @hdmi: HDMI-related variable and properties.
+	 */
+	struct {
+		/**
+		 * @supported_formats: Bitmask of @hdmi_colorspace
+		 * supported by the controller.
+		 */
+		unsigned long supported_formats;
+	} hdmi;
 };
 
 #define obj_to_connector(x) container_of(x, struct drm_connector, base)
@@ -1929,6 +1948,7 @@ int drmm_connector_hdmi_init(struct drm_device *dev,
 			     const struct drm_connector_funcs *funcs,
 			     int connector_type,
 			     struct i2c_adapter *ddc,
+			     unsigned long supported_formats,
 			     unsigned int max_bpc);
 void drm_connector_attach_edid_property(struct drm_connector *connector);
 int drm_connector_register(struct drm_connector *connector);
-- 
cgit v1.2.3


From 40167bcbd19c241fc30a912fa8a8276b9ed1a12e Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Mon, 27 May 2024 15:57:58 +0200
Subject: drm/display: hdmi: Add HDMI compute clock helper

A lot of HDMI drivers have some variation of the formula to calculate
the TMDS character rate from a mode, but few of them actually take all
parameters into account.

Let's create a helper to provide that rate taking all parameters into
account.

Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-9-c5af16c3aae2@kernel.org
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 include/drm/display/drm_hdmi_helper.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/drm/display/drm_hdmi_helper.h b/include/drm/display/drm_hdmi_helper.h
index 76d234826e22..57e3b18c15ec 100644
--- a/include/drm/display/drm_hdmi_helper.h
+++ b/include/drm/display/drm_hdmi_helper.h
@@ -24,4 +24,8 @@ drm_hdmi_infoframe_set_hdr_metadata(struct hdmi_drm_infoframe *frame,
 void drm_hdmi_avi_infoframe_content_type(struct hdmi_avi_infoframe *frame,
 					 const struct drm_connector_state *conn_state);
 
+unsigned long long
+drm_hdmi_compute_mode_clock(const struct drm_display_mode *mode,
+			    unsigned int bpc, enum hdmi_colorspace fmt);
+
 #endif
-- 
cgit v1.2.3


From f035f4097f1e0a35a457b72427bb0c06ca0c81c4 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Mon, 27 May 2024 15:58:00 +0200
Subject: drm/connector: hdmi: Calculate TMDS character rate

Most HDMI drivers have some code to calculate the TMDS character rate,
usually to adjust an internal clock to match what the mode requires.

Since the TMDS character rates mostly depends on the resolution, whether
we need to repeat pixels or not, the bpc count and the format, we can
now derive it from the HDMI connector state that stores all those infos
and remove the duplication from drivers.

Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-11-c5af16c3aae2@kernel.org
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 include/drm/drm_connector.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 4b6b1117c7d0..3bdcf904a11b 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -1049,6 +1049,11 @@ struct drm_connector_state {
 		 * @output_format: Pixel format to output in.
 		 */
 		enum hdmi_colorspace output_format;
+
+		/**
+		 * @tmds_char_rate: TMDS Character Rate, in Hz.
+		 */
+		unsigned long long tmds_char_rate;
 	} hdmi;
 };
 
-- 
cgit v1.2.3


From e5030a74f976b4e808e28e78805c87203ac1a48d Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Mon, 27 May 2024 15:58:02 +0200
Subject: drm/connector: hdmi: Add custom hook to filter TMDS character rate

Most of the HDMI controllers have an upper TMDS character rate limit
they can't exceed. On "embedded"-grade display controllers, it will
typically be lower than what high-grade monitors can provide these days,
so drivers will filter the TMDS character rate based on the controller
capabilities.

To make that easier to handle for drivers, let's provide an optional
hook to be implemented by drivers so they can tell the HDMI controller
helpers if a given TMDS character rate is reachable for them or not.

This will then be useful to figure out the best format and bpc count for
a given mode.

Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-13-c5af16c3aae2@kernel.org
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 include/drm/drm_connector.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 3bdcf904a11b..ffef967869d9 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -38,6 +38,7 @@ struct drm_connector_helper_funcs;
 struct drm_modeset_acquire_ctx;
 struct drm_device;
 struct drm_crtc;
+struct drm_display_mode;
 struct drm_encoder;
 struct drm_panel;
 struct drm_property;
@@ -1057,6 +1058,30 @@ struct drm_connector_state {
 	} hdmi;
 };
 
+/**
+ * struct drm_connector_hdmi_funcs - drm_hdmi_connector control functions
+ */
+struct drm_connector_hdmi_funcs {
+	/**
+	 * @tmds_char_rate_valid:
+	 *
+	 * This callback is invoked at atomic_check time to figure out
+	 * whether a particular TMDS character rate is supported by the
+	 * driver.
+	 *
+	 * The @tmds_char_rate_valid callback is optional.
+	 *
+	 * Returns:
+	 *
+	 * Either &drm_mode_status.MODE_OK or one of the failure reasons
+	 * in &enum drm_mode_status.
+	 */
+	enum drm_mode_status
+	(*tmds_char_rate_valid)(const struct drm_connector *connector,
+				const struct drm_display_mode *mode,
+				unsigned long long tmds_rate);
+};
+
 /**
  * struct drm_connector_funcs - control connectors on a given device
  *
@@ -1929,6 +1954,11 @@ struct drm_connector {
 		 * supported by the controller.
 		 */
 		unsigned long supported_formats;
+
+		/**
+		 * @funcs: HDMI connector Control Functions
+		 */
+		const struct drm_connector_hdmi_funcs *funcs;
 	} hdmi;
 };
 
@@ -1951,6 +1981,7 @@ int drmm_connector_init(struct drm_device *dev,
 int drmm_connector_hdmi_init(struct drm_device *dev,
 			     struct drm_connector *connector,
 			     const struct drm_connector_funcs *funcs,
+			     const struct drm_connector_hdmi_funcs *hdmi_funcs,
 			     int connector_type,
 			     struct i2c_adapter *ddc,
 			     unsigned long supported_formats,
-- 
cgit v1.2.3


From ab52af4ba7c7dc2e226ede5935a0587743b747d3 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Mon, 27 May 2024 15:58:07 +0200
Subject: drm/connector: hdmi: Add Broadcast RGB property

The i915 driver has a property to force the RGB range of an HDMI output.
The vc4 driver then implemented the same property with the same
semantics. KWin has support for it, and a PR for mutter is also there to
support it.

Both drivers implementing the same property with the same semantics,
plus the userspace having support for it, is proof enough that it's
pretty much a de-facto standard now and we can provide helpers for it.

Let's plumb it into the newly created HDMI connector.

Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Reviewed-by: Sebastian Wick <sebastian.wick@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-18-c5af16c3aae2@kernel.org
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 include/drm/drm_connector.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index ffef967869d9..207635715466 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -369,6 +369,29 @@ enum drm_panel_orientation {
 	DRM_MODE_PANEL_ORIENTATION_RIGHT_UP,
 };
 
+/**
+ * enum drm_hdmi_broadcast_rgb - Broadcast RGB Selection for an HDMI @drm_connector
+ */
+enum drm_hdmi_broadcast_rgb {
+	/**
+	 * @DRM_HDMI_BROADCAST_RGB_AUTO: The RGB range is selected
+	 * automatically based on the mode.
+	 */
+	DRM_HDMI_BROADCAST_RGB_AUTO,
+
+	/**
+	 * @DRM_HDMI_BROADCAST_RGB_FULL: Full range RGB is forced.
+	 */
+	DRM_HDMI_BROADCAST_RGB_FULL,
+
+	/**
+	 * @DRM_HDMI_BROADCAST_RGB_LIMITED: Limited range RGB is forced.
+	 */
+	DRM_HDMI_BROADCAST_RGB_LIMITED,
+};
+
+const char *
+drm_hdmi_connector_get_broadcast_rgb_name(enum drm_hdmi_broadcast_rgb broadcast_rgb);
 const char *
 drm_hdmi_connector_get_output_format_name(enum hdmi_colorspace fmt);
 
@@ -1041,6 +1064,12 @@ struct drm_connector_state {
 	 * @drm_atomic_helper_connector_hdmi_check().
 	 */
 	struct {
+		/**
+		 * @broadcast_rgb: Connector property to pass the
+		 * Broadcast RGB selection value.
+		 */
+		enum drm_hdmi_broadcast_rgb broadcast_rgb;
+
 		/**
 		 * @output_bpc: Bits per color channel to output.
 		 */
@@ -1757,6 +1786,12 @@ struct drm_connector {
 	 */
 	struct drm_property *privacy_screen_hw_state_property;
 
+	/**
+	 * @broadcast_rgb_property: Connector property to set the
+	 * Broadcast RGB selection to output with.
+	 */
+	struct drm_property *broadcast_rgb_property;
+
 #define DRM_CONNECTOR_POLL_HPD (1 << 0)
 #define DRM_CONNECTOR_POLL_CONNECT (1 << 1)
 #define DRM_CONNECTOR_POLL_DISCONNECT (1 << 2)
@@ -2096,6 +2131,7 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector,
 					       u32 scaling_mode_mask);
 int drm_connector_attach_vrr_capable_property(
 		struct drm_connector *connector);
+int drm_connector_attach_broadcast_rgb_property(struct drm_connector *connector);
 int drm_connector_attach_colorspace_property(struct drm_connector *connector);
 int drm_connector_attach_hdr_output_metadata_property(struct drm_connector *connector);
 bool drm_connector_atomic_hdr_metadata_equal(struct drm_connector_state *old_state,
-- 
cgit v1.2.3


From 027d435906490812d4568ff371a8b63c24a36bcd Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Mon, 27 May 2024 15:58:09 +0200
Subject: drm/connector: hdmi: Add RGB Quantization Range to the connector
 state

HDMI controller drivers will need to figure out the RGB range they need
to configure based on a mode and property values. Let's expose that in
the HDMI connector state so drivers can just use that value.

Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-20-c5af16c3aae2@kernel.org
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 include/drm/drm_connector.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 207635715466..dcad2d46fabe 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -1070,6 +1070,12 @@ struct drm_connector_state {
 		 */
 		enum drm_hdmi_broadcast_rgb broadcast_rgb;
 
+		/**
+		 * @is_full_range: Is the output supposed to use a full
+		 * RGB Quantization Range or not?
+		 */
+		bool is_limited_range;
+
 		/**
 		 * @output_bpc: Bits per color channel to output.
 		 */
-- 
cgit v1.2.3


From f378b77227bc4732922c57f92be89438bb1018a1 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Mon, 27 May 2024 15:58:11 +0200
Subject: drm/connector: hdmi: Add Infoframes generation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Infoframes in KMS is usually handled by a bunch of low-level helpers
that require quite some boilerplate for drivers. This leads to
discrepancies with how drivers generate them, and which are actually
sent.

Now that we have everything needed to generate them in the HDMI
connector state, we can generate them in our common logic so that
drivers can simply reuse what we precomputed.

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-22-c5af16c3aae2@kernel.org
Signed-off-by: Maxime Ripard <mripard@kernel.org>
---
 include/drm/display/drm_hdmi_state_helper.h |   6 ++
 include/drm/drm_connector.h                 | 111 +++++++++++++++++++++++++++-
 2 files changed, 116 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/display/drm_hdmi_state_helper.h b/include/drm/display/drm_hdmi_state_helper.h
index 837899db315a..eb162ff24de0 100644
--- a/include/drm/display/drm_hdmi_state_helper.h
+++ b/include/drm/display/drm_hdmi_state_helper.h
@@ -6,6 +6,7 @@
 struct drm_atomic_state;
 struct drm_connector;
 struct drm_connector_state;
+struct hdmi_audio_infoframe;
 
 void __drm_atomic_helper_connector_hdmi_reset(struct drm_connector *connector,
 					      struct drm_connector_state *new_conn_state);
@@ -13,4 +14,9 @@ void __drm_atomic_helper_connector_hdmi_reset(struct drm_connector *connector,
 int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector,
 					   struct drm_atomic_state *state);
 
+int drm_atomic_helper_connector_hdmi_update_audio_infoframe(struct drm_connector *connector,
+							    struct hdmi_audio_infoframe *frame);
+int drm_atomic_helper_connector_hdmi_update_infoframes(struct drm_connector *connector,
+						       struct drm_atomic_state *state);
+
 #endif // DRM_HDMI_STATE_HELPER_H_
diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index dcad2d46fabe..e04a8a0d1bbd 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -914,6 +914,21 @@ struct drm_tv_connector_state {
 	unsigned int hue;
 };
 
+/**
+ * struct drm_connector_hdmi_infoframe - HDMI Infoframe container
+ */
+struct drm_connector_hdmi_infoframe {
+	/**
+	 * @data: HDMI Infoframe structure
+	 */
+	union hdmi_infoframe data;
+
+	/**
+	 * @set: Is the content of @data valid?
+	 */
+	bool set;
+};
+
 /**
  * struct drm_connector_state - mutable connector state
  */
@@ -1071,7 +1086,36 @@ struct drm_connector_state {
 		enum drm_hdmi_broadcast_rgb broadcast_rgb;
 
 		/**
-		 * @is_full_range: Is the output supposed to use a full
+		 * @infoframes: HDMI Infoframes matching that state
+		 */
+		struct {
+			/**
+			 * @avi: AVI Infoframes structure matching our
+			 * state.
+			 */
+			struct drm_connector_hdmi_infoframe avi;
+
+			/**
+			 * @hdr_drm: DRM (Dynamic Range and Mastering)
+			 * Infoframes structure matching our state.
+			 */
+			struct drm_connector_hdmi_infoframe hdr_drm;
+
+			/**
+			 * @spd: SPD Infoframes structure matching our
+			 * state.
+			 */
+			struct drm_connector_hdmi_infoframe spd;
+
+			/**
+			 * @vendor: HDMI Vendor Infoframes structure
+			 * matching our state.
+			 */
+			struct drm_connector_hdmi_infoframe hdmi;
+		} infoframes;
+
+		/**
+		 * @is_limited_range: Is the output supposed to use a limited
 		 * RGB Quantization Range or not?
 		 */
 		bool is_limited_range;
@@ -1115,6 +1159,41 @@ struct drm_connector_hdmi_funcs {
 	(*tmds_char_rate_valid)(const struct drm_connector *connector,
 				const struct drm_display_mode *mode,
 				unsigned long long tmds_rate);
+
+	/**
+	 * @clear_infoframe:
+	 *
+	 * This callback is invoked through
+	 * @drm_atomic_helper_connector_hdmi_update_infoframes during a
+	 * commit to clear the infoframes into the hardware. It will be
+	 * called multiple times, once for every disabled infoframe
+	 * type.
+	 *
+	 * The @clear_infoframe callback is optional.
+	 *
+	 * Returns:
+	 * 0 on success, a negative error code otherwise
+	 */
+	int (*clear_infoframe)(struct drm_connector *connector,
+			       enum hdmi_infoframe_type type);
+
+	/**
+	 * @write_infoframe:
+	 *
+	 * This callback is invoked through
+	 * @drm_atomic_helper_connector_hdmi_update_infoframes during a
+	 * commit to program the infoframes into the hardware. It will
+	 * be called multiple times, once for every updated infoframe
+	 * type.
+	 *
+	 * The @write_infoframe callback is mandatory.
+	 *
+	 * Returns:
+	 * 0 on success, a negative error code otherwise
+	 */
+	int (*write_infoframe)(struct drm_connector *connector,
+			       enum hdmi_infoframe_type type,
+			       const u8 *buffer, size_t len);
 };
 
 /**
@@ -1990,6 +2069,18 @@ struct drm_connector {
 	 * @hdmi: HDMI-related variable and properties.
 	 */
 	struct {
+#define DRM_CONNECTOR_HDMI_VENDOR_LEN	8
+		/**
+		 * @vendor: HDMI Controller Vendor Name
+		 */
+		unsigned char vendor[DRM_CONNECTOR_HDMI_VENDOR_LEN] __nonstring;
+
+#define DRM_CONNECTOR_HDMI_PRODUCT_LEN	16
+		/**
+		 * @product: HDMI Controller Product Name
+		 */
+		unsigned char product[DRM_CONNECTOR_HDMI_PRODUCT_LEN] __nonstring;
+
 		/**
 		 * @supported_formats: Bitmask of @hdmi_colorspace
 		 * supported by the controller.
@@ -2000,6 +2091,23 @@ struct drm_connector {
 		 * @funcs: HDMI connector Control Functions
 		 */
 		const struct drm_connector_hdmi_funcs *funcs;
+
+		/**
+		 * @infoframes: Current Infoframes output by the connector
+		 */
+		struct {
+			/**
+			 * @lock: Mutex protecting against concurrent access to
+			 * the infoframes, most notably between KMS and ALSA.
+			 */
+			struct mutex lock;
+
+			/**
+			 * @audio: Current Audio Infoframes structure. Protected
+			 * by @lock.
+			 */
+			struct drm_connector_hdmi_infoframe audio;
+		} infoframes;
 	} hdmi;
 };
 
@@ -2021,6 +2129,7 @@ int drmm_connector_init(struct drm_device *dev,
 			struct i2c_adapter *ddc);
 int drmm_connector_hdmi_init(struct drm_device *dev,
 			     struct drm_connector *connector,
+			     const char *vendor, const char *product,
 			     const struct drm_connector_funcs *funcs,
 			     const struct drm_connector_hdmi_funcs *hdmi_funcs,
 			     int connector_type,
-- 
cgit v1.2.3


From db003a28e03f95f2bcb63f037a2078b8870b1ecd Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Tue, 28 May 2024 14:33:20 +0200
Subject: netfs: fix kernel doc for nets_wait_for_outstanding_io()

The @inode parameter wasn't documented leading to new doc build
warnings.

Fixes: f89ea63f1c65 ("netfs, 9p: Fix race between umount and async request completion")
Link: https://lore.kernel.org/r/20240528133050.7e09d78e@canb.auug.org.au
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/netfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 3ca3906bb8da..5d0288938cc2 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -521,7 +521,7 @@ static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx)
 
 /**
  * netfs_wait_for_outstanding_io - Wait for outstanding I/O to complete
- * @ctx: The netfs inode to wait on
+ * @inode: The netfs inode to wait on
  *
  * Wait for outstanding I/O requests of any type to complete.  This is intended
  * to be called from inode eviction routines.  This makes sure that any
-- 
cgit v1.2.3


From 620c266f394932e5decc4b34683a75dfc59dc2f4 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 24 May 2024 12:19:39 +0200
Subject: fhandle: relax open_by_handle_at() permission checks

A current limitation of open_by_handle_at() is that it's currently not possible
to use it from within containers at all because we require CAP_DAC_READ_SEARCH
in the initial namespace. That's unfortunate because there are scenarios where
using open_by_handle_at() from within containers.

Two examples:

(1) cgroupfs allows to encode cgroups to file handles and reopen them with
    open_by_handle_at().
(2) Fanotify allows placing filesystem watches they currently aren't usable in
    containers because the returned file handles cannot be used.

Here's a proposal for relaxing the permission check for open_by_handle_at().

(1) Opening file handles when the caller has privileges over the filesystem
    (1.1) The caller has an unobstructed view of the filesystem.
    (1.2) The caller has permissions to follow a path to the file handle.

This doesn't address the problem of opening a file handle when only a portion
of a filesystem is exposed as is common in containers by e.g., bind-mounting a
subtree. The proposal to solve this use-case is:

(2) Opening file handles when the caller has privileges over a subtree
    (2.1) The caller is able to reach the file from the provided mount fd.
    (2.2) The caller has permissions to construct an unobstructed path to the
          file handle.
    (2.3) The caller has permissions to follow a path to the file handle.

The relaxed permission checks are currently restricted to directory file
handles which are what both cgroupfs and fanotify need. Handling disconnected
non-directory file handles would lead to a potentially non-deterministic api.
If a disconnected non-directory file handle is provided we may fail to decode
a valid path that we could use for permission checking. That in itself isn't a
problem as we would just return EACCES in that case. However, confusion may
arise if a non-disconnected dentry ends up in the cache later and those opening
the file handle would suddenly succeed.

* It's potentially possible to use timing information (side-channel) to infer
  whether a given inode exists. I don't think that's particularly
  problematic. Thanks to Jann for bringing this to my attention.

* An unrelated note (IOW, these are thoughts that apply to
  open_by_handle_at() generically and are unrelated to the changes here):
  Jann pointed out that we should verify whether deleted files could
  potentially be reopened through open_by_handle_at(). I don't think that's
  possible though.

  Another potential thing to check is whether open_by_handle_at() could be
  abused to open internal stuff like memfds or gpu stuff. I don't think so
  but I haven't had the time to completely verify this.

This dates back to discussions Amir and I had quite some time ago and thanks to
him for providing a lot of details around the export code and related patches!

Link: https://lore.kernel.org/r/20240524-vfs-open_by_handle_at-v1-1-3d4b7d22736b@kernel.org
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/exportfs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index bb37ad5cc954..893a1d21dc1c 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -158,6 +158,7 @@ struct fid {
 
 #define EXPORT_FH_CONNECTABLE	0x1 /* Encode file handle with parent */
 #define EXPORT_FH_FID		0x2 /* File handle may be non-decodeable */
+#define EXPORT_FH_DIR_ONLY	0x4 /* Only decode file handle for a directory */
 
 /**
  * struct export_operations - for nfsd to communicate with file systems
@@ -305,6 +306,7 @@ static inline int exportfs_encode_fid(struct inode *inode, struct fid *fid,
 extern struct dentry *exportfs_decode_fh_raw(struct vfsmount *mnt,
 					     struct fid *fid, int fh_len,
 					     int fileid_type,
+					     unsigned int flags,
 					     int (*acceptable)(void *, struct dentry *),
 					     void *context);
 extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
-- 
cgit v1.2.3


From 1866407831deb945a16c60c38246f28c2475b28a Mon Sep 17 00:00:00 2001
From: Richard Acayan <mailingradian@gmail.com>
Date: Thu, 23 May 2024 21:20:25 -0400
Subject: dt-bindings: arm: qcom,ids: Add SoC ID for SDM670

The socinfo driver uses SoC IDs from the device tree bindings.
Add the SoC ID for SDM670.

Signed-off-by: Richard Acayan <mailingradian@gmail.com>
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20240524012023.318965-6-mailingradian@gmail.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/arm/qcom,ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h
index a2958952a9ec..d6c9e9472121 100644
--- a/include/dt-bindings/arm/qcom,ids.h
+++ b/include/dt-bindings/arm/qcom,ids.h
@@ -175,6 +175,7 @@
 #define QCOM_ID_SDA630			327
 #define QCOM_ID_MSM8905			331
 #define QCOM_ID_SDX202			333
+#define QCOM_ID_SDM670			336
 #define QCOM_ID_SDM450			338
 #define QCOM_ID_SM8150			339
 #define QCOM_ID_SDA845			341
-- 
cgit v1.2.3


From 14a1d1dc35d346a1523f38f6517c349dfa447a58 Mon Sep 17 00:00:00 2001
From: Alex Bee <knaerzche@gmail.com>
Date: Thu, 9 May 2024 16:06:48 +0200
Subject: dt-bindings: clock: rk3128: Add PCLK_MIPIPHY

The DPHY's APB clock is required to be exposed in order to be able to
enable it and access the phy's registers.

Signed-off-by: Alex Bee <knaerzche@gmail.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20240509140653.168591-3-knaerzche@gmail.com
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 include/dt-bindings/clock/rk3128-cru.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/rk3128-cru.h b/include/dt-bindings/clock/rk3128-cru.h
index 6a47825dac5d..1be455ba4985 100644
--- a/include/dt-bindings/clock/rk3128-cru.h
+++ b/include/dt-bindings/clock/rk3128-cru.h
@@ -116,6 +116,7 @@
 #define PCLK_GMAC		367
 #define PCLK_PMU_PRE		368
 #define PCLK_SIM_CARD		369
+#define PCLK_MIPIPHY		370
 
 /* hclk gates */
 #define HCLK_SPDIF		440
-- 
cgit v1.2.3


From 4edd7dc82bd6be6989c034ccbbbccdc61034e33b Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Tue, 30 Apr 2024 11:43:43 +0530
Subject: PCI: endpoint: Rename core_init() callback in 'struct
 pci_epc_event_ops' to epc_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

core_init() callback is used to notify the EPC initialization event to the
EPF drivers. The 'core' prefix was used indicate that the controller IP
core has completed initialization. But it serves no purpose as the EPF
driver will only care about the EPC initialization as a whole and there is
no real benefit to distinguish the IP core part.

Rename the core_init() callback in 'struct pci_epc_event_ops' to epc_init()
to make it more clear.

Link: https://lore.kernel.org/linux-pci/20240430-pci-epf-rework-v4-2-22832d0d456f@linaro.org
Tested-by: Niklas Cassel <cassel@kernel.org>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Niklas Cassel <cassel@kernel.org>
---
 include/linux/pci-epf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h
index adee6a1b35db..afe3bfd88742 100644
--- a/include/linux/pci-epf.h
+++ b/include/linux/pci-epf.h
@@ -70,13 +70,13 @@ struct pci_epf_ops {
 
 /**
  * struct pci_epc_event_ops - Callbacks for capturing the EPC events
- * @core_init: Callback for the EPC initialization complete event
+ * @epc_init: Callback for the EPC initialization complete event
  * @link_up: Callback for the EPC link up event
  * @link_down: Callback for the EPC link down event
  * @bme: Callback for the EPC BME (Bus Master Enable) event
  */
 struct pci_epc_event_ops {
-	int (*core_init)(struct pci_epf *epf);
+	int (*epc_init)(struct pci_epf *epf);
 	int (*link_up)(struct pci_epf *epf);
 	int (*link_down)(struct pci_epf *epf);
 	int (*bme)(struct pci_epf *epf);
-- 
cgit v1.2.3


From f58838d7feb04809257f54a8b256786d2f9dfe01 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Tue, 30 Apr 2024 11:43:44 +0530
Subject: PCI: endpoint: Rename BME to Bus Master Enable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BME which stands for 'Bus Master Enable' is not defined in the PCIe base
spec even though it is commonly referred in many places (vendor docs).  To
align with the spec, rename it to its expansion 'Bus Master Enable'.

Suggested-by: Damien Le Moal <dlemoal@kernel.org>
Link: https://lore.kernel.org/linux-pci/20240430-pci-epf-rework-v4-3-22832d0d456f@linaro.org
Link: https://lore.kernel.org/linux-pci/20240430-pci-epf-rework-v4-4-22832d0d456f@linaro.org
Tested-by: Niklas Cassel <cassel@kernel.org>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
[bhelgaas: squash removal of irrelevant 'Link is enabled']
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Niklas Cassel <cassel@kernel.org>
---
 include/linux/pci-epc.h | 2 +-
 include/linux/pci-epf.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index acc5f96161fe..11115cd0fe5b 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -226,7 +226,7 @@ void pci_epc_linkup(struct pci_epc *epc);
 void pci_epc_linkdown(struct pci_epc *epc);
 void pci_epc_init_notify(struct pci_epc *epc);
 void pci_epc_notify_pending_init(struct pci_epc *epc, struct pci_epf *epf);
-void pci_epc_bme_notify(struct pci_epc *epc);
+void pci_epc_bus_master_enable_notify(struct pci_epc *epc);
 void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf,
 			enum pci_epc_interface_type type);
 int pci_epc_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h
index afe3bfd88742..dc759eb7157c 100644
--- a/include/linux/pci-epf.h
+++ b/include/linux/pci-epf.h
@@ -73,13 +73,13 @@ struct pci_epf_ops {
  * @epc_init: Callback for the EPC initialization complete event
  * @link_up: Callback for the EPC link up event
  * @link_down: Callback for the EPC link down event
- * @bme: Callback for the EPC BME (Bus Master Enable) event
+ * @bus_master_enable: Callback for the EPC Bus Master Enable event
  */
 struct pci_epc_event_ops {
 	int (*epc_init)(struct pci_epf *epf);
 	int (*link_up)(struct pci_epf *epf);
 	int (*link_down)(struct pci_epf *epf);
-	int (*bme)(struct pci_epf *epf);
+	int (*bus_master_enable)(struct pci_epf *epf);
 };
 
 /**
-- 
cgit v1.2.3


From 4eed3dd7116814c82630b0f1b0f73fa96707134f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Tue, 7 May 2024 13:25:19 +0300
Subject: resource: Use typedef for alignf callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To make it simpler to declare resource constraint alignf callbacks, add
typedef for it and document it.

Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20240507102523.57320-5-ilpo.jarvinen@linux.intel.com
Tested-by: Lidong Wang <lidong.wang@intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 include/linux/ioport.h | 22 ++++++++++++++++++----
 include/linux/pci.h    |  5 +----
 2 files changed, 19 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index db7fe25f3370..28266426e5bf 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -188,6 +188,23 @@ enum {
 #define DEFINE_RES_DMA(_dma)						\
 	DEFINE_RES_DMA_NAMED((_dma), NULL)
 
+/**
+ * typedef resource_alignf - Resource alignment callback
+ * @data:	Private data used by the callback
+ * @res:	Resource candidate range (an empty resource space)
+ * @size:	The minimum size of the empty space
+ * @align:	Alignment from the constraints
+ *
+ * Callback allows calculating resource placement and alignment beyond min,
+ * max, and align fields in the struct resource_constraint.
+ *
+ * Return: Start address for the resource.
+ */
+typedef resource_size_t (*resource_alignf)(void *data,
+					   const struct resource *res,
+					   resource_size_t size,
+					   resource_size_t align);
+
 /* PC/ISA/whatever - the normal PC address spaces: IO and memory */
 extern struct resource ioport_resource;
 extern struct resource iomem_resource;
@@ -207,10 +224,7 @@ extern void arch_remove_reservations(struct resource *avail);
 extern int allocate_resource(struct resource *root, struct resource *new,
 			     resource_size_t size, resource_size_t min,
 			     resource_size_t max, resource_size_t align,
-			     resource_size_t (*alignf)(void *,
-						       const struct resource *,
-						       resource_size_t,
-						       resource_size_t),
+			     resource_alignf alignf,
 			     void *alignf_data);
 struct resource *lookup_resource(struct resource *root, resource_size_t start);
 int adjust_resource(struct resource *res, resource_size_t start,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index fb004fd4e889..760fdc4b37cc 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1551,10 +1551,7 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus,
 			struct resource *res, resource_size_t size,
 			resource_size_t align, resource_size_t min,
 			unsigned long type_mask,
-			resource_size_t (*alignf)(void *,
-						  const struct resource *,
-						  resource_size_t,
-						  resource_size_t),
+			resource_alignf alignf,
 			void *alignf_data);
 
 
-- 
cgit v1.2.3


From 2700225304e8e4f0f29f2bbb8ad0c112c9666d90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Tue, 7 May 2024 13:25:21 +0300
Subject: resource: Export find_resource_space()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PCI bridge window logic needs to find out in advance to the actual
allocation if there is an empty space big enough to fit the window.

Export find_resource_space() for the purpose. Also move the struct
resource_constraint into generic header to be able to use the new
interface.

Link: https://lore.kernel.org/r/20240507102523.57320-7-ilpo.jarvinen@linux.intel.com
Tested-by: Lidong Wang <lidong.wang@intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 include/linux/ioport.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 28266426e5bf..6e9fb667a1c5 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -205,6 +205,25 @@ typedef resource_size_t (*resource_alignf)(void *data,
 					   resource_size_t size,
 					   resource_size_t align);
 
+/**
+ * struct resource_constraint - constraints to be met while searching empty
+ *				resource space
+ * @min:		The minimum address for the memory range
+ * @max:		The maximum address for the memory range
+ * @align:		Alignment for the start address of the empty space
+ * @alignf:		Additional alignment constraints callback
+ * @alignf_data:	Data provided for @alignf callback
+ *
+ * Contains the range and alignment constraints that have to be met during
+ * find_resource_space(). @alignf can be NULL indicating no alignment beyond
+ * @align is necessary.
+ */
+struct resource_constraint {
+	resource_size_t min, max, align;
+	resource_alignf alignf;
+	void *alignf_data;
+};
+
 /* PC/ISA/whatever - the normal PC address spaces: IO and memory */
 extern struct resource ioport_resource;
 extern struct resource iomem_resource;
@@ -278,6 +297,9 @@ static inline bool resource_union(const struct resource *r1, const struct resour
 	return true;
 }
 
+int find_resource_space(struct resource *root, struct resource *new,
+			resource_size_t size, struct resource_constraint *constraint);
+
 /* Convenience shorthand with allocation */
 #define request_region(start,n,name)		__request_region(&ioport_resource, (start), (n), (name), 0)
 #define request_muxed_region(start,n,name)	__request_region(&ioport_resource, (start), (n), (name), IORESOURCE_MUXED)
-- 
cgit v1.2.3


From c7ce956bb6d0f32ab921b6ffba1a6a834df96f21 Mon Sep 17 00:00:00 2001
From: MarileneGarcia <marilene.agarcia@gmail.com>
Date: Sun, 19 May 2024 00:10:27 -0300
Subject: drm/dp: Fix documentation warning

It fixes the following warnings when
the kernel documentation is generated:

./include/drm/display/drm_dp_helper.h:126:
warning: Function parameter or struct member
'mode' not described in 'drm_dp_as_sdp'

./include/drm/display/drm_dp_helper.h:126:
warning: Excess struct member 'operation_mode'
description in 'drm_dp_as_sdp'

Signed-off-by: MarileneGarcia <marilene.agarcia@gmail.com>
Fixes: 0bbb8f594e33 ("drm/dp: Add Adaptive Sync SDP logging")
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Closes: https://lore.kernel.org/r/20240405141640.09b0bdbf@canb.auug.org.au
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240519031027.433751-1-marilene.agarcia@gmail.com
---
 include/drm/display/drm_dp_helper.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h
index 8bed890eec2c..8defcc399f42 100644
--- a/include/drm/display/drm_dp_helper.h
+++ b/include/drm/display/drm_dp_helper.h
@@ -112,7 +112,7 @@ struct drm_dp_vsc_sdp {
  * @target_rr: Target Refresh
  * @duration_incr_ms: Successive frame duration increase
  * @duration_decr_ms: Successive frame duration decrease
- * @operation_mode: Adaptive Sync Operation Mode
+ * @mode: Adaptive Sync Operation Mode
  */
 struct drm_dp_as_sdp {
 	unsigned char sdp_type;
-- 
cgit v1.2.3


From 475beea0b9f631656b5cc39429a39696876af613 Mon Sep 17 00:00:00 2001
From: Alexandru Gagniuc <mr.nuke.me@gmail.com>
Date: Tue, 30 Apr 2024 23:07:43 -0500
Subject: dt-bindings: clock: Add PCIe pipe related clocks for IPQ9574

Add defines for the missing PCIe PIPE clocks.

Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20240501040800.1542805-2-mr.nuke.me@gmail.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,ipq9574-gcc.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,ipq9574-gcc.h b/include/dt-bindings/clock/qcom,ipq9574-gcc.h
index 08fd3a37acaa..52123c5a09fa 100644
--- a/include/dt-bindings/clock/qcom,ipq9574-gcc.h
+++ b/include/dt-bindings/clock/qcom,ipq9574-gcc.h
@@ -216,4 +216,8 @@
 #define GCC_CRYPTO_AHB_CLK				207
 #define GCC_USB0_PIPE_CLK				208
 #define GCC_USB0_SLEEP_CLK				209
+#define GCC_PCIE0_PIPE_CLK				210
+#define GCC_PCIE1_PIPE_CLK				211
+#define GCC_PCIE2_PIPE_CLK				212
+#define GCC_PCIE3_PIPE_CLK				213
 #endif
-- 
cgit v1.2.3


From 8526f8c877baf3f9e678b31fd7d1066b776775cc Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 23 May 2024 12:02:13 +0200
Subject: wifi: nl80211: clean up coalescing rule handling

There's no need to allocate a tiny struct and then
an array again, just allocate the two together and
use __counted_by(). Also unify the freeing.

Reviewed-by: Miriam Rachel Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240523120213.48a40cfb96f9.Ia02bf8f8fefbf533c64c5fa26175848d4a3a7899@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d79180bec7a1..5da9bb0ac6a4 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3566,8 +3566,8 @@ struct cfg80211_coalesce_rules {
  * @n_rules: number of rules
  */
 struct cfg80211_coalesce {
-	struct cfg80211_coalesce_rules *rules;
 	int n_rules;
+	struct cfg80211_coalesce_rules rules[] __counted_by(n_rules);
 };
 
 /**
-- 
cgit v1.2.3


From ed7ee6a69f9289337af4835a908aa782263d4852 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Wed, 29 May 2024 10:39:01 +0200
Subject: statx: Update offset commentary for struct statx

In commit 2a82bb02941f ("statx: stx_subvol"), a new member was added to
struct statx, but the offset comment was not correct. Update it.

Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20240529081725.3769290-1-john.g.garry@oracle.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/stat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 67626d535316..95770941ee2c 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -126,8 +126,8 @@ struct statx {
 	__u64	stx_mnt_id;
 	__u32	stx_dio_mem_align;	/* Memory buffer alignment for direct I/O */
 	__u32	stx_dio_offset_align;	/* File offset alignment for direct I/O */
-	__u64	stx_subvol;	/* Subvolume identifier */
 	/* 0xa0 */
+	__u64	stx_subvol;	/* Subvolume identifier */
 	__u64	__spare3[11];	/* Spare space for future expansion */
 	/* 0x100 */
 };
-- 
cgit v1.2.3


From 1deba6e24c221c61c6eab8656a53f8c17035932b Mon Sep 17 00:00:00 2001
From: Brent Lu <brent.lu@intel.com>
Date: Mon, 27 May 2024 14:35:47 -0500
Subject: ASoC: SOF: sof-audio: add sof_dai_get_tdm_slots function

An new interface, sof_dai_get_tdm_slots(), is added for machine driver
to get tdm slot number from topology. The dai_get_param() callback
needs to support new parameter type SOF_DAI_PARAM_INTEL_SSP_TDM_SLOTS
by returning the tdm slot number of specific SSP port.

Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Signed-off-by: Brent Lu <brent.lu@intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://msgid.link/r/20240527193552.165567-14-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sof.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/sound/sof.h b/include/sound/sof.h
index ec6c30d54592..64fd5504cb2b 100644
--- a/include/sound/sof.h
+++ b/include/sound/sof.h
@@ -173,5 +173,6 @@ struct sof_dev_desc {
 
 int sof_dai_get_mclk(struct snd_soc_pcm_runtime *rtd);
 int sof_dai_get_bclk(struct snd_soc_pcm_runtime *rtd);
+int sof_dai_get_tdm_slots(struct snd_soc_pcm_runtime *rtd);
 
 #endif
-- 
cgit v1.2.3


From cc5ac966f26193ab185cc43d64d9f1ae998ccb6e Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Wed, 22 May 2024 19:42:57 +0800
Subject: cachefiles: add output string to cachefiles_obj_[get|put]_ondemand_fd

This lets us see the correct trace output.

Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie")
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Link: https://lore.kernel.org/r/20240522114308.2402121-2-libaokun@huaweicloud.com
Acked-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jingbo Xu <jefflexu@linux.alibaba.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/trace/events/cachefiles.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index cf4b98b9a9ed..e3213af847cd 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -127,7 +127,9 @@ enum cachefiles_error_trace {
 	EM(cachefiles_obj_see_lookup_cookie,	"SEE lookup_cookie")	\
 	EM(cachefiles_obj_see_lookup_failed,	"SEE lookup_failed")	\
 	EM(cachefiles_obj_see_withdraw_cookie,	"SEE withdraw_cookie")	\
-	E_(cachefiles_obj_see_withdrawal,	"SEE withdrawal")
+	EM(cachefiles_obj_see_withdrawal,	"SEE withdrawal")	\
+	EM(cachefiles_obj_get_ondemand_fd,      "GET ondemand_fd")	\
+	E_(cachefiles_obj_put_ondemand_fd,      "PUT ondemand_fd")
 
 #define cachefiles_coherency_traces					\
 	EM(cachefiles_coherency_check_aux,	"BAD aux ")		\
-- 
cgit v1.2.3


From da4a827416066191aafeeccee50a8836a826ba10 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Wed, 22 May 2024 19:43:00 +0800
Subject: cachefiles: fix slab-use-after-free in
 cachefiles_ondemand_daemon_read()

We got the following issue in a fuzz test of randomly issuing the restore
command:

==================================================================
BUG: KASAN: slab-use-after-free in cachefiles_ondemand_daemon_read+0xb41/0xb60
Read of size 8 at addr ffff888122e84088 by task ondemand-04-dae/963

CPU: 13 PID: 963 Comm: ondemand-04-dae Not tainted 6.8.0-dirty #564
Call Trace:
 kasan_report+0x93/0xc0
 cachefiles_ondemand_daemon_read+0xb41/0xb60
 vfs_read+0x169/0xb50
 ksys_read+0xf5/0x1e0

Allocated by task 116:
 kmem_cache_alloc+0x140/0x3a0
 cachefiles_lookup_cookie+0x140/0xcd0
 fscache_cookie_state_machine+0x43c/0x1230
 [...]

Freed by task 792:
 kmem_cache_free+0xfe/0x390
 cachefiles_put_object+0x241/0x480
 fscache_cookie_state_machine+0x5c8/0x1230
 [...]
==================================================================

Following is the process that triggers the issue:

     mount  |   daemon_thread1    |    daemon_thread2
------------------------------------------------------------
cachefiles_withdraw_cookie
 cachefiles_ondemand_clean_object(object)
  cachefiles_ondemand_send_req
   REQ_A = kzalloc(sizeof(*req) + data_len)
   wait_for_completion(&REQ_A->done)

            cachefiles_daemon_read
             cachefiles_ondemand_daemon_read
              REQ_A = cachefiles_ondemand_select_req
              msg->object_id = req->object->ondemand->ondemand_id
                                  ------ restore ------
                                  cachefiles_ondemand_restore
                                  xas_for_each(&xas, req, ULONG_MAX)
                                   xas_set_mark(&xas, CACHEFILES_REQ_NEW)

                                  cachefiles_daemon_read
                                   cachefiles_ondemand_daemon_read
                                    REQ_A = cachefiles_ondemand_select_req
              copy_to_user(_buffer, msg, n)
               xa_erase(&cache->reqs, id)
               complete(&REQ_A->done)
              ------ close(fd) ------
              cachefiles_ondemand_fd_release
               cachefiles_put_object
 cachefiles_put_object
  kmem_cache_free(cachefiles_object_jar, object)
                                    REQ_A->object->ondemand->ondemand_id
                                     // object UAF !!!

When we see the request within xa_lock, req->object must not have been
freed yet, so grab the reference count of object before xa_unlock to
avoid the above issue.

Fixes: 0a7e54c1959c ("cachefiles: resend an open request if the read request's object is closed")
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Link: https://lore.kernel.org/r/20240522114308.2402121-5-libaokun@huaweicloud.com
Acked-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jia Zhu <zhujia.zj@bytedance.com>
Reviewed-by: Jingbo Xu <jefflexu@linux.alibaba.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/trace/events/cachefiles.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index e3213af847cd..7d931db02b93 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -33,6 +33,8 @@ enum cachefiles_obj_ref_trace {
 	cachefiles_obj_see_withdrawal,
 	cachefiles_obj_get_ondemand_fd,
 	cachefiles_obj_put_ondemand_fd,
+	cachefiles_obj_get_read_req,
+	cachefiles_obj_put_read_req,
 };
 
 enum fscache_why_object_killed {
@@ -129,7 +131,9 @@ enum cachefiles_error_trace {
 	EM(cachefiles_obj_see_withdraw_cookie,	"SEE withdraw_cookie")	\
 	EM(cachefiles_obj_see_withdrawal,	"SEE withdrawal")	\
 	EM(cachefiles_obj_get_ondemand_fd,      "GET ondemand_fd")	\
-	E_(cachefiles_obj_put_ondemand_fd,      "PUT ondemand_fd")
+	EM(cachefiles_obj_put_ondemand_fd,      "PUT ondemand_fd")	\
+	EM(cachefiles_obj_get_read_req,		"GET read_req")		\
+	E_(cachefiles_obj_put_read_req,		"PUT read_req")
 
 #define cachefiles_coherency_traces					\
 	EM(cachefiles_coherency_check_aux,	"BAD aux ")		\
-- 
cgit v1.2.3


From 7c327d56597d8de1680cf24e956b704270d3d84a Mon Sep 17 00:00:00 2001
From: Richard Maina <quic_rmaina@quicinc.com>
Date: Wed, 29 May 2024 11:09:55 -0700
Subject: hwspinlock: Introduce hwspin_lock_bust()

When a remoteproc crashes or goes down unexpectedly this can result in
a state where locks held by the remoteproc will remain locked possibly
resulting in deadlock. This new API hwspin_lock_bust() allows
hwspinlock implementers to define a bust operation for freeing previously
acquired hwspinlocks after verifying ownership of the acquired lock.

Signed-off-by: Richard Maina <quic_rmaina@quicinc.com>
Reviewed-by: Bjorn Andersson <andersson@kernel.org>
Signed-off-by: Chris Lew <quic_clew@quicinc.com>
Link: https://lore.kernel.org/r/20240529-hwspinlock-bust-v3-1-c8b924ffa5a2@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/hwspinlock.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h
index bfe7c1f1ac6d..f0231dbc4777 100644
--- a/include/linux/hwspinlock.h
+++ b/include/linux/hwspinlock.h
@@ -68,6 +68,7 @@ int __hwspin_lock_timeout(struct hwspinlock *, unsigned int, int,
 int __hwspin_trylock(struct hwspinlock *, int, unsigned long *);
 void __hwspin_unlock(struct hwspinlock *, int, unsigned long *);
 int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name);
+int hwspin_lock_bust(struct hwspinlock *hwlock, unsigned int id);
 int devm_hwspin_lock_free(struct device *dev, struct hwspinlock *hwlock);
 struct hwspinlock *devm_hwspin_lock_request(struct device *dev);
 struct hwspinlock *devm_hwspin_lock_request_specific(struct device *dev,
@@ -127,6 +128,11 @@ void __hwspin_unlock(struct hwspinlock *hwlock, int mode, unsigned long *flags)
 {
 }
 
+static inline int hwspin_lock_bust(struct hwspinlock *hwlock, unsigned int id)
+{
+	return 0;
+}
+
 static inline int of_hwspin_lock_get_id(struct device_node *np, int index)
 {
 	return 0;
-- 
cgit v1.2.3


From 2e3f0d693875db698891ffe89a18121bda5b95b8 Mon Sep 17 00:00:00 2001
From: Chris Lew <quic_clew@quicinc.com>
Date: Wed, 29 May 2024 11:09:57 -0700
Subject: soc: qcom: smem: Add qcom_smem_bust_hwspin_lock_by_host()

Add qcom_smem_bust_hwspin_lock_by_host to enable remoteproc to bust the
hwspin_lock owned by smem. In the event the remoteproc crashes
unexpectedly, the remoteproc driver can invoke this API to try and bust
the hwspin_lock and release the lock if still held by the remoteproc
device.

Signed-off-by: Chris Lew <quic_clew@quicinc.com>
Reviewed-by: Bjorn Andersson <quic_bjorande@quicinc.com>
Link: https://lore.kernel.org/r/20240529-hwspinlock-bust-v3-3-c8b924ffa5a2@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/soc/qcom/smem.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/soc/qcom/smem.h b/include/linux/soc/qcom/smem.h
index a36a3b9d4929..03187bc95851 100644
--- a/include/linux/soc/qcom/smem.h
+++ b/include/linux/soc/qcom/smem.h
@@ -14,4 +14,6 @@ phys_addr_t qcom_smem_virt_to_phys(void *p);
 
 int qcom_smem_get_soc_id(u32 *id);
 
+int qcom_smem_bust_hwspin_lock_by_host(unsigned int host);
+
 #endif
-- 
cgit v1.2.3


From 5e514f1cba090e1c8fff03e92a175eccfe46305f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 28 May 2024 12:52:50 +0000
Subject: tcp: add tcp_done_with_error() helper

tcp_reset() ends with a sequence that is carefuly ordered.

We need to fix [e]poll bugs in the following patches,
it makes sense to use a common helper.

Suggested-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Link: https://lore.kernel.org/r/20240528125253.1966136-2-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 060e95b331a2..32815a40dea1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -677,6 +677,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 /* tcp_input.c */
 void tcp_rearm_rto(struct sock *sk);
 void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
+void tcp_done_with_error(struct sock *sk, int err);
 void tcp_reset(struct sock *sk, struct sk_buff *skb);
 void tcp_fin(struct sock *sk);
 void tcp_check_space(struct sock *sk);
-- 
cgit v1.2.3


From bbb31b7ae14594aa2a7e74923ee38f312404ad66 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Tue, 28 May 2024 16:05:09 +0100
Subject: net: dsa: remove mac_prepare()/mac_finish() shims

No DSA driver makes use of the mac_prepare()/mac_finish() shimmed
operations anymore, so we can remove these.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://lore.kernel.org/r/E1sByNx-00ELW1-Vp@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dsa.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index b60e7e410aba..f9ae3ca66b6f 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -882,15 +882,9 @@ struct dsa_switch_ops {
 	struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds,
 						      int port,
 						      phy_interface_t iface);
-	int	(*phylink_mac_prepare)(struct dsa_switch *ds, int port,
-				       unsigned int mode,
-				       phy_interface_t interface);
 	void	(*phylink_mac_config)(struct dsa_switch *ds, int port,
 				      unsigned int mode,
 				      const struct phylink_link_state *state);
-	int	(*phylink_mac_finish)(struct dsa_switch *ds, int port,
-				      unsigned int mode,
-				      phy_interface_t interface);
 	void	(*phylink_mac_link_down)(struct dsa_switch *ds, int port,
 					 unsigned int mode,
 					 phy_interface_t interface);
-- 
cgit v1.2.3


From 6f6bfbc595fbae95f4c1ff80c87d089604e5e6a1 Mon Sep 17 00:00:00 2001
From: Selvin Xavier <selvin.xavier@broadcom.com>
Date: Mon, 27 May 2024 23:11:37 -0700
Subject: RDMA/bnxt_re: Expose the MSN table capability for user library

BNXT_RE_COMP_MASK_UCNTX_HW_RETX_ENABLED was introduced to share the HW
retransmit capability between driver and lib. The main difference in
implementation for HW Retransmit support is the usage of MSN table or
PSN table . When HW retrans is enabled, HW expects MSN table to be
allocated by driver/lib, else PSN table (for older adapters).

FW expose a new field which gives MSN capability. Drivers and libs can
depend on the new field instead of HW Retrasns capability. For
adapters which support HW_RETX feature, MSN table capability will be
set. For older adapters, this value will be 0(to maintain backward
compatibility with older FW).

Rename UAPI just to capture the correct name of the HW capability
that driver/library is interested in. No functional impact even if
older rdma-core is used.

Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Link: https://lore.kernel.org/r/1716876697-25970-3-git-send-email-selvin.xavier@broadcom.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/uapi/rdma/bnxt_re-abi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
index c0c34aca90ec..e61104f35d73 100644
--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h
@@ -55,7 +55,7 @@ enum {
 	BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED = 0x04ULL,
 	BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED = 0x08ULL,
 	BNXT_RE_UCNTX_CMASK_POW2_DISABLED = 0x10ULL,
-	BNXT_RE_COMP_MASK_UCNTX_HW_RETX_ENABLED = 0x40,
+	BNXT_RE_UCNTX_CMASK_MSN_TABLE_ENABLED = 0x40,
 };
 
 enum bnxt_re_wqe_mode {
-- 
cgit v1.2.3


From fdefb918496235a11d6c5477c34c81aab2c1343b Mon Sep 17 00:00:00 2001
From: Konstantin Taranov <kotaranov@microsoft.com>
Date: Wed, 22 May 2024 01:24:01 -0700
Subject: RDMA/mana_ib: Implement uapi to create and destroy RC QP

Implement user requests to create and destroy an RC QP.
As the user does not have an FMR queue, it is skipped and NO_FMR flag
is used.

Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
Link: https://lore.kernel.org/r/1716366242-558-3-git-send-email-kotaranov@linux.microsoft.com
Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/uapi/rdma/mana-abi.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/uapi/rdma/mana-abi.h b/include/uapi/rdma/mana-abi.h
index 2c41cc315218..45c2df619f07 100644
--- a/include/uapi/rdma/mana-abi.h
+++ b/include/uapi/rdma/mana-abi.h
@@ -45,6 +45,15 @@ struct mana_ib_create_qp_resp {
 	__u32 reserved;
 };
 
+struct mana_ib_create_rc_qp {
+	__aligned_u64 queue_buf[4];
+	__u32 queue_size[4];
+};
+
+struct mana_ib_create_rc_qp_resp {
+	__u32 queue_id[4];
+};
+
 struct mana_ib_create_wq {
 	__aligned_u64 wq_buf_addr;
 	__u32 wq_buf_size;
-- 
cgit v1.2.3


From 73fc975318e0ab3385c5b3372c7b296ae58c8d6b Mon Sep 17 00:00:00 2001
From: Durai Manickam KR <durai.manickamkr@microchip.com>
Date: Wed, 24 Apr 2024 11:03:45 +0530
Subject: drm: atmel-hlcdc: Define XLCDC specific registers

The register address of the XLCDC IP used in SAM9X7 SoC family
are different from the previous HLCDC. Defining those address
space with valid macros.

Signed-off-by: Durai Manickam KR <durai.manickamkr@microchip.com>
[manikandan.m@microchip.com: Remove unused macro definitions]
Signed-off-by: Manikandan Muralidharan <manikandan.m@microchip.com>
Acked-by: Lee Jones <lee@kernel.org>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240424053351.589830-3-manikandan.m@microchip.com
---
 include/linux/mfd/atmel-hlcdc.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/mfd/atmel-hlcdc.h b/include/linux/mfd/atmel-hlcdc.h
index a186119a49b5..80d675a03b39 100644
--- a/include/linux/mfd/atmel-hlcdc.h
+++ b/include/linux/mfd/atmel-hlcdc.h
@@ -22,6 +22,8 @@
 #define ATMEL_HLCDC_DITHER		BIT(6)
 #define ATMEL_HLCDC_DISPDLY		BIT(7)
 #define ATMEL_HLCDC_MODE_MASK		GENMASK(9, 8)
+#define ATMEL_XLCDC_MODE_MASK		GENMASK(10, 8)
+#define ATMEL_XLCDC_DPI			BIT(11)
 #define ATMEL_HLCDC_PP			BIT(10)
 #define ATMEL_HLCDC_VSPSU		BIT(12)
 #define ATMEL_HLCDC_VSPHO		BIT(13)
@@ -34,6 +36,12 @@
 #define ATMEL_HLCDC_IDR			0x30
 #define ATMEL_HLCDC_IMR			0x34
 #define ATMEL_HLCDC_ISR			0x38
+#define ATMEL_XLCDC_ATTRE		0x3c
+
+#define ATMEL_XLCDC_BASE_UPDATE		BIT(0)
+#define ATMEL_XLCDC_OVR1_UPDATE		BIT(1)
+#define ATMEL_XLCDC_OVR3_UPDATE		BIT(2)
+#define ATMEL_XLCDC_HEO_UPDATE		BIT(3)
 
 #define ATMEL_HLCDC_CLKPOL		BIT(0)
 #define ATMEL_HLCDC_CLKSEL		BIT(2)
@@ -48,6 +56,8 @@
 #define ATMEL_HLCDC_DISP		BIT(2)
 #define ATMEL_HLCDC_PWM			BIT(3)
 #define ATMEL_HLCDC_SIP			BIT(4)
+#define ATMEL_XLCDC_SD			BIT(5)
+#define ATMEL_XLCDC_CM			BIT(6)
 
 #define ATMEL_HLCDC_SOF			BIT(0)
 #define ATMEL_HLCDC_SYNCDIS		BIT(1)
-- 
cgit v1.2.3


From 2c7afc2a880cd4899f9dd6bfa62f10f84773148b Mon Sep 17 00:00:00 2001
From: Ivan Orlov <ivan.orlov0322@gmail.com>
Date: Thu, 16 May 2024 22:17:31 +0100
Subject: kunit: Cover 'assert.c' with tests

There are multiple assertion formatting functions in the `assert.c`
file, which are not covered with tests yet. Implement the KUnit test
for these functions.

The test consists of 11 test cases for the following functions:

1) 'is_literal'
2) 'is_str_literal'
3) 'kunit_assert_prologue', test case for multiple assert types
4) 'kunit_assert_print_msg'
5) 'kunit_unary_assert_format'
6) 'kunit_ptr_not_err_assert_format'
7) 'kunit_binary_assert_format'
8) 'kunit_binary_ptr_assert_format'
9) 'kunit_binary_str_assert_format'
10) 'kunit_assert_hexdump'
11) 'kunit_mem_assert_format'

The test aims at maximizing the branch coverage for the assertion
formatting functions.

As you can see, it covers some of the static helper functions as
well, so mark the static functions in `assert.c` as 'VISIBLE_IF_KUNIT'
and conditionally export them with EXPORT_SYMBOL_IF_KUNIT. Add the
corresponding definitions to `assert.h`.

Build the assert test when CONFIG_KUNIT_TEST is enabled, similar to
how it is done for the string stream test.

Signed-off-by: Ivan Orlov <ivan.orlov0322@gmail.com>
Reviewed-by: Rae Moar <rmoar@google.com>
Acked-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/assert.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/kunit/assert.h b/include/kunit/assert.h
index 24c2b9fa61e8..7e7490a74b13 100644
--- a/include/kunit/assert.h
+++ b/include/kunit/assert.h
@@ -218,4 +218,15 @@ void kunit_mem_assert_format(const struct kunit_assert *assert,
 			     const struct va_format *message,
 			     struct string_stream *stream);
 
+#if IS_ENABLED(CONFIG_KUNIT)
+void kunit_assert_print_msg(const struct va_format *message,
+			    struct string_stream *stream);
+bool is_literal(const char *text, long long value);
+bool is_str_literal(const char *text, const char *value);
+void kunit_assert_hexdump(struct string_stream *stream,
+			  const void *buf,
+			  const void *compared_buf,
+			  const size_t len);
+#endif
+
 #endif /*  _KUNIT_ASSERT_H */
-- 
cgit v1.2.3


From 020e6c22bd6e67592f38b47d0f1926a831482560 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 10 May 2024 15:36:57 -0700
Subject: kcsan: Add example to data_race() kerneldoc header

Although the data_race() kerneldoc header accurately states what it does,
some of the implications and usage patterns are non-obvious.  Therefore,
add a brief locking example and also state how to have KCSAN ignore
accesses while also preventing the compiler from folding, spindling,
or otherwise mutilating the access.

[ paulmck: Apply Bart Van Assche feedback. ]
[ paulmck: Apply feedback from Marco Elver. ]

Reported-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Marco Elver <elver@google.com>
Cc: Breno Leitao <leitao@debian.org>
Cc: Jens Axboe <axboe@kernel.dk>
---
 include/linux/compiler.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 8c252e073bd8..68a24a3a6979 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -194,9 +194,17 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
  * This data_race() macro is useful for situations in which data races
  * should be forgiven.  One example is diagnostic code that accesses
  * shared variables but is not a part of the core synchronization design.
+ * For example, if accesses to a given variable are protected by a lock,
+ * except for diagnostic code, then the accesses under the lock should
+ * be plain C-language accesses and those in the diagnostic code should
+ * use data_race().  This way, KCSAN will complain if buggy lockless
+ * accesses to that variable are introduced, even if the buggy accesses
+ * are protected by READ_ONCE() or WRITE_ONCE().
  *
  * This macro *does not* affect normal code generation, but is a hint
- * to tooling that data races here are to be ignored.
+ * to tooling that data races here are to be ignored.  If the access must
+ * be atomic *and* KCSAN should ignore the access, use both data_race()
+ * and READ_ONCE(), for example, data_race(READ_ONCE(x)).
  */
 #define data_race(expr)							\
 ({									\
-- 
cgit v1.2.3


From 73287fe228721b05690e671adbcccc6cf5435be6 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <thinker.li@gmail.com>
Date: Wed, 29 May 2024 23:59:39 -0700
Subject: bpf: pass bpf_struct_ops_link to callbacks in bpf_struct_ops.

Pass an additional pointer of bpf_struct_ops_link to callback function reg,
unreg, and update provided by subsystems defined in bpf_struct_ops. A
bpf_struct_ops_map can be registered for multiple links. Passing a pointer
of bpf_struct_ops_link helps subsystems to distinguish them.

This pointer will be used in the later patches to let the subsystem
initiate a detachment on a link that was registered to it previously.

Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
Link: https://lore.kernel.org/r/20240530065946.979330-2-thinker.li@gmail.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/linux/bpf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5e694a308081..19f8836382fc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1730,9 +1730,9 @@ struct bpf_struct_ops {
 	int (*init_member)(const struct btf_type *t,
 			   const struct btf_member *member,
 			   void *kdata, const void *udata);
-	int (*reg)(void *kdata);
-	void (*unreg)(void *kdata);
-	int (*update)(void *kdata, void *old_kdata);
+	int (*reg)(void *kdata, struct bpf_link *link);
+	void (*unreg)(void *kdata, struct bpf_link *link);
+	int (*update)(void *kdata, void *old_kdata, struct bpf_link *link);
 	int (*validate)(void *kdata);
 	void *cfi_stubs;
 	struct module *owner;
-- 
cgit v1.2.3


From 1adddc97aa44c8783f9f0276ea70854d56f9f6df Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <thinker.li@gmail.com>
Date: Wed, 29 May 2024 23:59:41 -0700
Subject: bpf: support epoll from bpf struct_ops links.

Add epoll support to bpf struct_ops links to trigger EPOLLHUP event upon
detachment.

This patch implements the "poll" of the "struct file_operations" for BPF
links and introduces a new "poll" operator in the "struct bpf_link_ops". By
implementing "poll" of "struct bpf_link_ops" for the links of struct_ops,
the file descriptor of a struct_ops link can be added to an epoll file
descriptor to receive EPOLLHUP events.

Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
Link: https://lore.kernel.org/r/20240530065946.979330-4-thinker.li@gmail.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 19f8836382fc..5eb61120e4f5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1612,6 +1612,7 @@ struct bpf_link_ops {
 			      struct bpf_link_info *info);
 	int (*update_map)(struct bpf_link *link, struct bpf_map *new_map,
 			  struct bpf_map *old_map);
+	__poll_t (*poll)(struct file *file, struct poll_table_struct *pts);
 };
 
 struct bpf_tramp_link {
-- 
cgit v1.2.3


From 67c3e8353f45c27800eecc46e00e8272f063f7d1 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <thinker.li@gmail.com>
Date: Wed, 29 May 2024 23:59:42 -0700
Subject: bpf: export bpf_link_inc_not_zero.

bpf_link_inc_not_zero() will be used by kernel modules.  We will use it in
bpf_testmod.c later.

Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
Link: https://lore.kernel.org/r/20240530065946.979330-5-thinker.li@gmail.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/linux/bpf.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5eb61120e4f5..a834f4b761bc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2334,6 +2334,7 @@ int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer);
 int bpf_link_settle(struct bpf_link_primer *primer);
 void bpf_link_cleanup(struct bpf_link_primer *primer);
 void bpf_link_inc(struct bpf_link *link);
+struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link);
 void bpf_link_put(struct bpf_link *link);
 int bpf_link_new_fd(struct bpf_link *link);
 struct bpf_link *bpf_link_get_from_fd(u32 ufd);
@@ -2705,6 +2706,11 @@ static inline void bpf_link_inc(struct bpf_link *link)
 {
 }
 
+static inline struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
+{
+	return NULL;
+}
+
 static inline void bpf_link_put(struct bpf_link *link)
 {
 }
-- 
cgit v1.2.3


From 9ec54934ce857065e38523a2010e20182e76f515 Mon Sep 17 00:00:00 2001
From: Avri Altman <avri.altman@wdc.com>
Date: Thu, 30 May 2024 17:25:07 +0300
Subject: scsi: ufs: core: Allow RTT negotiation

The rtt-upiu packets precede any data-out upiu packets, thus synchronizing
the data input to the device: this mostly applies to write operations, but
there are other operations that requires rtt as well.

There are several rules binding this rtt - data-out dialog, specifically
There can be at most outstanding bMaxNumOfRTT such packets.  This might
have an effect on write performance (sequential write in particular), as
each data-out upiu must wait for its rtt sibling.

UFSHCI expects bMaxNumOfRTT to be min(bDeviceRTTCap, NORTT). However, as of
today, there does not appears to be no-one who sets it: not the host
controller nor the driver.  It wasn't an issue up to now: bMaxNumOfRTT is
set to 2 after manufacturing, and wasn't limiting the write performance.

UFS4.0, and specifically gear 5 changes this, and requires the device to be
more attentive.  This doesn't come free - the device has to allocate more
resources to that end, but the sequential write performance improvement is
significant. Early measurements shows 25% gain when moving from rtt 2 to
9. Therefore, set bMaxNumOfRTT to be min(bDeviceRTTCap, NORTT) as UFSHCI
expects.

Signed-off-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20240530142510.734-2-avri.altman@wdc.com
Reviewed-by: Bean Huo <beanhuo@micron.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufs.h    | 2 ++
 include/ufs/ufshcd.h | 2 ++
 include/ufs/ufshci.h | 1 +
 3 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h
index b6003749bc83..853e95957c31 100644
--- a/include/ufs/ufs.h
+++ b/include/ufs/ufs.h
@@ -592,6 +592,8 @@ struct ufs_dev_info {
 	enum ufs_rtc_time rtc_type;
 	time64_t rtc_time_baseline;
 	u32 rtc_update_period;
+
+	u8 rtt_cap; /* bDeviceRTTCap */
 };
 
 /*
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index bad88bd91995..d74bd2d67b06 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -819,6 +819,7 @@ enum ufshcd_mcq_opr {
  * @capabilities: UFS Controller Capabilities
  * @mcq_capabilities: UFS Multi Circular Queue capabilities
  * @nutrs: Transfer Request Queue depth supported by controller
+ * @nortt - Max outstanding RTTs supported by controller
  * @nutmrs: Task Management Queue depth supported by controller
  * @reserved_slot: Used to submit device commands. Protected by @dev_cmd.lock.
  * @ufs_version: UFS Version to which controller complies
@@ -957,6 +958,7 @@ struct ufs_hba {
 
 	u32 capabilities;
 	int nutrs;
+	int nortt;
 	u32 mcq_capabilities;
 	int nutmrs;
 	u32 reserved_slot;
diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h
index 385e1c6b8d60..c50f92bf2e1d 100644
--- a/include/ufs/ufshci.h
+++ b/include/ufs/ufshci.h
@@ -68,6 +68,7 @@ enum {
 /* Controller capability masks */
 enum {
 	MASK_TRANSFER_REQUESTS_SLOTS		= 0x0000001F,
+	MASK_NUMBER_OUTSTANDING_RTT		= 0x0000FF00,
 	MASK_TASK_MANAGEMENT_REQUEST_SLOTS	= 0x00070000,
 	MASK_EHSLUTRD_SUPPORTED			= 0x00400000,
 	MASK_AUTO_HIBERN8_SUPPORT		= 0x00800000,
-- 
cgit v1.2.3


From e75ff63300c5e8fac31649f438ebad6af88e0032 Mon Sep 17 00:00:00 2001
From: Avri Altman <avri.altman@wdc.com>
Date: Thu, 30 May 2024 17:25:08 +0300
Subject: scsi: ufs: core: Maximum RTT supported by the host driver

Allow platform vendors to take precedence having their own max rtt support.
This makes sense because the host controller's nortt characteristic may
vary among vendors.

while at it, set this value for Mediatek, as requested by Peter -
https://lore.kernel.org/all/0a57d6bab739d6a10584f2baba115d00dfc9c94c.camel@mediatek.com/

Signed-off-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20240530142510.734-3-avri.altman@wdc.com
Reviewed-by: Peter Wang <peter.wang@mediatek.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index d74bd2d67b06..ef04ec8aad69 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -295,6 +295,7 @@ struct ufs_pwr_mode_info {
 /**
  * struct ufs_hba_variant_ops - variant specific callbacks
  * @name: variant name
+ * @max_num_rtt: maximum RTT supported by the host
  * @init: called when the driver is initialized
  * @exit: called to cleanup everything done in init
  * @get_ufs_hci_version: called to get UFS HCI version
@@ -332,6 +333,7 @@ struct ufs_pwr_mode_info {
  */
 struct ufs_hba_variant_ops {
 	const char *name;
+	int	max_num_rtt;
 	int	(*init)(struct ufs_hba *);
 	void    (*exit)(struct ufs_hba *);
 	u32	(*get_ufs_hci_version)(struct ufs_hba *);
-- 
cgit v1.2.3


From 2fc39848952dfb91a9233563cc1444669b8e79c3 Mon Sep 17 00:00:00 2001
From: Minwoo Im <minwoo.im@samsung.com>
Date: Mon, 20 May 2024 07:14:56 +0900
Subject: scsi: ufs: mcq: Fix missing argument 'hba' in MCQ_OPR_OFFSET_n

The MCQ_OPR_OFFSET_n macro takes 'hba' in the caller context without
receiving 'hba' instance as an argument.  To prevent potential bugs in
future use cases, add an argument 'hba'.

Fixes: 2468da61ea09 ("scsi: ufs: core: mcq: Configure operation and runtime interface")
Cc: Asutosh Das <quic_asutoshd@quicinc.com>
Signed-off-by: Minwoo Im <minwoo.im@samsung.com>
Link: https://lore.kernel.org/r/20240519221457.772346-2-minwoo.im@samsung.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index ef04ec8aad69..1150b86e9355 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -1135,6 +1135,12 @@ static inline bool is_mcq_enabled(struct ufs_hba *hba)
 	return hba->mcq_enabled;
 }
 
+static inline unsigned int ufshcd_mcq_opr_offset(struct ufs_hba *hba,
+		enum ufshcd_mcq_opr opr, int idx)
+{
+	return hba->mcq_opr[opr].offset + hba->mcq_opr[opr].stride * idx;
+}
+
 #ifdef CONFIG_SCSI_UFS_VARIABLE_SG_ENTRY_SIZE
 static inline size_t ufshcd_sg_entry_size(const struct ufs_hba *hba)
 {
-- 
cgit v1.2.3


From e8a1d87b7983b461d1d625e2973cdaadc0bd8ff5 Mon Sep 17 00:00:00 2001
From: Minwoo Im <minwoo.im@samsung.com>
Date: Mon, 20 May 2024 07:14:57 +0900
Subject: scsi: ufs: mcq: Convert MCQ_CFG_n to an inline function

Inline functions are preferred over macros. Convert the MCQ_CFG_n macro to
an inline function.

Signed-off-by: Minwoo Im <minwoo.im@samsung.com>
Link: https://lore.kernel.org/r/20240519221457.772346-3-minwoo.im@samsung.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 1150b86e9355..df68fb1d4f3f 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -1130,6 +1130,8 @@ struct ufs_hw_queue {
 	struct mutex sq_mutex;
 };
 
+#define MCQ_QCFG_SIZE		0x40
+
 static inline bool is_mcq_enabled(struct ufs_hba *hba)
 {
 	return hba->mcq_enabled;
@@ -1141,6 +1143,11 @@ static inline unsigned int ufshcd_mcq_opr_offset(struct ufs_hba *hba,
 	return hba->mcq_opr[opr].offset + hba->mcq_opr[opr].stride * idx;
 }
 
+static inline unsigned int ufshcd_mcq_cfg_offset(unsigned int reg, int idx)
+{
+	return reg + MCQ_QCFG_SIZE * idx;
+}
+
 #ifdef CONFIG_SCSI_UFS_VARIABLE_SG_ENTRY_SIZE
 static inline size_t ufshcd_sg_entry_size(const struct ufs_hba *hba)
 {
-- 
cgit v1.2.3


From a79d8fe2ff8e78e549dc86cc853a61b029404871 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 29 May 2024 12:09:08 +0800
Subject: ipv6: sr: restruct ifdefines

There are too many ifdef in IPv6 segment routing code that may cause logic
problems. like commit 160e9d275218 ("ipv6: sr: fix invalid unregister error
path"). To avoid this, the init functions are redefined for both cases. The
code could be more clear after all fidefs are removed.

Suggested-by: Simon Horman <horms@kernel.org>
Suggested-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20240529040908.3472952-1-liuhangbin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/seg6.h      | 7 +++++++
 include/net/seg6_hmac.h | 7 +++++++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/net/seg6.h b/include/net/seg6.h
index af668f17b398..82b3fbbcbb93 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -52,10 +52,17 @@ static inline struct seg6_pernet_data *seg6_pernet(struct net *net)
 
 extern int seg6_init(void);
 extern void seg6_exit(void);
+#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
 extern int seg6_iptunnel_init(void);
 extern void seg6_iptunnel_exit(void);
 extern int seg6_local_init(void);
 extern void seg6_local_exit(void);
+#else
+static inline int seg6_iptunnel_init(void) { return 0; }
+static inline void seg6_iptunnel_exit(void) {}
+static inline int seg6_local_init(void) { return 0; }
+static inline void seg6_local_exit(void) {}
+#endif
 
 extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced);
 extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags);
diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h
index 2b5d2ee5613e..24f733b3e3fe 100644
--- a/include/net/seg6_hmac.h
+++ b/include/net/seg6_hmac.h
@@ -49,9 +49,16 @@ extern int seg6_hmac_info_del(struct net *net, u32 key);
 extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr,
 			  struct ipv6_sr_hdr *srh);
 extern bool seg6_hmac_validate_skb(struct sk_buff *skb);
+#ifdef CONFIG_IPV6_SEG6_HMAC
 extern int seg6_hmac_init(void);
 extern void seg6_hmac_exit(void);
 extern int seg6_hmac_net_init(struct net *net);
 extern void seg6_hmac_net_exit(struct net *net);
+#else
+static inline int seg6_hmac_init(void) { return 0; }
+static inline void seg6_hmac_exit(void) {}
+static inline int seg6_hmac_net_init(struct net *net) { return 0; }
+static inline void seg6_hmac_net_exit(struct net *net) {}
+#endif
 
 #endif
-- 
cgit v1.2.3


From 02d00dc73d8d0613f82063ed53d67912a422c21e Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Wed, 29 May 2024 14:29:29 +0100
Subject: net: phylink: rename ovr_an_inband to default_an_inband

Since ovr_an_inband no longer overrides every MLO_AN_xxx mode, rename
it to reflect what it now does - it changes the default mode from
MLO_AN_PHY to MLO_AN_INBAND. Fix up the two users of this.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Halaney <ahalaney@redhat.com>
Link: https://lore.kernel.org/r/E1sCJMv-00Ecr1-Sk@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phylink.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 5ea6b2ad2396..a30a692acc32 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -141,7 +141,8 @@ enum phylink_op_type {
  * @mac_requires_rxc: if true, the MAC always requires a receive clock from PHY.
  *                    The PHY driver should start the clock signal as soon as
  *                    possible and avoid stopping it during suspend events.
- * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND
+ * @default_an_inband: if true, defaults to MLO_AN_INBAND rather than
+ *		       MLO_AN_PHY. A fixed-link specification will override.
  * @get_fixed_state: callback to execute to determine the fixed link state,
  *		     if MAC link is at %MLO_AN_FIXED mode.
  * @supported_interfaces: bitmap describing which PHY_INTERFACE_MODE_xxx
@@ -154,7 +155,7 @@ struct phylink_config {
 	bool poll_fixed_state;
 	bool mac_managed_pm;
 	bool mac_requires_rxc;
-	bool ovr_an_inband;
+	bool default_an_inband;
 	void (*get_fixed_state)(struct phylink_config *config,
 				struct phylink_link_state *state);
 	DECLARE_PHY_INTERFACE_MASK(supported_interfaces);
-- 
cgit v1.2.3


From 83f55b01dd903040d6ab64f4f9d78a27391a5916 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Wed, 29 May 2024 14:29:40 +0100
Subject: net: stmmac: rename xpcs_an_inband to default_an_inband

Rename xpcs_an_inband to default_an_inband to reflect the change in
phylink and its changed functionality.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Halaney <ahalaney@redhat.com>
Link: https://lore.kernel.org/r/E1sCJN6-00EcrD-43@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/stmmac.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index f92c195c76ed..8f0f156d50d3 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -83,7 +83,7 @@ struct stmmac_priv;
 struct stmmac_mdio_bus_data {
 	unsigned int phy_mask;
 	unsigned int has_xpcs;
-	unsigned int xpcs_an_inband;
+	unsigned int default_an_inband;
 	int *irqs;
 	int probed_phy_irq;
 	bool needs_reset;
-- 
cgit v1.2.3


From 5607ca92e6274dfb85d0ff7c4e91e6c4ddb6d25c Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 31 May 2024 13:41:22 +0200
Subject: leds: core: Add led_mc_set_brightness() function

Add a new led_mc_set_brightness() function for in kernel color/brightness
changing of multi-color LEDs.

led-class-multicolor can be build as a module and led_mc_set_brightness()
will have the builtin callers, so put led_mc_set_brightness() inside
led-core instead, just like how led_set_brightness() is part of the core
and not of the led-class object.

This also adds a new LED_MULTI_COLOR led_classdev flag to allow
led_mc_set_brightness() to verify that it is operating on a multi-color
LED classdev, avoiding casting the passed in LED classdev to a multi-color
LED classdev, when it actually is not a multi-color LED.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Link: https://lore.kernel.org/r/20240531114124.45346-5-hdegoede@redhat.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/leds.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 6300313c46b7..ae07e44f1dcb 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -107,6 +107,7 @@ struct led_classdev {
 #define LED_BRIGHT_HW_CHANGED	BIT(21)
 #define LED_RETAIN_AT_SHUTDOWN	BIT(22)
 #define LED_INIT_DEFAULT_TRIGGER BIT(23)
+#define LED_MULTI_COLOR		BIT(24)
 
 	/* set_brightness_work / blink_timer flags, atomic, private. */
 	unsigned long		work_flags;
@@ -373,6 +374,25 @@ void led_set_brightness(struct led_classdev *led_cdev, unsigned int brightness);
  */
 int led_set_brightness_sync(struct led_classdev *led_cdev, unsigned int value);
 
+/**
+ * led_mc_set_brightness - set mc LED color intensity values and brightness
+ * @led_cdev: the LED to set
+ * @intensity_value: array of per color intensity values to set
+ * @num_colors: amount of entries in intensity_value array
+ * @brightness: the brightness to set the LED to
+ *
+ * Set a multi-color LED's per color intensity values and brightness.
+ * If necessary, this cancels the software blink timer. This function is
+ * guaranteed not to sleep.
+ *
+ * Calling this function on a non multi-color led_classdev or with the wrong
+ * num_colors value is an error. In this case an error will be logged once
+ * and the call will do nothing.
+ */
+void led_mc_set_brightness(struct led_classdev *led_cdev,
+			   unsigned int *intensity_value, unsigned int num_colors,
+			   unsigned int brightness);
+
 /**
  * led_update_brightness - update LED brightness
  * @led_cdev: the LED to query
-- 
cgit v1.2.3


From 0921a57c91648b08857b47a2f26fa7942f06120f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 31 May 2024 13:41:23 +0200
Subject: leds: trigger: Add led_mc_trigger_event() function

Add a new led_mc_trigger_event() function for triggers which want to
change the color of a multi-color LED based on their trigger conditions.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Link: https://lore.kernel.org/r/20240531114124.45346-6-hdegoede@redhat.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/leds.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index ae07e44f1dcb..517b6198df07 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -510,6 +510,9 @@ void led_trigger_register_simple(const char *name,
 				struct led_trigger **trigger);
 void led_trigger_unregister_simple(struct led_trigger *trigger);
 void led_trigger_event(struct led_trigger *trigger,  enum led_brightness event);
+void led_mc_trigger_event(struct led_trigger *trig,
+			  unsigned int *intensity_value, unsigned int num_colors,
+			  enum led_brightness brightness);
 void led_trigger_blink(struct led_trigger *trigger, unsigned long delay_on,
 		       unsigned long delay_off);
 void led_trigger_blink_oneshot(struct led_trigger *trigger,
@@ -552,6 +555,9 @@ static inline void led_trigger_register_simple(const char *name,
 static inline void led_trigger_unregister_simple(struct led_trigger *trigger) {}
 static inline void led_trigger_event(struct led_trigger *trigger,
 				enum led_brightness event) {}
+static inline void led_mc_trigger_event(struct led_trigger *trig,
+				unsigned int *intensity_value, unsigned int num_colors,
+				enum led_brightness brightness) {}
 static inline void led_trigger_blink(struct led_trigger *trigger,
 				      unsigned long delay_on,
 				      unsigned long delay_off) {}
-- 
cgit v1.2.3


From 9af12f57f1f9785f231d31a7365ad244c656b7ff Mon Sep 17 00:00:00 2001
From: Kate Hsuan <hpa@redhat.com>
Date: Fri, 31 May 2024 13:41:24 +0200
Subject: power: supply: power-supply-leds: Add charging_orange_full_green
 trigger for RGB LED

Add a charging_orange_full_green LED trigger and the trigger is based on
led_mc_trigger_event() which can set an RGB LED when the trigger is
triggered. The LED will show orange when the battery status is charging.
The LED will show green when the battery status is full.

Link: https://lore.kernel.org/linux-leds/f40a0b1a-ceac-e269-c2dd-0158c5b4a1ad@gmail.com/
Signed-off-by: Kate Hsuan <hpa@redhat.com>
Acked-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
[hdegoede@redhat.com change color order to RGB]
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20240531114124.45346-7-hdegoede@redhat.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/power_supply.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 8e5705a56b85..c852cc882501 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -319,6 +319,8 @@ struct power_supply {
 	char *online_trig_name;
 	struct led_trigger *charging_blink_full_solid_trig;
 	char *charging_blink_full_solid_trig_name;
+	struct led_trigger *charging_orange_full_green_trig;
+	char *charging_orange_full_green_trig_name;
 #endif
 };
 
-- 
cgit v1.2.3


From b44d79d6bad16c30978c2cee3421133d3f181494 Mon Sep 17 00:00:00 2001
From: Tero Kristo <tero.kristo@linux.intel.com>
Date: Mon, 27 May 2024 16:29:33 +0300
Subject: platform/x86/intel/tpmi: Add support for performance limit reasons
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add TPMI ID 0x0C (Perf Limit Reasons) to the list of supported TPMI IDs.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Tero Kristo <tero.kristo@linux.intel.com>
Link: https://lore.kernel.org/r/20240527133400.483634-2-tero.kristo@linux.intel.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/intel_tpmi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h
index 1e880cb0f454..a88ac937d2c2 100644
--- a/include/linux/intel_tpmi.h
+++ b/include/linux/intel_tpmi.h
@@ -21,6 +21,7 @@ enum intel_tpmi_id {
 	TPMI_ID_PEM = 1,	/* Power and Perf excursion Monitor */
 	TPMI_ID_UNCORE = 2,	/* Uncore Frequency Scaling */
 	TPMI_ID_SST = 5,	/* Speed Select Technology */
+	TPMI_ID_PLR = 0xc,	/* Performance Limit Reasons */
 	TPMI_CONTROL_ID = 0x80,	/* Special ID for getting feature status */
 	TPMI_INFO_ID = 0x81,	/* Special ID for PCI BDF and Package ID information */
 };
-- 
cgit v1.2.3


From d36842bacf8e3491f555059f27de57b3436cc3ff Mon Sep 17 00:00:00 2001
From: Tero Kristo <tero.kristo@linux.intel.com>
Date: Mon, 27 May 2024 16:29:34 +0300
Subject: platform/x86/intel/tpmi: Add API to get debugfs root
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add new API to get the debugfs root directory for TPMI. This allows any
TPMI devices to add their own debugfs items under the same directory
structure.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Tero Kristo <tero.kristo@linux.intel.com>
Link: https://lore.kernel.org/r/20240527133400.483634-3-tero.kristo@linux.intel.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/intel_tpmi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h
index a88ac937d2c2..ff480b47ae64 100644
--- a/include/linux/intel_tpmi.h
+++ b/include/linux/intel_tpmi.h
@@ -54,4 +54,5 @@ struct resource *tpmi_get_resource_at_index(struct auxiliary_device *auxdev, int
 int tpmi_get_resource_count(struct auxiliary_device *auxdev);
 int tpmi_get_feature_status(struct auxiliary_device *auxdev, int feature_id, bool *read_blocked,
 			    bool *write_blocked);
+struct dentry *tpmi_get_debugfs_dir(struct auxiliary_device *auxdev);
 #endif
-- 
cgit v1.2.3


From 4d2bcefa965b06a1f2be6912456bcfa86a34f184 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 31 May 2024 13:29:02 +0100
Subject: mm: Reduce the number of slab->folio casts

Mark a few more folio functions as taking a const folio pointer, which
allows us to remove a few places in slab which cast away the const.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/mm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9849dfda44d4..a983d371fafa 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1105,7 +1105,7 @@ static inline unsigned int compound_order(struct page *page)
  *
  * Return: The order of the folio.
  */
-static inline unsigned int folio_order(struct folio *folio)
+static inline unsigned int folio_order(const struct folio *folio)
 {
 	if (!folio_test_large(folio))
 		return 0;
@@ -2145,7 +2145,7 @@ static inline struct folio *folio_next(struct folio *folio)
  * it from being split.  It is not necessary for the folio to be locked.
  * Return: The base-2 logarithm of the size of this folio.
  */
-static inline unsigned int folio_shift(struct folio *folio)
+static inline unsigned int folio_shift(const struct folio *folio)
 {
 	return PAGE_SHIFT + folio_order(folio);
 }
@@ -2158,7 +2158,7 @@ static inline unsigned int folio_shift(struct folio *folio)
  * it from being split.  It is not necessary for the folio to be locked.
  * Return: The number of bytes in this folio.
  */
-static inline size_t folio_size(struct folio *folio)
+static inline size_t folio_size(const struct folio *folio)
 {
 	return PAGE_SIZE << folio_order(folio);
 }
-- 
cgit v1.2.3


From b7c5e64fecfa88764791679cca4786ac65de739e Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 29 May 2024 22:52:30 -0600
Subject: vfio: Create vfio_fs_type with inode per device

By linking all the device fds we provide to userspace to an
address space through a new pseudo fs, we can use tools like
unmap_mapping_range() to zap all vmas associated with a device.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20240530045236.1005864-2-alex.williamson@redhat.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/vfio.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 8b1a29820409..000a6cab2d31 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -64,6 +64,7 @@ struct vfio_device {
 	struct completion comp;
 	struct iommufd_access *iommufd_access;
 	void (*put_kvm)(struct kvm *kvm);
+	struct inode *inode;
 #if IS_ENABLED(CONFIG_IOMMUFD)
 	struct iommufd_device *iommufd_device;
 	u8 iommufd_attached:1;
-- 
cgit v1.2.3


From aac6db75a9fc2c7a6f73e152df8f15101dda38e6 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 29 May 2024 22:52:31 -0600
Subject: vfio/pci: Use unmap_mapping_range()

With the vfio device fd tied to the address space of the pseudo fs
inode, we can use the mm to track all vmas that might be mmap'ing
device BARs, which removes our vma_list and all the complicated lock
ordering necessary to manually zap each related vma.

Note that we can no longer store the pfn in vm_pgoff if we want to use
unmap_mapping_range() to zap a selective portion of the device fd
corresponding to BAR mappings.

This also converts our mmap fault handler to use vmf_insert_pfn()
because we no longer have a vma_list to avoid the concurrency problem
with io_remap_pfn_range().  The goal is to eventually use the vm_ops
huge_fault handler to avoid the additional faulting overhead, but
vmf_insert_pfn_{pmd,pud}() need to learn about pfnmaps first.

Also, Jason notes that a race exists between unmap_mapping_range() and
the fops mmap callback if we were to call io_remap_pfn_range() to
populate the vma on mmap.  Specifically, mmap_region() does call_mmap()
before it does vma_link_file() which gives a window where the vma is
populated but invisible to unmap_mapping_range().

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20240530045236.1005864-3-alex.williamson@redhat.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/vfio_pci_core.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index a2c8b8bba711..f87067438ed4 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -93,8 +93,6 @@ struct vfio_pci_core_device {
 	struct list_head		sriov_pfs_item;
 	struct vfio_pci_core_device	*sriov_pf_core_dev;
 	struct notifier_block	nb;
-	struct mutex		vma_lock;
-	struct list_head	vma_list;
 	struct rw_semaphore	memory_lock;
 };
 
-- 
cgit v1.2.3


From 055afc34fd219c8e2290d736ad186edd58870a9e Mon Sep 17 00:00:00 2001
From: Unnathi Chalicheemala <quic_uchalich@quicinc.com>
Date: Fri, 31 May 2024 09:45:25 -0700
Subject: soc: qcom: llcc: Add regmap for Broadcast_AND region

Until SM8450, there was only one broadcast region (Broadcast_OR)
used to broadcast write and check for status bit 0.
>From SM8450 onwards another broadcast region (Broadcast_AND) has been
added which checks for status bit 1. This hasn't been updated and
Broadcast_OR region was wrongly being used to check for status bit 1 all
along.

Hence define new regmap structure for Broadcast_AND region and initialize
this regmap when HW block version is greater than 4.1, otherwise
initialize as a NULL pointer for backwards compatibility.
Switch from broadcast_OR to broadcast_AND region (when defined in DT)
for checking status bit 1 as Broadcast_OR region checks only for bit 0.

Signed-off-by: Unnathi Chalicheemala <quic_uchalich@quicinc.com>
Reviewed-by: Bjorn Andersson <quic_bjorande@quicinc.com>
Link: https://lore.kernel.org/r/9cf19928a67eaa577ae0f02de5bf86276be34ea2.1717014052.git.quic_uchalich@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/soc/qcom/llcc-qcom.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index 1a886666bbb6..9e9f528b1370 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -115,7 +115,8 @@ struct llcc_edac_reg_offset {
 /**
  * struct llcc_drv_data - Data associated with the llcc driver
  * @regmaps: regmaps associated with the llcc device
- * @bcast_regmap: regmap associated with llcc broadcast offset
+ * @bcast_regmap: regmap associated with llcc broadcast OR offset
+ * @bcast_and_regmap: regmap associated with llcc broadcast AND offset
  * @cfg: pointer to the data structure for slice configuration
  * @edac_reg_offset: Offset of the LLCC EDAC registers
  * @lock: mutex associated with each slice
@@ -129,6 +130,7 @@ struct llcc_edac_reg_offset {
 struct llcc_drv_data {
 	struct regmap **regmaps;
 	struct regmap *bcast_regmap;
+	struct regmap *bcast_and_regmap;
 	const struct llcc_slice_config *cfg;
 	const struct llcc_edac_reg_offset *edac_reg_offset;
 	struct mutex lock;
-- 
cgit v1.2.3


From 69c8b998717c03adeada070882512ebeae11d71f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 29 May 2024 09:29:22 -0700
Subject: net: qstat: extend kdoc about get_base_stats

mlx5 has a dedicated queue for PTP packets. Clarify that
this sort of queues can also be accounted towards the base.

Reviewed-by: Joe Damato <jdamato@fastly.com>
Link: https://lore.kernel.org/r/20240529162922.3690698-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/netdev_queues.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index a8a7e48dfa6c..5ca019d294ca 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -62,6 +62,8 @@ struct netdev_queue_stats_tx {
  * statistics will not generally add up to the total number of events for
  * the device. The @get_base_stats callback allows filling in the delta
  * between events for currently live queues and overall device history.
+ * @get_base_stats can also be used to report any miscellaneous packets
+ * transferred outside of the main set of queues used by the networking stack.
  * When the statistics for the entire device are queried, first @get_base_stats
  * is issued to collect the delta, and then a series of per-queue callbacks.
  * Only statistics which are set in @get_base_stats will be reported
-- 
cgit v1.2.3


From 964a504b1261b641797d93cdbf79b68ff7d85720 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 31 May 2024 15:47:00 +0200
Subject: power: supply: leds: Add power_supply_[un]register_led_trigger()

Add power_supply_[un]register_led_trigger() helper functions.

The primary goal of this is as a preparation patch for adding an activate
callback to the power-supply LED triggers to ensure that power-supply
LEDs get the correct initial value when the LED gets registered after
the power_supply has been registered (this will use the psy back pointer).

There also is quite a lot of code duplication in the existing LED trigger
registration in the form of the kasprintf() for the name-template for each
trigger + related error handling. This duplication is removed by these
new helpers.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Link: https://lore.kernel.org/r/20240531134702.166145-2-hdegoede@redhat.com
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/power_supply.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index c852cc882501..e218a8ab72ee 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -310,17 +310,11 @@ struct power_supply {
 
 #ifdef CONFIG_LEDS_TRIGGERS
 	struct led_trigger *charging_full_trig;
-	char *charging_full_trig_name;
 	struct led_trigger *charging_trig;
-	char *charging_trig_name;
 	struct led_trigger *full_trig;
-	char *full_trig_name;
 	struct led_trigger *online_trig;
-	char *online_trig_name;
 	struct led_trigger *charging_blink_full_solid_trig;
-	char *charging_blink_full_solid_trig_name;
 	struct led_trigger *charging_orange_full_green_trig;
-	char *charging_orange_full_green_trig_name;
 #endif
 };
 
-- 
cgit v1.2.3


From 6c951a84dab9c0e051aec54d7497f25e910a4953 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 31 May 2024 15:47:01 +0200
Subject: power: supply: leds: Share trig pointer for online and charging_full

Either 5 different LED triggers are registered for battery power-supply
devices or a single online LED trigger is used for non battery power-supply
devices.

These 5 / 1 LED trigger(s) are never used at the same time. So there is
no need for a separate LED trigger pointer for the online trigger. Rename
the first battery trigger from charging_full_trig to just trig and use this
for the online trigger too.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Link: https://lore.kernel.org/r/20240531134702.166145-3-hdegoede@redhat.com
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/power_supply.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index e218a8ab72ee..65082ef75692 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -309,10 +309,9 @@ struct power_supply {
 #endif
 
 #ifdef CONFIG_LEDS_TRIGGERS
-	struct led_trigger *charging_full_trig;
+	struct led_trigger *trig;
 	struct led_trigger *charging_trig;
 	struct led_trigger *full_trig;
-	struct led_trigger *online_trig;
 	struct led_trigger *charging_blink_full_solid_trig;
 	struct led_trigger *charging_orange_full_green_trig;
 #endif
-- 
cgit v1.2.3


From a14a569a9918a0c7e340257a17dbc088bb27db72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Wed, 29 May 2024 08:27:11 +0200
Subject: platform/chrome: cros_ec_proto: Introduce cros_ec_cmd_readmem()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To read from the EC memory different mechanism are possible.
ECs connected via LPC expose their memory via a ->cmd_readmem operation.
Other protocols require the usage of EC_CMD_READ_MEMMAP, which on the
other hand is not implemented by LPC ECs.

Provide a helper that automatically selects the correct mechanism.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20240529-cros_ec-hwmon-v4-1-5cdf0c5db50a@weissschuh.net
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
---
 include/linux/platform_data/cros_ec_proto.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index 8865e350c12a..1ddc52603f9a 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -261,6 +261,8 @@ int cros_ec_get_sensor_count(struct cros_ec_dev *ec);
 int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, const void *outdata,
 		    size_t outsize, void *indata, size_t insize);
 
+int cros_ec_cmd_readmem(struct cros_ec_device *ec_dev, u8 offset, u8 size, void *dest);
+
 /**
  * cros_ec_get_time_ns() - Return time in ns.
  *
-- 
cgit v1.2.3


From 6b2e29977518ec13ef3022f234ff8f3014c243da Mon Sep 17 00:00:00 2001
From: Lakshmi Sowjanya D <lakshmi.sowjanya.d@intel.com>
Date: Mon, 13 May 2024 16:08:02 +0530
Subject: timekeeping: Provide infrastructure for converting to/from a base
 clock

Hardware time stamps like provided by PTP clock implementations are based
on a clock which feeds both the PCIe device and the system clock. For
further processing the underlying hardwarre clock timestamp must be
converted to the system clock.

Right now this requires drivers to invoke an architecture specific
conversion function, e.g. to convert the ART (Always Running Timer)
timestamp to a TSC timestamp.

As the system clock is aware of the underlying base clock, this can be
moved to the core code by providing a base clock property for the system
clock which contains the conversion factors and assigning a clocksource ID
to the base clock.

Add the required data structures and the conversion infrastructure in the
core code to prepare for converting X86 and the related PTP drivers over.

[ tglx: Added a missing READ_ONCE(). Massaged change log ]

Co-developed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Co-developed-by: Christopher S. Hall <christopher.s.hall@intel.com>
Signed-off-by: Christopher S. Hall <christopher.s.hall@intel.com>
Signed-off-by: Lakshmi Sowjanya D <lakshmi.sowjanya.d@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240513103813.5666-2-lakshmi.sowjanya.d@intel.com
---
 include/linux/clocksource.h | 27 +++++++++++++++++++++++++++
 include/linux/timekeeping.h |  2 ++
 2 files changed, 29 insertions(+)

(limited to 'include')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 0ad8b550bb4b..d35b677b08fe 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -21,6 +21,7 @@
 #include <asm/div64.h>
 #include <asm/io.h>
 
+struct clocksource_base;
 struct clocksource;
 struct module;
 
@@ -50,6 +51,7 @@ struct module;
  *			multiplication
  * @name:		Pointer to clocksource name
  * @list:		List head for registration (internal)
+ * @freq_khz:		Clocksource frequency in khz.
  * @rating:		Rating value for selection (higher is better)
  *			To avoid rating inflation the following
  *			list should give you a guide as to how
@@ -70,6 +72,8 @@ struct module;
  *			validate the clocksource from which the snapshot was
  *			taken.
  * @flags:		Flags describing special properties
+ * @base:		Hardware abstraction for clock on which a clocksource
+ *			is based
  * @enable:		Optional function to enable the clocksource
  * @disable:		Optional function to disable the clocksource
  * @suspend:		Optional suspend function for the clocksource
@@ -107,10 +111,12 @@ struct clocksource {
 	u64			max_cycles;
 	const char		*name;
 	struct list_head	list;
+	u32			freq_khz;
 	int			rating;
 	enum clocksource_ids	id;
 	enum vdso_clock_mode	vdso_clock_mode;
 	unsigned long		flags;
+	struct clocksource_base *base;
 
 	int			(*enable)(struct clocksource *cs);
 	void			(*disable)(struct clocksource *cs);
@@ -306,4 +312,25 @@ static inline unsigned int clocksource_get_max_watchdog_retry(void)
 
 void clocksource_verify_percpu(struct clocksource *cs);
 
+/**
+ * struct clocksource_base - hardware abstraction for clock on which a clocksource
+ *			is based
+ * @id:			Defaults to CSID_GENERIC. The id value is used for conversion
+ *			functions which require that the current clocksource is based
+ *			on a clocksource_base with a particular ID in certain snapshot
+ *			functions to allow callers to validate the clocksource from
+ *			which the snapshot was taken.
+ * @freq_khz:		Nominal frequency of the base clock in kHz
+ * @offset:		Offset between the base clock and the clocksource
+ * @numerator:		Numerator of the clock ratio between base clock and the clocksource
+ * @denominator:	Denominator of the clock ratio between base clock and the clocksource
+ */
+struct clocksource_base {
+	enum clocksource_ids	id;
+	u32			freq_khz;
+	u64			offset;
+	u32			numerator;
+	u32			denominator;
+};
+
 #endif /* _LINUX_CLOCKSOURCE_H */
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 0ea7823b7f31..b2ee182d891e 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -310,10 +310,12 @@ struct system_device_crosststamp {
  *		timekeeping code to verify comparability of two cycle values.
  *		The default ID, CSID_GENERIC, does not identify a specific
  *		clocksource.
+ * @use_nsecs:	@cycles is in nanoseconds.
  */
 struct system_counterval_t {
 	u64			cycles;
 	enum clocksource_ids	cs_id;
+	bool			use_nsecs;
 };
 
 /*
-- 
cgit v1.2.3


From 3a52886c8f972c3a5b70bfec330c71817cd7fc63 Mon Sep 17 00:00:00 2001
From: Lakshmi Sowjanya D <lakshmi.sowjanya.d@intel.com>
Date: Mon, 13 May 2024 16:08:03 +0530
Subject: x86/tsc: Provide ART base clock information for TSC

The core code provides a new mechanism to allow conversion between ART and
TSC. This allows to replace the x86 specific ART/TSC conversion functions.

Prepare for removal by filling in the base clock conversion information for
ART and associating the base clock to the TSC clocksource.

The existing conversion functions will be removed once the usage sites are
converted over to the new model.

[ tglx: Massaged change log ]

Co-developed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Co-developed-by: Christopher S. Hall <christopher.s.hall@intel.com>
Signed-off-by: Christopher S. Hall <christopher.s.hall@intel.com>
Signed-off-by: Lakshmi Sowjanya D <lakshmi.sowjanya.d@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240513103813.5666-3-lakshmi.sowjanya.d@intel.com
---
 include/linux/clocksource_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h
index a4fa3436940c..2bb4d8c2f1b0 100644
--- a/include/linux/clocksource_ids.h
+++ b/include/linux/clocksource_ids.h
@@ -9,6 +9,7 @@ enum clocksource_ids {
 	CSID_X86_TSC_EARLY,
 	CSID_X86_TSC,
 	CSID_X86_KVM_CLK,
+	CSID_X86_ART,
 	CSID_MAX,
 };
 
-- 
cgit v1.2.3


From 02ecee07ca30f76f2a0f1381661a688b8e501ab0 Mon Sep 17 00:00:00 2001
From: Lakshmi Sowjanya D <lakshmi.sowjanya.d@intel.com>
Date: Mon, 13 May 2024 16:08:10 +0530
Subject: timekeeping: Add function to convert realtime to base clock

PPS (Pulse Per Second) generates a hardware pulse every second based on
CLOCK_REALTIME. This works fine when the pulse is generated in software
from a hrtimer callback function.

For hardware which generates the pulse by programming a timer it is
required to convert CLOCK_REALTIME to the underlying hardware clock.

The X86 Timed IO device is based on the Always Running Timer (ART), which
is the base clock of the TSC, which is usually the system clocksource on
X86.

The core code already has functionality to convert base clock timestamps to
system clocksource timestamps, but there is no support for converting the
other way around.

Provide the required functionality to support such devices in a generic
way to avoid code duplication in drivers:

  1) ktime_real_to_base_clock() to convert a CLOCK_REALTIME timestamp to a
     base clock timestamp

  2) timekeeping_clocksource_has_base() to allow drivers to validate that
     the system clocksource is based on a particular clocksource ID.

[ tglx: Simplify timekeeping_clocksource_has_base() and add missing READ_ONCE() ]

Co-developed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Co-developed-by: Christopher S. Hall <christopher.s.hall@intel.com>
Signed-off-by: Christopher S. Hall <christopher.s.hall@intel.com>
Signed-off-by: Lakshmi Sowjanya D <lakshmi.sowjanya.d@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240513103813.5666-10-lakshmi.sowjanya.d@intel.com
---
 include/linux/timekeeping.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index b2ee182d891e..fc12a9ba2c88 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -318,6 +318,10 @@ struct system_counterval_t {
 	bool			use_nsecs;
 };
 
+extern bool ktime_real_to_base_clock(ktime_t treal,
+				     enum clocksource_ids base_id, u64 *cycles);
+extern bool timekeeping_clocksource_has_base(enum clocksource_ids id);
+
 /*
  * Get cross timestamp between system clock and device clock
  */
-- 
cgit v1.2.3


From 56f45266df67aa0f5b2a6881c8c4d16dbfff6b7d Mon Sep 17 00:00:00 2001
From: Ken Sloat <ksloat@designlinxhs.com>
Date: Thu, 15 Dec 2022 16:07:15 +0000
Subject: pwm: xilinx: Fix u32 overflow issue in 32-bit width PWM mode.

This timer HW supports 8, 16 and 32-bit timer widths. This
driver currently uses a u32 to store the max possible value
of the timer. However, statements perform addition of 2 in
xilinx_pwm_apply() when calculating the period_cycles and
duty_cycles values. Since priv->max is a u32, this will
result in an overflow to 1 which will not only be incorrect
but fail on range comparison. This results in making it
impossible to set the PWM in this timer mode.

There are two obvious solutions to the current problem:
1. Cast each instance where overflow occurs to u64.
2. Change priv->max from a u32 to a u64.

Solution #1 requires more code modifications, and leaves
opportunity to introduce similar overflows if other math
statements are added in the future. These may also go
undetected if running in non 32-bit timer modes.

Solution #2 is the much smaller and cleaner approach and
thus the chosen method in this patch.

This was tested on a Zynq UltraScale+ with multiple
instances of the PWM IP.

Signed-off-by: Ken Sloat <ksloat@designlinxhs.com>
Reviewed-by: Michal Simek <michal.simek@amd.com>
Reviewed-by: Sean Anderson <sean.anderson@seco.com>
Link: https://lore.kernel.org/r/SJ0P222MB0107490C5371B848EF04351CA1E19@SJ0P222MB0107.NAMP222.PROD.OUTLOOK.COM
Signed-off-by: Michal Simek <michal.simek@amd.com>
---
 include/clocksource/timer-xilinx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/clocksource/timer-xilinx.h b/include/clocksource/timer-xilinx.h
index c0f56fe6d22a..d116f18de899 100644
--- a/include/clocksource/timer-xilinx.h
+++ b/include/clocksource/timer-xilinx.h
@@ -41,7 +41,7 @@ struct regmap;
 struct xilinx_timer_priv {
 	struct regmap *map;
 	struct clk *clk;
-	u32 max;
+	u64 max;
 };
 
 /**
-- 
cgit v1.2.3


From fcf544ac64397776a18246eba5a8ca72a47c4405 Mon Sep 17 00:00:00 2001
From: Jay Buddhabhatti <jay.buddhabhatti@amd.com>
Date: Wed, 24 Apr 2024 05:49:00 -0700
Subject: soc: xilinx: Add cb event for subsystem restart

Add support to register subsystem restart events from firmware for Versal
and Versal NET platforms. This event is received when firmware requests
for subsystem restart. After receiving this event, the kernel needs to be
restarted.

Signed-off-by: Jay Buddhabhatti <jay.buddhabhatti@amd.com>
Link: https://lore.kernel.org/r/20240424124900.29287-1-jay.buddhabhatti@amd.com
Signed-off-by: Michal Simek <michal.simek@amd.com>
---
 include/linux/firmware/xlnx-event-manager.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/firmware/xlnx-event-manager.h b/include/linux/firmware/xlnx-event-manager.h
index 82e8254b0f80..645dd34155e6 100644
--- a/include/linux/firmware/xlnx-event-manager.h
+++ b/include/linux/firmware/xlnx-event-manager.h
@@ -1,4 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Xilinx Event Management Driver
+ *
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
 
 #ifndef _FIRMWARE_XLNX_EVENT_MANAGER_H_
 #define _FIRMWARE_XLNX_EVENT_MANAGER_H_
@@ -7,6 +12,11 @@
 
 #define CB_MAX_PAYLOAD_SIZE	(4U) /*In payload maximum 32bytes */
 
+#define EVENT_SUBSYSTEM_RESTART		(4U)
+
+#define PM_DEV_ACPU_0_0			(0x1810c0afU)
+#define PM_DEV_ACPU_0			(0x1810c003U)
+
 /************************** Exported Function *****************************/
 
 typedef void (*event_cb_func_t)(const u32 *payload, void *data);
-- 
cgit v1.2.3


From 494c55a1ec0ab40198cf43f5a41c7c5e0b70e7fc Mon Sep 17 00:00:00 2001
From: Ronak Jain <ronak.jain@amd.com>
Date: Thu, 25 Apr 2024 02:59:13 -0700
Subject: firmware: xilinx: Move FIRMWARE_VERSION_MASK to xlnx-zynqmp.h

Move FIRMWARE_VERSION_MASK macro to xlnx-zynqmp.h so that other
drivers can use it for verifying the supported firmware version.

Signed-off-by: Ronak Jain <ronak.jain@amd.com>
Signed-off-by: Anand Ashok Dumbre <anand.ashok.dumbre@amd.com>
Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
Link: https://lore.kernel.org/r/20240425095913.919390-1-ronak.jain@amd.com
Signed-off-by: Michal Simek <michal.simek@amd.com>
---
 include/linux/firmware/xlnx-zynqmp.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 1a069a56c961..d7d07afc0532 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -52,6 +52,9 @@
 #define API_ID_MASK		GENMASK(7, 0)
 #define MODULE_ID_MASK		GENMASK(11, 8)
 
+/* Firmware feature check version mask */
+#define FIRMWARE_VERSION_MASK		0xFFFFU
+
 /* ATF only commands */
 #define TF_A_PM_REGISTER_SGI		0xa04
 #define PM_GET_TRUSTZONE_VERSION	0xa03
-- 
cgit v1.2.3


From a5b7365f28c191df6b93f60942d2b9a9fe71746c Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 3 Jun 2024 14:58:41 +0800
Subject: soundwire: bus: add stream refcount

The notion of stream is by construction based on a multi-bus
capability, to allow for aggregation of Peripheral devices or
functions located on different segments. We currently count how many
master_rt contexts are used by a stream, but we don't have the dual
refcount of how many streams are allocated on a given bus. This
refcount will be useful to check if BTP/BRA streams can be allocated.

Note that the stream_refcount is modified in sdw_master_rt_alloc() and
sdw_master_rt_free() which are both called with the bus_lock mutex
held, so there's no need for refcount_ primitives for additional
protection.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Rander Wang <rander.wang@intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20240603065841.4860-2-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 13e96d8b7423..94fc1b57c57b 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -903,6 +903,7 @@ struct sdw_master_ops {
  * meaningful if multi_link is set. If set to 1, hardware-based
  * synchronization will be used even if a stream only uses a single
  * SoundWire segment.
+ * @stream_refcount: number of streams currently using this bus
  */
 struct sdw_bus {
 	struct device *dev;
@@ -933,6 +934,7 @@ struct sdw_bus {
 	u32 bank_switch_timeout;
 	bool multi_link;
 	int hw_sync_min_links;
+	int stream_refcount;
 };
 
 int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent,
-- 
cgit v1.2.3


From 9b5fd115e7d5a98b82054cff5c96f6768ee06845 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 3 Jun 2024 15:02:40 +0800
Subject: soundwire: intel_ace2.x: add AC timing extensions for PantherLake
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ACE3 IP used in PantherLake exposes new bitfields in the ACTMCTL
register to better control clocks/delays. These bitfields were
reserved/zero in the ACE2.x IP, to simplify the integration the new
bifields are added unconditionally. The behavior will only be impacted
when the firmware exposes DSD properties to set non-zero values.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20240603070240.5165-1-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw_intel.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 8e78417156e3..d537587b4499 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -182,6 +182,11 @@
 #define SDW_SHIM2_INTEL_VS_ACTMCTL_DODSE	BIT(2)
 #define SDW_SHIM2_INTEL_VS_ACTMCTL_DOAIS	GENMASK(4, 3)
 #define SDW_SHIM2_INTEL_VS_ACTMCTL_DOAISE	BIT(5)
+#define SDW_SHIM3_INTEL_VS_ACTMCTL_CLSS		BIT(6)
+#define SDW_SHIM3_INTEL_VS_ACTMCTL_CLDS		GENMASK(11, 7)
+#define SDW_SHIM3_INTEL_VS_ACTMCTL_DODSE2	GENMASK(13, 12)
+#define SDW_SHIM3_INTEL_VS_ACTMCTL_DOAISE2	BIT(14)
+#define SDW_SHIM3_INTEL_VS_ACTMCTL_CLDE		BIT(15)
 
 /**
  * struct sdw_intel_stream_params_data: configuration passed during
-- 
cgit v1.2.3


From 32fe91524e1651b9a0e11ddfbc63de9aa485cf48 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Mon, 13 May 2024 11:25:18 +0200
Subject: sysctl: constify ctl_table arguments of utility function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In a future commit the proc_handlers themselves will change to
"const struct ctl_table". As a preparation for that adapt the internal
helper.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Joel Granados <j.granados@samsung.com>
---
 include/linux/sysctl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 09db2f2e6488..54fbec062772 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -237,7 +237,7 @@ extern struct ctl_table_header *register_sysctl_mount_point(const char *path);
 
 void do_sysctl_args(void);
 bool sysctl_is_alias(char *param);
-int do_proc_douintvec(struct ctl_table *table, int write,
+int do_proc_douintvec(const struct ctl_table *table, int write,
 		      void *buffer, size_t *lenp, loff_t *ppos,
 		      int (*conv)(unsigned long *lvalp,
 				  unsigned int *valp,
-- 
cgit v1.2.3


From 45919c28134519080a85a5fb66d0f65955ef7572 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 28 May 2024 05:05:14 +0000
Subject: ASoC: simple-card-utils: remove both playback/capture_only check

soc-pcm.c :: soc_get_playback_capture() will indicate error
if both playback_only / capture_only were true.

Thus, graph_util_parse_link_direction() which setup playback_only /
capture_only don't need to check it.
And, its return value is not used on existing driver. Let's remove it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://msgid.link/r/87a5kah6gm.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/simple_card_utils.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h
index 2c2279d082ec..0a6435ac5c5f 100644
--- a/include/sound/simple_card_utils.h
+++ b/include/sound/simple_card_utils.h
@@ -197,7 +197,7 @@ int graph_util_is_ports0(struct device_node *port);
 int graph_util_parse_dai(struct device *dev, struct device_node *ep,
 			 struct snd_soc_dai_link_component *dlc, int *is_single_link);
 
-int graph_util_parse_link_direction(struct device_node *np,
+void graph_util_parse_link_direction(struct device_node *np,
 				    bool *is_playback_only, bool *is_capture_only);
 
 #ifdef DEBUG
-- 
cgit v1.2.3


From 3d9a0a25336433d48ddca1e1f4fea25fd4a3b1d8 Mon Sep 17 00:00:00 2001
From: Sreenath Vijayan <sreenath.vijayan@sony.com>
Date: Thu, 30 May 2024 13:15:47 +0530
Subject: printk: Rename console_replay_all() and update context

Rename console_replay_all() to console_try_replay_all() to make
clear that the implementation is best effort. Also, the function
should not be called in NMI context as it takes locks, so update
the comment in code.

Fixes: 693f75b91a91 ("printk: Add function to replay kernel log on consoles")
Fixes: 1b743485e27f ("tty/sysrq: Replay kernel log messages on consoles via sysrq")
Suggested-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Shimoyashiki Taichi <taichi.shimoyashiki@sony.com>
Signed-off-by: Sreenath Vijayan <sreenath.vijayan@sony.com>
Link: https://lore.kernel.org/r/Zlguq/wU21Z8MqI4@sreenath.vijayan@sony.com
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 include/linux/printk.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 40afab23881a..ca4c9271daf6 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -195,7 +195,7 @@ void show_regs_print_info(const char *log_lvl);
 extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
 extern asmlinkage void dump_stack(void) __cold;
 void printk_trigger_flush(void);
-void console_replay_all(void);
+void console_try_replay_all(void);
 #else
 static inline __printf(1, 0)
 int vprintk(const char *s, va_list args)
@@ -275,7 +275,7 @@ static inline void dump_stack(void)
 static inline void printk_trigger_flush(void)
 {
 }
-static inline void console_replay_all(void)
+static inline void console_try_replay_all(void)
 {
 }
 #endif
-- 
cgit v1.2.3


From 96a02b9fa95108269cd0cd012f091f86bd6f41a4 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@alien8.de>
Date: Wed, 15 May 2024 17:08:04 +0200
Subject: KVM: Unexport kvm_debugfs_dir

After

  faf01aef0570 ("KVM: PPC: Merge powerpc's debugfs entry content into generic entry")

kvm_debugfs_dir is not used anywhere else outside of kvm_main.c

Unexport it and make it static.

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20240515150804.9354-1-bp@kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/kvm_host.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 692c01e41a18..c80fe03a1fa4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1955,8 +1955,6 @@ struct _kvm_stats_desc {
 			HALT_POLL_HIST_COUNT),				       \
 	STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking)
 
-extern struct dentry *kvm_debugfs_dir;
-
 ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
 		       const struct _kvm_stats_desc *desc,
 		       void *stats, size_t size_stats,
-- 
cgit v1.2.3


From 61df7b82820494368bd46071ca97e43a3dfc3b11 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Thu, 2 May 2024 17:57:51 -0400
Subject: lsm: fixup the inode xattr capability handling

The current security_inode_setxattr() and security_inode_removexattr()
hooks rely on individual LSMs to either call into the associated
capability hooks (cap_inode_setxattr() or cap_inode_removexattr()), or
return a magic value of 1 to indicate that the LSM layer itself should
perform the capability checks.  Unfortunately, with the default return
value for these LSM hooks being 0, an individual LSM hook returning a
1 will cause the LSM hook processing to exit early, potentially
skipping a LSM.  Thankfully, with the exception of the BPF LSM, none
of the LSMs which currently register inode xattr hooks should end up
returning a value of 1, and in the BPF LSM case, with the BPF LSM hooks
executing last there should be no real harm in stopping processing of
the LSM hooks.  However, the reliance on the individual LSMs to either
call the capability hooks themselves, or signal the LSM with a return
value of 1, is fragile and relies on a specific set of LSMs being
enabled.  This patch is an effort to resolve, or minimize, these
issues.

Before we discuss the solution, there are a few observations and
considerations that we need to take into account:
* BPF LSM registers an implementation for every LSM hook, and that
  implementation simply returns the hook's default return value, a
  0 in this case.  We want to ensure that the default BPF LSM behavior
  results in the capability checks being called.
* SELinux and Smack do not expect the traditional capability checks
  to be applied to the xattrs that they "own".
* SELinux and Smack are currently written in such a way that the
  xattr capability checks happen before any additional LSM specific
  access control checks.  SELinux does apply SELinux specific access
  controls to all xattrs, even those not "owned" by SELinux.
* IMA and EVM also register xattr hooks but assume that the LSM layer
  and specific LSMs have already authorized the basic xattr operation.

In order to ensure we perform the capability based access controls
before the individual LSM access controls, perform only one capability
access control check for each operation, and clarify the logic around
applying the capability controls, we need a mechanism to determine if
any of the enabled LSMs "own" a particular xattr and want to take
responsibility for controlling access to that xattr.  The solution in
this patch is to create a new LSM hook, 'inode_xattr_skipcap', that is
not exported to the rest of the kernel via a security_XXX() function,
but is used by the LSM layer to determine if a LSM wants to control
access to a given xattr and avoid the traditional capability controls.
Registering an inode_xattr_skipcap hook is optional, if a LSM declines
to register an implementation, or uses an implementation that simply
returns the default value (0), there is no effect as the LSM continues
to enforce the capability based controls (unless another LSM takes
ownership of the xattr).  If none of the LSMs signal that the
capability checks should be skipped, the capability check is performed
and if access is granted the individual LSM xattr access control hooks
are executed, keeping with the DAC-before-LSM convention.

Cc: stable@vger.kernel.org
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm_hook_defs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index f804b76cde44..8fd87f823d3a 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -144,6 +144,7 @@ LSM_HOOK(int, 0, inode_setattr, struct mnt_idmap *idmap, struct dentry *dentry,
 LSM_HOOK(void, LSM_RET_VOID, inode_post_setattr, struct mnt_idmap *idmap,
 	 struct dentry *dentry, int ia_valid)
 LSM_HOOK(int, 0, inode_getattr, const struct path *path)
+LSM_HOOK(int, 0, inode_xattr_skipcap, const char *name)
 LSM_HOOK(int, 0, inode_setxattr, struct mnt_idmap *idmap,
 	 struct dentry *dentry, const char *name, const void *value,
 	 size_t size, int flags)
-- 
cgit v1.2.3


From 32d99593bdc91442fe6183d97b060210cc9c8193 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sat, 16 Mar 2024 00:10:15 -0700
Subject: rcu: Add lockdep_assert_in_rcu_read_lock() and friends

There is no direct RCU counterpart to lockdep_assert_irqs_disabled()
and friends.  Although it is possible to construct them, it would
be more convenient to have the following lockdep assertions:

lockdep_assert_in_rcu_read_lock()
lockdep_assert_in_rcu_read_lock_bh()
lockdep_assert_in_rcu_read_lock_sched()
lockdep_assert_in_rcu_reader()

This commit therefore creates them.

Reported-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcupdate.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index dfd2399f2cde..8470a85f6563 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -421,11 +421,71 @@ static inline void rcu_preempt_sleep_check(void) { }
 				 "Illegal context switch in RCU-sched read-side critical section"); \
 	} while (0)
 
+// See RCU_LOCKDEP_WARN() for an explanation of the double call to
+// debug_lockdep_rcu_enabled().
+static inline bool lockdep_assert_rcu_helper(bool c)
+{
+	return debug_lockdep_rcu_enabled() &&
+	       (c || !rcu_is_watching() || !rcu_lockdep_current_cpu_online()) &&
+	       debug_lockdep_rcu_enabled();
+}
+
+/**
+ * lockdep_assert_in_rcu_read_lock - WARN if not protected by rcu_read_lock()
+ *
+ * Splats if lockdep is enabled and there is no rcu_read_lock() in effect.
+ */
+#define lockdep_assert_in_rcu_read_lock() \
+	WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_lock_map)))
+
+/**
+ * lockdep_assert_in_rcu_read_lock_bh - WARN if not protected by rcu_read_lock_bh()
+ *
+ * Splats if lockdep is enabled and there is no rcu_read_lock_bh() in effect.
+ * Note that local_bh_disable() and friends do not suffice here, instead an
+ * actual rcu_read_lock_bh() is required.
+ */
+#define lockdep_assert_in_rcu_read_lock_bh() \
+	WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_bh_lock_map)))
+
+/**
+ * lockdep_assert_in_rcu_read_lock_sched - WARN if not protected by rcu_read_lock_sched()
+ *
+ * Splats if lockdep is enabled and there is no rcu_read_lock_sched()
+ * in effect.  Note that preempt_disable() and friends do not suffice here,
+ * instead an actual rcu_read_lock_sched() is required.
+ */
+#define lockdep_assert_in_rcu_read_lock_sched() \
+	WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_sched_lock_map)))
+
+/**
+ * lockdep_assert_in_rcu_reader - WARN if not within some type of RCU reader
+ *
+ * Splats if lockdep is enabled and there is no RCU reader of any
+ * type in effect.  Note that regions of code protected by things like
+ * preempt_disable, local_bh_disable(), and local_irq_disable() all qualify
+ * as RCU readers.
+ *
+ * Note that this will never trigger in PREEMPT_NONE or PREEMPT_VOLUNTARY
+ * kernels that are not also built with PREEMPT_COUNT.  But if you have
+ * lockdep enabled, you might as well also enable PREEMPT_COUNT.
+ */
+#define lockdep_assert_in_rcu_reader()								\
+	WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_lock_map) &&			\
+					       !lock_is_held(&rcu_bh_lock_map) &&		\
+					       !lock_is_held(&rcu_sched_lock_map) &&		\
+					       preemptible()))
+
 #else /* #ifdef CONFIG_PROVE_RCU */
 
 #define RCU_LOCKDEP_WARN(c, s) do { } while (0 && (c))
 #define rcu_sleep_check() do { } while (0)
 
+#define lockdep_assert_in_rcu_read_lock() do { } while (0)
+#define lockdep_assert_in_rcu_read_lock_bh() do { } while (0)
+#define lockdep_assert_in_rcu_read_lock_sched() do { } while (0)
+#define lockdep_assert_in_rcu_reader() do { } while (0)
+
 #endif /* #else #ifdef CONFIG_PROVE_RCU */
 
 /*
-- 
cgit v1.2.3


From ce418966a83320837f59f04a646fa4716e132dc8 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Thu, 25 Apr 2024 16:18:32 +0200
Subject: rcu/nocb: Fix segcblist state machine comments about bypass

The parts explaining the bypass lifecycle in (de-)offloading are out
of date and/or wrong. Bypass is simply enabled whenever SEGCBLIST_RCU_CORE
flag is off.

Fix the comments accordingly.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcu_segcblist.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index 659d13a7ddaa..1e5b4ef167ca 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -141,7 +141,7 @@ struct rcu_cblist {
  *  |                           SEGCBLIST_KTHREAD_GP                           |
  *  |                                                                          |
  *  |   CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core()    |
- *  |   handles callbacks concurrently. Bypass enqueue is enabled.             |
+ *  |   handles callbacks concurrently. Bypass enqueue is disabled.            |
  *  |   Invoke RCU core so we make sure not to preempt it in the middle with   |
  *  |   leaving some urgent work unattended within a jiffy.                    |
  *  ----------------------------------------------------------------------------
@@ -154,8 +154,7 @@ struct rcu_cblist {
  *  |                           SEGCBLIST_KTHREAD_GP                           |
  *  |                                                                          |
  *  |   CB/GP kthreads and local rcu_core() handle callbacks concurrently      |
- *  |   holding nocb_lock. Wake up CB and GP kthreads if necessary. Disable    |
- *  |   bypass enqueue.                                                        |
+ *  |   holding nocb_lock. Wake up CB and GP kthreads if necessary.            |
  *  ----------------------------------------------------------------------------
  *                                      |
  *                                      v
@@ -185,7 +184,7 @@ struct rcu_cblist {
  *  |                                                                          |
  *  | Callbacks processed by rcu_core() from softirqs or local                 |
  *  | rcuc kthread, while holding nocb_lock. Forbid nocb_timer to be armed.    |
- *  | Flush pending nocb_timer. Flush nocb bypass callbacks.                   |
+ *  | Flush pending nocb_timer.                                                |
  *  ----------------------------------------------------------------------------
  *                                      |
  *                                      v
-- 
cgit v1.2.3


From aa97b9a56906f5965a7c5752790d174cadc8b820 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Thu, 25 Apr 2024 16:18:33 +0200
Subject: rcu/nocb: Fix segcblist state machine stale comments about timers

The (de-)offloading process used to take care about the NOCB timer when
it depended on the per-rdp NOCB locking. However this isn't the case
anymore for a long while. It can now safely be armed and run during the
(de-)offloading process, which doesn't care about it anymore.

Update the comments accordingly.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcu_segcblist.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index 1e5b4ef167ca..8018045989af 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -80,8 +80,7 @@ struct rcu_cblist {
  *  |       SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED       |
  *  |                                                                          |
  *  | Callbacks processed by rcu_core() from softirqs or local                 |
- *  | rcuc kthread, while holding nocb_lock. Waking up CB and GP kthreads,     |
- *  | allowing nocb_timer to be armed.                                         |
+ *  | rcuc kthread, while holding nocb_lock. Waking up CB and GP kthreads.     |
  *  ----------------------------------------------------------------------------
  *                                         |
  *                                         v
@@ -183,8 +182,7 @@ struct rcu_cblist {
  *  |                SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING                    |
  *  |                                                                          |
  *  | Callbacks processed by rcu_core() from softirqs or local                 |
- *  | rcuc kthread, while holding nocb_lock. Forbid nocb_timer to be armed.    |
- *  | Flush pending nocb_timer.                                                |
+ *  | rcuc kthread, while holding nocb_lock.                                   |
  *  ----------------------------------------------------------------------------
  *                                      |
  *                                      v
-- 
cgit v1.2.3


From 483d5bf23125a9127ffcb3f7a3b3539b34df67d4 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Thu, 25 Apr 2024 16:18:34 +0200
Subject: rcu/nocb: Use kthread parking instead of ad-hoc implementation

Upon NOCB deoffloading, the rcuo kthread must be forced to sleep
until the corresponding rdp is ever offloaded again. The deoffloader
clears the SEGCBLIST_OFFLOADED flag, wakes up the rcuo kthread which
then notices that change and clears in turn its SEGCBLIST_KTHREAD_CB
flag before going to sleep, until it ever sees the SEGCBLIST_OFFLOADED
flag again, should a re-offloading happen.

Upon NOCB offloading, the rcuo kthread must be forced to wake up and
handle callbacks until the corresponding rdp is ever deoffloaded again.
The offloader sets the SEGCBLIST_OFFLOADED flag, wakes up the rcuo
kthread which then notices that change and sets in turn its
SEGCBLIST_KTHREAD_CB flag before going to check callbacks, until it
ever sees the SEGCBLIST_OFFLOADED flag cleared again, should a
de-offloading happen again.

This is all a crude ad-hoc and error-prone kthread (un-)parking
re-implementation.

Consolidate the behaviour with the appropriate API instead.

[ paulmck: Apply Qiang Zhang feedback provided in Link: below. ]
Link: https://lore.kernel.org/all/20240509074046.15629-1-qiang.zhang1211@gmail.com/

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcu_segcblist.h | 81 +++++++++++++++++++------------------------
 1 file changed, 36 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index 8018045989af..ba95c06675e1 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -84,31 +84,31 @@ struct rcu_cblist {
  *  ----------------------------------------------------------------------------
  *                                         |
  *                                         v
- *                        -----------------------------------
- *                        |                                 |
- *                        v                                 v
- *  ---------------------------------------  ----------------------------------|
- *  |        SEGCBLIST_RCU_CORE   |       |  |     SEGCBLIST_RCU_CORE   |      |
- *  |        SEGCBLIST_LOCKING    |       |  |     SEGCBLIST_LOCKING    |      |
- *  |        SEGCBLIST_OFFLOADED  |       |  |     SEGCBLIST_OFFLOADED  |      |
- *  |        SEGCBLIST_KTHREAD_CB         |  |     SEGCBLIST_KTHREAD_GP        |
- *  |                                     |  |                                 |
- *  |                                     |  |                                 |
- *  | CB kthread woke up and              |  | GP kthread woke up and          |
- *  | acknowledged SEGCBLIST_OFFLOADED.   |  | acknowledged SEGCBLIST_OFFLOADED|
- *  | Processes callbacks concurrently    |  |                                 |
- *  | with rcu_core(), holding            |  |                                 |
- *  | nocb_lock.                          |  |                                 |
- *  ---------------------------------------  -----------------------------------
- *                        |                                 |
- *                        -----------------------------------
+ *  ----------------------------------------------------------------------------
+ *  |        SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED      |
+ *  |                              + unparked CB kthread                       |
+ *  |                                                                          |
+ *  | CB kthread got unparked and processes callbacks concurrently with        |
+ *  | rcu_core(), holding nocb_lock.                                           |
+ *  ---------------------------------------------------------------------------
+ *                                         |
+ *                                         v
+ *  ---------------------------------------------------------------------------|
+ *  |                           SEGCBLIST_RCU_CORE |                           |
+ *  |                           SEGCBLIST_LOCKING |                            |
+ *  |                           SEGCBLIST_OFFLOADED |                          |
+ *  |                           SEGCBLIST_KTHREAD_GP                           |
+ *  |                           + unparked CB kthread                          |
+ *  |                                                                          |
+ *  | GP kthread woke up and acknowledged nocb_lock.                           |
+ *  ---------------------------------------- -----------------------------------
  *                                         |
  *                                         v
  *  |--------------------------------------------------------------------------|
- *  |                           SEGCBLIST_LOCKING    |                         |
- *  |                           SEGCBLIST_OFFLOADED  |                         |
+ *  |                           SEGCBLIST_LOCKING |                            |
+ *  |                           SEGCBLIST_OFFLOADED |                          |
  *  |                           SEGCBLIST_KTHREAD_GP |                         |
- *  |                           SEGCBLIST_KTHREAD_CB                           |
+ *  |                           + unparked CB kthread                          |
  *  |                                                                          |
  *  |   Kthreads handle callbacks holding nocb_lock, local rcu_core() stops    |
  *  |   handling callbacks. Enable bypass queueing.                            |
@@ -124,8 +124,8 @@ struct rcu_cblist {
  *  |--------------------------------------------------------------------------|
  *  |                           SEGCBLIST_LOCKING    |                         |
  *  |                           SEGCBLIST_OFFLOADED  |                         |
- *  |                           SEGCBLIST_KTHREAD_CB |                         |
  *  |                           SEGCBLIST_KTHREAD_GP                           |
+ *  |                           + unparked CB kthread                          |
  *  |                                                                          |
  *  |   CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core()    |
  *  |   ignores callbacks. Bypass enqueue is enabled.                          |
@@ -136,8 +136,8 @@ struct rcu_cblist {
  *  |                           SEGCBLIST_RCU_CORE   |                         |
  *  |                           SEGCBLIST_LOCKING    |                         |
  *  |                           SEGCBLIST_OFFLOADED  |                         |
- *  |                           SEGCBLIST_KTHREAD_CB |                         |
  *  |                           SEGCBLIST_KTHREAD_GP                           |
+ *  |                           + unparked CB kthread                          |
  *  |                                                                          |
  *  |   CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core()    |
  *  |   handles callbacks concurrently. Bypass enqueue is disabled.            |
@@ -149,40 +149,31 @@ struct rcu_cblist {
  *  |--------------------------------------------------------------------------|
  *  |                           SEGCBLIST_RCU_CORE   |                         |
  *  |                           SEGCBLIST_LOCKING    |                         |
- *  |                           SEGCBLIST_KTHREAD_CB |                         |
  *  |                           SEGCBLIST_KTHREAD_GP                           |
+ *  |                           + unparked CB kthread                          |
  *  |                                                                          |
  *  |   CB/GP kthreads and local rcu_core() handle callbacks concurrently      |
- *  |   holding nocb_lock. Wake up CB and GP kthreads if necessary.            |
+ *  |   holding nocb_lock. Wake up GP kthread if necessary.                    |
  *  ----------------------------------------------------------------------------
  *                                      |
  *                                      v
- *                     -----------------------------------
- *                     |                                 |
- *                     v                                 v
- *  ---------------------------------------------------------------------------|
- *  |                                     |                                    |
- *  |        SEGCBLIST_RCU_CORE |         |       SEGCBLIST_RCU_CORE |         |
- *  |        SEGCBLIST_LOCKING  |         |       SEGCBLIST_LOCKING  |         |
- *  |        SEGCBLIST_KTHREAD_CB         |       SEGCBLIST_KTHREAD_GP         |
- *  |                                     |                                    |
- *  | GP kthread woke up and              |   CB kthread woke up and           |
- *  | acknowledged the fact that          |   acknowledged the fact that       |
- *  | SEGCBLIST_OFFLOADED got cleared.    |   SEGCBLIST_OFFLOADED got cleared. |
- *  |                                     |   The CB kthread goes to sleep     |
- *  | The callbacks from the target CPU   |   until it ever gets re-offloaded. |
- *  | will be ignored from the GP kthread |                                    |
- *  | loop.                               |                                    |
+ *  |--------------------------------------------------------------------------|
+ *  |                           SEGCBLIST_RCU_CORE   |                         |
+ *  |                           SEGCBLIST_LOCKING    |                         |
+ *  |                           + unparked CB kthread                          |
+ *  |                                                                          |
+ *  |   GP kthread woke up and acknowledged the fact that SEGCBLIST_OFFLOADED  |
+ *  |   got cleared. The callbacks from the target CPU will be ignored from the|
+ *  |   GP kthread loop.                                                       |
  *  ----------------------------------------------------------------------------
- *                      |                                 |
- *                      -----------------------------------
  *                                      |
  *                                      v
  *  ----------------------------------------------------------------------------
  *  |                SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING                    |
+ *  |                          + parked CB kthread                             |
  *  |                                                                          |
- *  | Callbacks processed by rcu_core() from softirqs or local                 |
- *  | rcuc kthread, while holding nocb_lock.                                   |
+ *  | CB kthread is parked. Callbacks processed by rcu_core() from softirqs or |
+ *  | local rcuc kthread, while holding nocb_lock.                             |
  *  ----------------------------------------------------------------------------
  *                                      |
  *                                      v
-- 
cgit v1.2.3


From 9855c37edf0009cc276cecfee09f7e76e2380212 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Thu, 25 Apr 2024 16:24:04 +0200
Subject: Revert "rcu-tasks: Fix synchronize_rcu_tasks() VS
 zap_pid_ns_processes()"

This reverts commit 28319d6dc5e2ffefa452c2377dd0f71621b5bff0. The race
it fixed was subject to conditions that don't exist anymore since:

	1612160b9127 ("rcu-tasks: Eliminate deadlocks involving do_exit() and RCU tasks")

This latter commit removes the use of SRCU that used to cover the
RCU-tasks blind spot on exit between the tasklist's removal and the
final preemption disabling. The task is now placed instead into a
temporary list inside which voluntary sleeps are accounted as RCU-tasks
quiescent states. This would disarm the deadlock initially reported
against PID namespace exit.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcupdate.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index dfd2399f2cde..61cb3de236af 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -209,7 +209,6 @@ void synchronize_rcu_tasks_rude(void);
 
 #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false)
 void exit_tasks_rcu_start(void);
-void exit_tasks_rcu_stop(void);
 void exit_tasks_rcu_finish(void);
 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
 #define rcu_tasks_classic_qs(t, preempt) do { } while (0)
@@ -218,7 +217,6 @@ void exit_tasks_rcu_finish(void);
 #define call_rcu_tasks call_rcu
 #define synchronize_rcu_tasks synchronize_rcu
 static inline void exit_tasks_rcu_start(void) { }
-static inline void exit_tasks_rcu_stop(void) { }
 static inline void exit_tasks_rcu_finish(void) { }
 #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
 
-- 
cgit v1.2.3


From 4ce6e8a859f0503d97aac6869bc3b1a24b15601d Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 28 May 2024 13:33:29 -0700
Subject: hwmon: Add PEC attribute support to hardware monitoring core

Several hardware monitoring chips optionally support Packet Error Checking
(PEC). For some chips, PEC support can be enabled simply by setting
I2C_CLIENT_PEC in the i2c client data structure. Others require chip
specific code to enable or disable PEC support.

Introduce hwmon_chip_pec and HWMON_C_PEC to simplify adding configurable
PEC support for hardware monitoring drivers. A driver can set HWMON_C_PEC
in its chip information data to indicate PEC support. If a chip requires
chip specific code to enable or disable PEC support, the driver only needs
to implement support for the hwmon_chip_pec attribute to its write
function.

Packet Error Checking is only supported for SMBus devices. HWMON_C_PEC
must therefore only be set by a driver if the parent device is an I2C
device. Attempts to set HWMON_C_PEC on any other device type is not
supported and rejected.

The code calls i2c_check_functionality() to check if PEC is supported
by the I2C/SMBus controller. This function is only available if CONFIG_I2C
is enabled and reachable. For this reason, the added code needs to depend
on reachability of CONFIG_I2C.

Cc: Radu Sabau <radu.sabau@analog.com>
Acked-by: Nuno Sa <nuno.sa@analog.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 include/linux/hwmon.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index edf96f249eb5..e94314760aab 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -45,6 +45,7 @@ enum hwmon_chip_attributes {
 	hwmon_chip_power_samples,
 	hwmon_chip_temp_samples,
 	hwmon_chip_beep_enable,
+	hwmon_chip_pec,
 };
 
 #define HWMON_C_TEMP_RESET_HISTORY	BIT(hwmon_chip_temp_reset_history)
@@ -60,6 +61,7 @@ enum hwmon_chip_attributes {
 #define HWMON_C_POWER_SAMPLES		BIT(hwmon_chip_power_samples)
 #define HWMON_C_TEMP_SAMPLES		BIT(hwmon_chip_temp_samples)
 #define HWMON_C_BEEP_ENABLE		BIT(hwmon_chip_beep_enable)
+#define HWMON_C_PEC			BIT(hwmon_chip_pec)
 
 enum hwmon_temp_attributes {
 	hwmon_temp_enable,
-- 
cgit v1.2.3


From d0edc54455398248154062664f7d1b35e6ec6e01 Mon Sep 17 00:00:00 2001
From: Bingbu Cao <bingbu.cao@intel.com>
Date: Fri, 10 May 2024 14:27:47 +0800
Subject: media: ipu-bridge: add mod_devicetable.h header inclusion

ACPI_ID_LEN is defined in mod_devicetable.h, so the header should
be guaranteed to included in ipu-bridge.h instead of the source
files which include ipu-bridge.h.

Signed-off-by: Bingbu Cao <bingbu.cao@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/media/ipu-bridge.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/media/ipu-bridge.h b/include/media/ipu-bridge.h
index 783bda6d5cc3..16fac765456e 100644
--- a/include/media/ipu-bridge.h
+++ b/include/media/ipu-bridge.h
@@ -3,6 +3,7 @@
 #ifndef __IPU_BRIDGE_H
 #define __IPU_BRIDGE_H
 
+#include <linux/mod_devicetable.h>
 #include <linux/property.h>
 #include <linux/types.h>
 #include <media/v4l2-fwnode.h>
-- 
cgit v1.2.3


From 1d7804281df3f09f0a109d00406e859a00bae7ae Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Wed, 24 Apr 2024 18:39:07 +0300
Subject: media: subdev: Fix use of sd->enabled_streams in call_s_stream()

call_s_stream() uses sd->enabled_streams to track whether streaming has
already been enabled. However,
v4l2_subdev_enable/disable_streams_fallback(), which was the original
user of this field, already uses it, and
v4l2_subdev_enable/disable_streams_fallback() will call call_s_stream().

This leads to a conflict as both functions set the field. Afaics, both
functions set the field to the same value, so it won't cause a runtime
bug, but it's still wrong and if we, e.g., change how
v4l2_subdev_enable/disable_streams_fallback() operates we might easily
cause bugs.

Fix this by adding a new field, 's_stream_enabled', for
call_s_stream().

Reviewed-by: Umang Jain <umang.jain@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Tested-by: Umang Jain <umang.jain@ideasonboard.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/media/v4l2-subdev.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index e30c463d90e5..d3f15882927b 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -1045,6 +1045,8 @@ struct v4l2_subdev_platform_data {
  *		     v4l2_subdev_enable_streams() and
  *		     v4l2_subdev_disable_streams() helper functions for fallback
  *		     cases.
+ * @s_stream_enabled: Tracks whether streaming has been enabled with s_stream.
+ *                    This is only for call_s_stream() internal use.
  *
  * Each instance of a subdev driver should create this struct, either
  * stand-alone or embedded in a larger struct.
@@ -1093,6 +1095,7 @@ struct v4l2_subdev {
 	 */
 	struct v4l2_subdev_state *active_state;
 	u64 enabled_streams;
+	bool s_stream_enabled;
 };
 
 
-- 
cgit v1.2.3


From 61d6c8c896c1ccde350c281817847a32b0c6b83b Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Wed, 24 Apr 2024 18:39:08 +0300
Subject: media: subdev: Improve v4l2_subdev_enable/disable_streams_fallback

v4l2_subdev_enable/disable_streams_fallback() supports falling back to
.s_stream() for subdevs with a single source pad. It also tracks the
enabled streams for that one pad in the sd->enabled_streams field.

Tracking the enabled streams with sd->enabled_streams does not make
sense, as with .s_stream() there can only be a single stream per pad.
Thus, as the v4l2_subdev_enable/disable_streams_fallback() only supports
a single source pad, all we really need is a boolean which tells whether
streaming has been enabled on this pad or not.

However, as we only need a true/false state for a pad (instead of
tracking which streams have been enabled for a pad), we can easily
extend the fallback mechanism to support multiple source pads as we only
need to keep track of which pads have been enabled.

Change the sd->enabled_streams field to sd->enabled_pads, which is a
64-bit bitmask tracking the enabled source pads. With this change we can
remove the restriction that
v4l2_subdev_enable/disable_streams_fallback() only supports a single
source pad.

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Tested-by: Umang Jain <umang.jain@ideasonboard.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/media/v4l2-subdev.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index d3f15882927b..f191cf00c840 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -1041,10 +1041,9 @@ struct v4l2_subdev_platform_data {
  * @active_state: Active state for the subdev (NULL for subdevs tracking the
  *		  state internally). Initialized by calling
  *		  v4l2_subdev_init_finalize().
- * @enabled_streams: Bitmask of enabled streams used by
- *		     v4l2_subdev_enable_streams() and
- *		     v4l2_subdev_disable_streams() helper functions for fallback
- *		     cases.
+ * @enabled_pads: Bitmask of enabled pads used by v4l2_subdev_enable_streams()
+ *		  and v4l2_subdev_disable_streams() helper functions for
+ *		  fallback cases.
  * @s_stream_enabled: Tracks whether streaming has been enabled with s_stream.
  *                    This is only for call_s_stream() internal use.
  *
@@ -1094,7 +1093,7 @@ struct v4l2_subdev {
 	 * doesn't support it.
 	 */
 	struct v4l2_subdev_state *active_state;
-	u64 enabled_streams;
+	u64 enabled_pads;
 	bool s_stream_enabled;
 };
 
-- 
cgit v1.2.3


From 5f3ce14fae742d1d23061c3122d93edb879ebf53 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Wed, 24 Apr 2024 18:39:09 +0300
Subject: media: subdev: Add v4l2_subdev_is_streaming()

Add a helper function which returns whether the subdevice is streaming,
i.e. if .s_stream or .enable_streams has been called successfully.

Reviewed-by: Umang Jain <umang.jain@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Tested-by: Umang Jain <umang.jain@ideasonboard.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/media/v4l2-subdev.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index f191cf00c840..23c970d09870 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -1956,4 +1956,17 @@ extern const struct v4l2_subdev_ops v4l2_subdev_call_wrappers;
 void v4l2_subdev_notify_event(struct v4l2_subdev *sd,
 			      const struct v4l2_event *ev);
 
+/**
+ * v4l2_subdev_is_streaming() - Returns if the subdevice is streaming
+ * @sd: The subdevice
+ *
+ * v4l2_subdev_is_streaming() tells if the subdevice is currently streaming.
+ * "Streaming" here means whether .s_stream() or .enable_streams() has been
+ * successfully called, and the streaming has not yet been disabled.
+ *
+ * If the subdevice implements .enable_streams() this function must be called
+ * while holding the active state lock.
+ */
+bool v4l2_subdev_is_streaming(struct v4l2_subdev *sd);
+
 #endif /* _V4L2_SUBDEV_H */
-- 
cgit v1.2.3


From f8e9662e4da6d4e394a3d9cf0d335863650d712d Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Wed, 24 Apr 2024 18:39:14 +0300
Subject: media: subdev: Improve s_stream documentation

Now that enable/disable_streams operations are available for
single-stream subdevices too, there's no reason to use the old s_stream
operation on new drivers. Extend the documentation reflecting this.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Reviewed-by: Umang Jain<umang.jain@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/media/v4l2-subdev.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index 23c970d09870..df66365576dd 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -450,6 +450,15 @@ enum v4l2_subdev_pre_streamon_flags {
  *	already started or stopped subdev. Also see call_s_stream wrapper in
  *	v4l2-subdev.c.
  *
+ *	New drivers should instead implement &v4l2_subdev_pad_ops.enable_streams
+ *	and &v4l2_subdev_pad_ops.disable_streams operations, and use
+ *	v4l2_subdev_s_stream_helper for the &v4l2_subdev_video_ops.s_stream
+ *	operation to support legacy users.
+ *
+ *	Drivers should also not call the .s_stream() subdev operation directly,
+ *	but use the v4l2_subdev_enable_streams() and
+ *	v4l2_subdev_disable_streams() helpers.
+ *
  * @g_pixelaspect: callback to return the pixelaspect ratio.
  *
  * @s_rx_buffer: set a host allocated memory buffer for the subdev. The subdev
-- 
cgit v1.2.3


From e73412fdeb541e777b3fd50b665781c1856422b5 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Thu, 9 May 2024 00:40:43 +0300
Subject: media: v4l2-subdev: Fix v4l2_subdev_state_get_format() documentation

The documentation of the v4l2_subdev_state_get_format() macro
incorrectly references __v4l2_subdev_state_get_format() instead of
__v4l2_subdev_state_gen_call(). Fix it, and also update the list of
similar macros to add the missing v4l2_subdev_state_get_interval().

Suggested-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/media/v4l2-subdev.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index df66365576dd..ed339f0116bf 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -1351,12 +1351,12 @@ void v4l2_subdev_cleanup(struct v4l2_subdev *sd);
  */
 /*
  * Wrap v4l2_subdev_state_get_format(), allowing the function to be called with
- * two or three arguments. The purpose of the __v4l2_subdev_state_get_format()
+ * two or three arguments. The purpose of the __v4l2_subdev_state_gen_call()
  * macro below is to come up with the name of the function or macro to call,
  * using the last two arguments (_stream and _pad). The selected function or
  * macro is then called using the arguments specified by the caller. A similar
- * arrangement is used for v4l2_subdev_state_crop() and
- * v4l2_subdev_state_compose() below.
+ * arrangement is used for v4l2_subdev_state_crop(), v4l2_subdev_state_compose()
+ * and v4l2_subdev_state_get_interval() below.
  */
 #define v4l2_subdev_state_get_format(state, pad, ...)			\
 	__v4l2_subdev_state_gen_call(format, ##__VA_ARGS__, , _pad)	\
-- 
cgit v1.2.3


From 85af84852f115d3c9d4b45b0500315e630baa82c Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Thu, 9 May 2024 00:40:43 +0300
Subject: media: v4l2-subdev: Provide const-aware subdev state accessors

It would be useful to mark instances of v4l2_subdev_state structures as
const when code needs to access them read-only. This isn't currently
possible, as the v4l2_subdev_state_get_*() accessor functions take a
non-const pointer to the state.

Use _Generic() to provide two different versions of the accessors, for
const and non-const states respectively. The former returns a const
pointer to the requested format, rectangle or interval, implementing
const-correctness. The latter returns a non-const pointer, preserving
the current behaviour for drivers.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
[Sakari Ailus: Drop the word "below" from the text.]
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/media/v4l2-subdev.h | 54 ++++++++++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index ed339f0116bf..1b74e037e440 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -1337,6 +1337,16 @@ void v4l2_subdev_cleanup(struct v4l2_subdev *sd);
 #define __v4l2_subdev_state_gen_call(NAME, _1, ARG, ...)	\
 	__v4l2_subdev_state_get_ ## NAME ## ARG
 
+/*
+ * A macro to constify the return value of the state accessors when the state
+ * parameter is const.
+ */
+#define __v4l2_subdev_state_constify_ret(state, value)				\
+	_Generic(state,								\
+		const struct v4l2_subdev_state *: (const typeof(*(value)) *)(value), \
+		struct v4l2_subdev_state *: (value)				\
+	)
+
 /**
  * v4l2_subdev_state_get_format() - Get pointer to a stream format
  * @state: subdevice state
@@ -1352,15 +1362,20 @@ void v4l2_subdev_cleanup(struct v4l2_subdev *sd);
 /*
  * Wrap v4l2_subdev_state_get_format(), allowing the function to be called with
  * two or three arguments. The purpose of the __v4l2_subdev_state_gen_call()
- * macro below is to come up with the name of the function or macro to call,
- * using the last two arguments (_stream and _pad). The selected function or
- * macro is then called using the arguments specified by the caller. A similar
- * arrangement is used for v4l2_subdev_state_crop(), v4l2_subdev_state_compose()
- * and v4l2_subdev_state_get_interval() below.
- */
-#define v4l2_subdev_state_get_format(state, pad, ...)			\
-	__v4l2_subdev_state_gen_call(format, ##__VA_ARGS__, , _pad)	\
-		(state, pad, ##__VA_ARGS__)
+ * macro is to come up with the name of the function or macro to call, using
+ * the last two arguments (_stream and _pad). The selected function or macro is
+ * then called using the arguments specified by the caller. The
+ * __v4l2_subdev_state_constify_ret() macro constifies the returned pointer
+ * when the state is const, allowing the state accessors to guarantee
+ * const-correctness in all cases.
+ *
+ * A similar arrangement is used for v4l2_subdev_state_crop(),
+ * v4l2_subdev_state_compose() and v4l2_subdev_state_get_interval() below.
+ */
+#define v4l2_subdev_state_get_format(state, pad, ...)				\
+	__v4l2_subdev_state_constify_ret(state,					\
+		__v4l2_subdev_state_gen_call(format, ##__VA_ARGS__, , _pad)	\
+			((struct v4l2_subdev_state *)state, pad, ##__VA_ARGS__))
 #define __v4l2_subdev_state_get_format_pad(state, pad)	\
 	__v4l2_subdev_state_get_format(state, pad, 0)
 struct v4l2_mbus_framefmt *
@@ -1379,9 +1394,10 @@ __v4l2_subdev_state_get_format(struct v4l2_subdev_state *state,
  * For stream-unaware drivers the crop rectangle for the corresponding pad is
  * returned. If the pad does not exist, NULL is returned.
  */
-#define v4l2_subdev_state_get_crop(state, pad, ...)			\
-	__v4l2_subdev_state_gen_call(crop, ##__VA_ARGS__, , _pad)	\
-		(state, pad, ##__VA_ARGS__)
+#define v4l2_subdev_state_get_crop(state, pad, ...)				\
+	__v4l2_subdev_state_constify_ret(state,					\
+		__v4l2_subdev_state_gen_call(crop, ##__VA_ARGS__, , _pad)	\
+			((struct v4l2_subdev_state *)state, pad, ##__VA_ARGS__))
 #define __v4l2_subdev_state_get_crop_pad(state, pad)	\
 	__v4l2_subdev_state_get_crop(state, pad, 0)
 struct v4l2_rect *
@@ -1400,9 +1416,10 @@ __v4l2_subdev_state_get_crop(struct v4l2_subdev_state *state, unsigned int pad,
  * For stream-unaware drivers the compose rectangle for the corresponding pad is
  * returned. If the pad does not exist, NULL is returned.
  */
-#define v4l2_subdev_state_get_compose(state, pad, ...)			\
-	__v4l2_subdev_state_gen_call(compose, ##__VA_ARGS__, , _pad)	\
-		(state, pad, ##__VA_ARGS__)
+#define v4l2_subdev_state_get_compose(state, pad, ...)				\
+	__v4l2_subdev_state_constify_ret(state,					\
+		__v4l2_subdev_state_gen_call(compose, ##__VA_ARGS__, , _pad)	\
+			((struct v4l2_subdev_state *)state, pad, ##__VA_ARGS__))
 #define __v4l2_subdev_state_get_compose_pad(state, pad)	\
 	__v4l2_subdev_state_get_compose(state, pad, 0)
 struct v4l2_rect *
@@ -1421,9 +1438,10 @@ __v4l2_subdev_state_get_compose(struct v4l2_subdev_state *state,
  * For stream-unaware drivers the frame interval for the corresponding pad is
  * returned. If the pad does not exist, NULL is returned.
  */
-#define v4l2_subdev_state_get_interval(state, pad, ...)			\
-	__v4l2_subdev_state_gen_call(interval, ##__VA_ARGS__, , _pad)	\
-		(state, pad, ##__VA_ARGS__)
+#define v4l2_subdev_state_get_interval(state, pad, ...)				\
+	__v4l2_subdev_state_constify_ret(state,					\
+		__v4l2_subdev_state_gen_call(interval, ##__VA_ARGS__, , _pad)	\
+			((struct v4l2_subdev_state *)state, pad, ##__VA_ARGS__))
 #define __v4l2_subdev_state_get_interval_pad(state, pad)	\
 	__v4l2_subdev_state_get_interval(state, pad, 0)
 struct v4l2_fract *
-- 
cgit v1.2.3


From 1d8491d3e726984343dd8c3cdbe2f2b47cfdd928 Mon Sep 17 00:00:00 2001
From: Paolo Pisati <p.pisati@gmail.com>
Date: Sat, 1 Jun 2024 17:32:54 +0200
Subject: m68k: amiga: Turn off Warp1260 interrupts during boot

On an Amiga 1200 equipped with a Warp1260 accelerator, an interrupt
storm coming from the accelerator board causes the machine to crash in
local_irq_enable() or auto_irq_enable().  Disabling interrupts for the
Warp1260 in amiga_parse_bootinfo() fixes the problem.

Link: https://lore.kernel.org/r/ZkjwzVwYeQtyAPrL@amaterasu.local
Cc: stable <stable@kernel.org>
Signed-off-by: Paolo Pisati <p.pisati@gmail.com>
Reviewed-by: Michael Schmitz <schmitzmic@gmail.com>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20240601153254.186225-1-p.pisati@gmail.com
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 include/uapi/linux/zorro_ids.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/zorro_ids.h b/include/uapi/linux/zorro_ids.h
index 6e574d7b7d79..393f2ee9c042 100644
--- a/include/uapi/linux/zorro_ids.h
+++ b/include/uapi/linux/zorro_ids.h
@@ -449,6 +449,9 @@
 #define  ZORRO_PROD_VMC_ISDN_BLASTER_Z2				ZORRO_ID(VMC, 0x01, 0)
 #define  ZORRO_PROD_VMC_HYPERCOM_4				ZORRO_ID(VMC, 0x02, 0)
 
+#define ZORRO_MANUF_CSLAB					0x1400
+#define  ZORRO_PROD_CSLAB_WARP_1260				ZORRO_ID(CSLAB, 0x65, 0)
+
 #define ZORRO_MANUF_INFORMATION					0x157C
 #define  ZORRO_PROD_INFORMATION_ISDN_ENGINE_I			ZORRO_ID(INFORMATION, 0x64, 0)
 
-- 
cgit v1.2.3


From 0e6be855a96dd44effce97b6b8d0cf0d0d0b7303 Mon Sep 17 00:00:00 2001
From: Xianwei Zhao <xianwei.zhao@amlogic.com>
Date: Wed, 22 May 2024 16:27:23 +0800
Subject: dt-bindings: clock: add Amlogic C3 PLL clock controller

Add the PLL clock controller dt-bindings for Amlogic C3 SoC family.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Co-developed-by: Chuan Liu <chuan.liu@amlogic.com>
Signed-off-by: Chuan Liu <chuan.liu@amlogic.com>
Signed-off-by: Xianwei Zhao <xianwei.zhao@amlogic.com>
Link: https://lore.kernel.org/r/20240522082727.3029656-2-xianwei.zhao@amlogic.com
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 include/dt-bindings/clock/amlogic,c3-pll-clkc.h | 40 +++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 include/dt-bindings/clock/amlogic,c3-pll-clkc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/amlogic,c3-pll-clkc.h b/include/dt-bindings/clock/amlogic,c3-pll-clkc.h
new file mode 100644
index 000000000000..fcdc558715e8
--- /dev/null
+++ b/include/dt-bindings/clock/amlogic,c3-pll-clkc.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */
+/*
+ * Copyright (c) 2023 Amlogic, Inc. All rights reserved.
+ * Author: Chuan Liu <chuan.liu@amlogic.com>
+ */
+
+#ifndef _DT_BINDINGS_CLOCK_AMLOGIC_C3_PLL_CLKC_H
+#define _DT_BINDINGS_CLOCK_AMLOGIC_C3_PLL_CLKC_H
+
+#define CLKID_FCLK_50M_EN			0
+#define CLKID_FCLK_50M				1
+#define CLKID_FCLK_DIV2_DIV			2
+#define CLKID_FCLK_DIV2				3
+#define CLKID_FCLK_DIV2P5_DIV			4
+#define CLKID_FCLK_DIV2P5			5
+#define CLKID_FCLK_DIV3_DIV			6
+#define CLKID_FCLK_DIV3				7
+#define CLKID_FCLK_DIV4_DIV			8
+#define CLKID_FCLK_DIV4				9
+#define CLKID_FCLK_DIV5_DIV			10
+#define CLKID_FCLK_DIV5				11
+#define CLKID_FCLK_DIV7_DIV			12
+#define CLKID_FCLK_DIV7				13
+#define CLKID_GP0_PLL_DCO			14
+#define CLKID_GP0_PLL				15
+#define CLKID_HIFI_PLL_DCO			16
+#define CLKID_HIFI_PLL				17
+#define CLKID_MCLK_PLL_DCO			18
+#define CLKID_MCLK_PLL_OD			19
+#define CLKID_MCLK_PLL				20
+#define CLKID_MCLK0_SEL				21
+#define CLKID_MCLK0_SEL_EN			22
+#define CLKID_MCLK0_DIV				23
+#define CLKID_MCLK0				24
+#define CLKID_MCLK1_SEL				25
+#define CLKID_MCLK1_SEL_EN			26
+#define CLKID_MCLK1_DIV				27
+#define CLKID_MCLK1				28
+
+#endif  /* _DT_BINDINGS_CLOCK_AMLOGIC_C3_PLL_CLKC_H */
-- 
cgit v1.2.3


From d309989a0a0aff3b8ad5259aa11f119b15336c1a Mon Sep 17 00:00:00 2001
From: Xianwei Zhao <xianwei.zhao@amlogic.com>
Date: Wed, 22 May 2024 16:27:24 +0800
Subject: dt-bindings: clock: add Amlogic C3 SCMI clock controller support

Add the SCMI clock controller dt-bindings for Amlogic C3 SoC family

Acked-by: Rob Herring (Arm) <robh@kernel.org>
Co-developed-by: Chuan Liu <chuan.liu@amlogic.com>
Signed-off-by: Chuan Liu <chuan.liu@amlogic.com>
Signed-off-by: Xianwei Zhao <xianwei.zhao@amlogic.com>
Link: https://lore.kernel.org/r/20240522082727.3029656-3-xianwei.zhao@amlogic.com
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 include/dt-bindings/clock/amlogic,c3-scmi-clkc.h | 27 ++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 include/dt-bindings/clock/amlogic,c3-scmi-clkc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/amlogic,c3-scmi-clkc.h b/include/dt-bindings/clock/amlogic,c3-scmi-clkc.h
new file mode 100644
index 000000000000..663c9b349275
--- /dev/null
+++ b/include/dt-bindings/clock/amlogic,c3-scmi-clkc.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */
+/*
+ * Copyright (c) 2023 Amlogic, Inc. All rights reserved.
+ * Author: Chuan Liu <chuan.liu@amlogic.com>
+ */
+
+#ifndef __AMLOGIC_C3_SCMI_CLKC_H
+#define __AMLOGIC_C3_SCMI_CLKC_H
+
+#define CLKID_DDR_PLL_OSC			0
+#define CLKID_DDR_PHY				1
+#define CLKID_TOP_PLL_OSC			2
+#define CLKID_USB_PLL_OSC			3
+#define CLKID_MIPIISP_VOUT			4
+#define CLKID_MCLK_PLL_OSC			5
+#define CLKID_USB_CTRL				6
+#define CLKID_ETH_PLL_OSC			7
+#define CLKID_OSC				8
+#define CLKID_SYS_CLK				9
+#define CLKID_AXI_CLK				10
+#define CLKID_CPU_CLK				11
+#define CLKID_FIXED_PLL_OSC			12
+#define CLKID_GP1_PLL_OSC			13
+#define CLKID_SYS_PLL_DIV16			14
+#define CLKID_CPU_CLK_DIV16			15
+
+#endif /* __AMLOGIC_C3_SCMI_CLKC_H */
-- 
cgit v1.2.3


From fc1c7f941c71460a730a449f76764d883e270cba Mon Sep 17 00:00:00 2001
From: Xianwei Zhao <xianwei.zhao@amlogic.com>
Date: Wed, 22 May 2024 16:27:25 +0800
Subject: dt-bindings: clock: add Amlogic C3 peripherals clock controller

Add the peripherals clock controller dt-bindings for Amlogic C3 SoC family

Reviewed-by: Rob Herring (Arm) <robh@kernel.org>
Co-developed-by: Chuan Liu <chuan.liu@amlogic.com>
Signed-off-by: Chuan Liu <chuan.liu@amlogic.com>
Signed-off-by: Xianwei Zhao <xianwei.zhao@amlogic.com>
Link: https://lore.kernel.org/r/20240522082727.3029656-4-xianwei.zhao@amlogic.com
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 .../clock/amlogic,c3-peripherals-clkc.h            | 212 +++++++++++++++++++++
 1 file changed, 212 insertions(+)
 create mode 100644 include/dt-bindings/clock/amlogic,c3-peripherals-clkc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/amlogic,c3-peripherals-clkc.h b/include/dt-bindings/clock/amlogic,c3-peripherals-clkc.h
new file mode 100644
index 000000000000..d115c741c255
--- /dev/null
+++ b/include/dt-bindings/clock/amlogic,c3-peripherals-clkc.h
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */
+/*
+ * Copyright (c) 2023 Amlogic, Inc. All rights reserved.
+ * Author: Chuan Liu <chuan.liu@amlogic.com>
+ */
+
+#ifndef _DT_BINDINGS_CLOCK_AMLOGIC_C3_PERIPHERALS_CLKC_H
+#define _DT_BINDINGS_CLOCK_AMLOGIC_C3_PERIPHERALS_CLKC_H
+
+#define CLKID_RTC_XTAL_CLKIN			0
+#define CLKID_RTC_32K_DIV			1
+#define CLKID_RTC_32K_MUX			2
+#define CLKID_RTC_32K				3
+#define CLKID_RTC_CLK				4
+#define CLKID_SYS_RESET_CTRL			5
+#define CLKID_SYS_PWR_CTRL			6
+#define CLKID_SYS_PAD_CTRL			7
+#define CLKID_SYS_CTRL				8
+#define CLKID_SYS_TS_PLL			9
+#define CLKID_SYS_DEV_ARB			10
+#define CLKID_SYS_MMC_PCLK			11
+#define CLKID_SYS_CPU_CTRL			12
+#define CLKID_SYS_JTAG_CTRL			13
+#define CLKID_SYS_IR_CTRL			14
+#define CLKID_SYS_IRQ_CTRL			15
+#define CLKID_SYS_MSR_CLK			16
+#define CLKID_SYS_ROM				17
+#define CLKID_SYS_UART_F			18
+#define CLKID_SYS_CPU_ARB			19
+#define CLKID_SYS_RSA				20
+#define CLKID_SYS_SAR_ADC			21
+#define CLKID_SYS_STARTUP			22
+#define CLKID_SYS_SECURE			23
+#define CLKID_SYS_SPIFC				24
+#define CLKID_SYS_NNA				25
+#define CLKID_SYS_ETH_MAC			26
+#define CLKID_SYS_GIC				27
+#define CLKID_SYS_RAMA				28
+#define CLKID_SYS_BIG_NIC			29
+#define CLKID_SYS_RAMB				30
+#define CLKID_SYS_AUDIO_PCLK			31
+#define CLKID_SYS_PWM_KL			32
+#define CLKID_SYS_PWM_IJ			33
+#define CLKID_SYS_USB				34
+#define CLKID_SYS_SD_EMMC_A			35
+#define CLKID_SYS_SD_EMMC_C			36
+#define CLKID_SYS_PWM_AB			37
+#define CLKID_SYS_PWM_CD			38
+#define CLKID_SYS_PWM_EF			39
+#define CLKID_SYS_PWM_GH			40
+#define CLKID_SYS_SPICC_1			41
+#define CLKID_SYS_SPICC_0			42
+#define CLKID_SYS_UART_A			43
+#define CLKID_SYS_UART_B			44
+#define CLKID_SYS_UART_C			45
+#define CLKID_SYS_UART_D			46
+#define CLKID_SYS_UART_E			47
+#define CLKID_SYS_I2C_M_A			48
+#define CLKID_SYS_I2C_M_B			49
+#define CLKID_SYS_I2C_M_C			50
+#define CLKID_SYS_I2C_M_D			51
+#define CLKID_SYS_I2S_S_A			52
+#define CLKID_SYS_RTC				53
+#define CLKID_SYS_GE2D				54
+#define CLKID_SYS_ISP				55
+#define CLKID_SYS_GPV_ISP_NIC			56
+#define CLKID_SYS_GPV_CVE_NIC			57
+#define CLKID_SYS_MIPI_DSI_HOST			58
+#define CLKID_SYS_MIPI_DSI_PHY			59
+#define CLKID_SYS_ETH_PHY			60
+#define CLKID_SYS_ACODEC			61
+#define CLKID_SYS_DWAP				62
+#define CLKID_SYS_DOS				63
+#define CLKID_SYS_CVE				64
+#define CLKID_SYS_VOUT				65
+#define CLKID_SYS_VC9000E			66
+#define CLKID_SYS_PWM_MN			67
+#define CLKID_SYS_SD_EMMC_B			68
+#define CLKID_AXI_SYS_NIC			69
+#define CLKID_AXI_ISP_NIC			70
+#define CLKID_AXI_CVE_NIC			71
+#define CLKID_AXI_RAMB				72
+#define CLKID_AXI_RAMA				73
+#define CLKID_AXI_CPU_DMC			74
+#define CLKID_AXI_NIC				75
+#define CLKID_AXI_DMA				76
+#define CLKID_AXI_MUX_NIC			77
+#define CLKID_AXI_CVE				78
+#define CLKID_AXI_DEV1_DMC			79
+#define CLKID_AXI_DEV0_DMC			80
+#define CLKID_AXI_DSP_DMC			81
+#define CLKID_12_24M_IN				82
+#define CLKID_12M_24M				83
+#define CLKID_FCLK_25M_DIV			84
+#define CLKID_FCLK_25M				85
+#define CLKID_GEN_SEL				86
+#define CLKID_GEN_DIV				87
+#define CLKID_GEN				88
+#define CLKID_SARADC_SEL			89
+#define CLKID_SARADC_DIV			90
+#define CLKID_SARADC				91
+#define CLKID_PWM_A_SEL				92
+#define CLKID_PWM_A_DIV				93
+#define CLKID_PWM_A				94
+#define CLKID_PWM_B_SEL				95
+#define CLKID_PWM_B_DIV				96
+#define CLKID_PWM_B				97
+#define CLKID_PWM_C_SEL				98
+#define CLKID_PWM_C_DIV				99
+#define CLKID_PWM_C				100
+#define CLKID_PWM_D_SEL				101
+#define CLKID_PWM_D_DIV				102
+#define CLKID_PWM_D				103
+#define CLKID_PWM_E_SEL				104
+#define CLKID_PWM_E_DIV				105
+#define CLKID_PWM_E				106
+#define CLKID_PWM_F_SEL				107
+#define CLKID_PWM_F_DIV				108
+#define CLKID_PWM_F				109
+#define CLKID_PWM_G_SEL				110
+#define CLKID_PWM_G_DIV				111
+#define CLKID_PWM_G				112
+#define CLKID_PWM_H_SEL				113
+#define CLKID_PWM_H_DIV				114
+#define CLKID_PWM_H				115
+#define CLKID_PWM_I_SEL				116
+#define CLKID_PWM_I_DIV				117
+#define CLKID_PWM_I				118
+#define CLKID_PWM_J_SEL				119
+#define CLKID_PWM_J_DIV				120
+#define CLKID_PWM_J				121
+#define CLKID_PWM_K_SEL				122
+#define CLKID_PWM_K_DIV				123
+#define CLKID_PWM_K				124
+#define CLKID_PWM_L_SEL				125
+#define CLKID_PWM_L_DIV				126
+#define CLKID_PWM_L				127
+#define CLKID_PWM_M_SEL				128
+#define CLKID_PWM_M_DIV				129
+#define CLKID_PWM_M				130
+#define CLKID_PWM_N_SEL				131
+#define CLKID_PWM_N_DIV				132
+#define CLKID_PWM_N				133
+#define CLKID_SPICC_A_SEL			134
+#define CLKID_SPICC_A_DIV			135
+#define CLKID_SPICC_A				136
+#define CLKID_SPICC_B_SEL			137
+#define CLKID_SPICC_B_DIV			138
+#define CLKID_SPICC_B				139
+#define CLKID_SPIFC_SEL				140
+#define CLKID_SPIFC_DIV				141
+#define CLKID_SPIFC				142
+#define CLKID_SD_EMMC_A_SEL			143
+#define CLKID_SD_EMMC_A_DIV			144
+#define CLKID_SD_EMMC_A				145
+#define CLKID_SD_EMMC_B_SEL			146
+#define CLKID_SD_EMMC_B_DIV			147
+#define CLKID_SD_EMMC_B				148
+#define CLKID_SD_EMMC_C_SEL			149
+#define CLKID_SD_EMMC_C_DIV			150
+#define CLKID_SD_EMMC_C				151
+#define CLKID_TS_DIV				152
+#define CLKID_TS				153
+#define CLKID_ETH_125M_DIV			154
+#define CLKID_ETH_125M				155
+#define CLKID_ETH_RMII_DIV			156
+#define CLKID_ETH_RMII				157
+#define CLKID_MIPI_DSI_MEAS_SEL			158
+#define CLKID_MIPI_DSI_MEAS_DIV			159
+#define CLKID_MIPI_DSI_MEAS			160
+#define CLKID_DSI_PHY_SEL			161
+#define CLKID_DSI_PHY_DIV			162
+#define CLKID_DSI_PHY				163
+#define CLKID_VOUT_MCLK_SEL			164
+#define CLKID_VOUT_MCLK_DIV			165
+#define CLKID_VOUT_MCLK				166
+#define CLKID_VOUT_ENC_SEL			167
+#define CLKID_VOUT_ENC_DIV			168
+#define CLKID_VOUT_ENC				169
+#define CLKID_HCODEC_0_SEL			170
+#define CLKID_HCODEC_0_DIV			171
+#define CLKID_HCODEC_0				172
+#define CLKID_HCODEC_1_SEL			173
+#define CLKID_HCODEC_1_DIV			174
+#define CLKID_HCODEC_1				175
+#define CLKID_HCODEC				176
+#define CLKID_VC9000E_ACLK_SEL			177
+#define CLKID_VC9000E_ACLK_DIV			178
+#define CLKID_VC9000E_ACLK			179
+#define CLKID_VC9000E_CORE_SEL			180
+#define CLKID_VC9000E_CORE_DIV			181
+#define CLKID_VC9000E_CORE			182
+#define CLKID_CSI_PHY0_SEL			183
+#define CLKID_CSI_PHY0_DIV			184
+#define CLKID_CSI_PHY0				185
+#define CLKID_DEWARPA_SEL			186
+#define CLKID_DEWARPA_DIV			187
+#define CLKID_DEWARPA				188
+#define CLKID_ISP0_SEL				189
+#define CLKID_ISP0_DIV				190
+#define CLKID_ISP0				191
+#define CLKID_NNA_CORE_SEL			192
+#define CLKID_NNA_CORE_DIV			193
+#define CLKID_NNA_CORE				194
+#define CLKID_GE2D_SEL				195
+#define CLKID_GE2D_DIV				196
+#define CLKID_GE2D				197
+#define CLKID_VAPB_SEL				198
+#define CLKID_VAPB_DIV				199
+#define CLKID_VAPB				200
+
+#endif  /* _DT_BINDINGS_CLOCK_AMLOGIC_C3_PERIPHERALS_CLKC_H */
-- 
cgit v1.2.3


From 668b6a2ef832a878494cc1b12a881c8ec0494b25 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 30 May 2024 19:08:34 +0200
Subject: flow_dissector: add support for tunnel control flags

Dissect [no]csum, [no]dontfrag, [no]oam, [no]crit flags from skb metadata.
This is a prerequisite for matching these control flags using TC flower.

Suggested-by: Ilya Maximets <i.maximets@ovn.org>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/flow_dissector.h |  9 +++++++++
 include/net/ip_tunnels.h     | 12 ++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 9ab376d1a677..99626475c3f4 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -329,6 +329,14 @@ struct flow_dissector_key_cfm {
 #define FLOW_DIS_CFM_MDL_MASK GENMASK(7, 5)
 #define FLOW_DIS_CFM_MDL_MAX 7
 
+/**
+ * struct flow_dissector_key_enc_flags: tunnel metadata control flags
+ * @flags: tunnel control flags
+ */
+struct flow_dissector_key_enc_flags {
+	u32 flags;
+};
+
 enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */
 	FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
@@ -363,6 +371,7 @@ enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */
 	FLOW_DISSECTOR_KEY_CFM, /* struct flow_dissector_key_cfm */
 	FLOW_DISSECTOR_KEY_IPSEC, /* struct flow_dissector_key_ipsec */
+	FLOW_DISSECTOR_KEY_ENC_FLAGS, /* struct flow_dissector_key_enc_flags */
 
 	FLOW_DISSECTOR_KEY_MAX,
 };
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 9a6a08ec7713..5a530d4fb02c 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -247,6 +247,18 @@ static inline bool ip_tunnel_is_options_present(const unsigned long *flags)
 	return ip_tunnel_flags_intersect(flags, present);
 }
 
+static inline void ip_tunnel_set_encflags_present(unsigned long *flags)
+{
+	IP_TUNNEL_DECLARE_FLAGS(present) = { };
+
+	__set_bit(IP_TUNNEL_CSUM_BIT, present);
+	__set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, present);
+	__set_bit(IP_TUNNEL_OAM_BIT, present);
+	__set_bit(IP_TUNNEL_CRIT_OPT_BIT, present);
+
+	ip_tunnel_flags_or(flags, flags, present);
+}
+
 static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags)
 {
 	IP_TUNNEL_DECLARE_FLAGS(supp) = { };
-- 
cgit v1.2.3


From 1d17568e74dedbcb54d36af0662a15128297d681 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 30 May 2024 19:08:35 +0200
Subject: net/sched: cls_flower: add support for matching tunnel control flags

extend cls_flower to match TUNNEL_FLAGS_PRESENT bits in tunnel metadata.

Suggested-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/uapi/linux/pkt_cls.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 229fc925ec3a..b6d38f5fd7c0 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -554,6 +554,9 @@ enum {
 	TCA_FLOWER_KEY_SPI,		/* be32 */
 	TCA_FLOWER_KEY_SPI_MASK,	/* be32 */
 
+	TCA_FLOWER_KEY_ENC_FLAGS,	/* u32 */
+	TCA_FLOWER_KEY_ENC_FLAGS_MASK,	/* u32 */
+
 	__TCA_FLOWER_MAX,
 };
 
-- 
cgit v1.2.3


From 071115301838c6c265065dd5d6bf43a9a987a550 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 30 May 2024 16:36:14 -0700
Subject: tcp: wrap mptcp and decrypted checks into tcp_skb_can_collapse_rx()

tcp_skb_can_collapse() checks for conditions which don't make
sense on input. Because of this we ended up sprinkling a few
pairs of mptcp_skb_can_collapse() and skb_cmp_decrypted() calls
on the input path. Group them in a new helper. This should make
it less likely that someone will check mptcp and not decrypted
or vice versa when adding new code.

This implicitly adds a decrypted check early in tcp_collapse().
AFAIU this will very slightly increase our ability to collapse
packets under memory pressure, not a real bug.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/tcp.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 32815a40dea1..32741856da01 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1071,6 +1071,13 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
 		      skb_pure_zcopy_same(to, from));
 }
 
+static inline bool tcp_skb_can_collapse_rx(const struct sk_buff *to,
+					   const struct sk_buff *from)
+{
+	return likely(mptcp_skb_can_collapse(to, from) &&
+		      !skb_cmp_decrypted(to, from));
+}
+
 /* Events passed to congestion control interface */
 enum tcp_ca_event {
 	CA_EVENT_TX_START,	/* first transmit when no packets in flight */
-- 
cgit v1.2.3


From 1be68a87ab333af37b02ad928a724a722a5a8203 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 30 May 2024 16:36:15 -0700
Subject: tcp: add a helper for setting EOR on tail skb

TLS (and hopefully soon PSP will) use EOR to prevent skbs
with different decrypted state from getting merged, without
adding new tests to the skb handling. In both cases once
the connection switches to an "encrypted" state, all subsequent
skbs will be encrypted, so a single "EOR fence" is sufficient
to prevent mixing.

Add a helper for setting the EOR bit, to make this arrangement
more explicit.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/tcp.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 32741856da01..08c3b99501cf 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1066,6 +1066,7 @@ static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb)
 static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
 					const struct sk_buff *from)
 {
+	/* skb_cmp_decrypted() not needed, use tcp_write_collapse_fence() */
 	return likely(tcp_skb_can_collapse_to(to) &&
 		      mptcp_skb_can_collapse(to, from) &&
 		      skb_pure_zcopy_same(to, from));
@@ -2102,6 +2103,14 @@ static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct soc
 	tcp_wmem_free_skb(sk, skb);
 }
 
+static inline void tcp_write_collapse_fence(struct sock *sk)
+{
+	struct sk_buff *skb = tcp_write_queue_tail(sk);
+
+	if (skb)
+		TCP_SKB_CB(skb)->eor = 1;
+}
+
 static inline void tcp_push_pending_frames(struct sock *sk)
 {
 	if (tcp_send_head(sk)) {
-- 
cgit v1.2.3


From f85d39dd7ed89ffdd622bc1de247ffba8d961504 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@gmail.com>
Date: Mon, 27 May 2024 19:35:38 +0200
Subject: kcov, usb: disable interrupts in kcov_remote_start_usb_softirq

After commit 8fea0c8fda30 ("usb: core: hcd: Convert from tasklet to BH
workqueue"), usb_giveback_urb_bh() runs in the BH workqueue with
interrupts enabled.

Thus, the remote coverage collection section in usb_giveback_urb_bh()->
__usb_hcd_giveback_urb() might be interrupted, and the interrupt handler
might invoke __usb_hcd_giveback_urb() again.

This breaks KCOV, as it does not support nested remote coverage collection
sections within the same context (neither in task nor in softirq).

Update kcov_remote_start/stop_usb_softirq() to disable interrupts for the
duration of the coverage collection section to avoid nested sections in
the softirq context (in addition to such in the task context, which are
already handled).

Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Closes: https://lore.kernel.org/linux-usb/0f4d1964-7397-485b-bc48-11c01e2fcbca@I-love.SAKURA.ne.jp/
Closes: https://syzkaller.appspot.com/bug?extid=0438378d6f157baae1a2
Suggested-by: Alan Stern <stern@rowland.harvard.edu>
Fixes: 8fea0c8fda30 ("usb: core: hcd: Convert from tasklet to BH workqueue")
Cc: stable@vger.kernel.org
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrey Konovalov <andreyknvl@gmail.com>
Link: https://lore.kernel.org/r/20240527173538.4989-1-andrey.konovalov@linux.dev
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kcov.h | 47 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index b851ba415e03..1068a7318d89 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -55,21 +55,47 @@ static inline void kcov_remote_start_usb(u64 id)
 
 /*
  * The softirq flavor of kcov_remote_*() functions is introduced as a temporary
- * work around for kcov's lack of nested remote coverage sections support in
- * task context. Adding support for nested sections is tracked in:
- * https://bugzilla.kernel.org/show_bug.cgi?id=210337
+ * workaround for KCOV's lack of nested remote coverage sections support.
+ *
+ * Adding support is tracked in https://bugzilla.kernel.org/show_bug.cgi?id=210337.
+ *
+ * kcov_remote_start_usb_softirq():
+ *
+ * 1. Only collects coverage when called in the softirq context. This allows
+ *    avoiding nested remote coverage collection sections in the task context.
+ *    For example, USB/IP calls usb_hcd_giveback_urb() in the task context
+ *    within an existing remote coverage collection section. Thus, KCOV should
+ *    not attempt to start collecting coverage within the coverage collection
+ *    section in __usb_hcd_giveback_urb() in this case.
+ *
+ * 2. Disables interrupts for the duration of the coverage collection section.
+ *    This allows avoiding nested remote coverage collection sections in the
+ *    softirq context (a softirq might occur during the execution of a work in
+ *    the BH workqueue, which runs with in_serving_softirq() > 0).
+ *    For example, usb_giveback_urb_bh() runs in the BH workqueue with
+ *    interrupts enabled, so __usb_hcd_giveback_urb() might be interrupted in
+ *    the middle of its remote coverage collection section, and the interrupt
+ *    handler might invoke __usb_hcd_giveback_urb() again.
  */
 
-static inline void kcov_remote_start_usb_softirq(u64 id)
+static inline unsigned long kcov_remote_start_usb_softirq(u64 id)
 {
-	if (in_serving_softirq())
+	unsigned long flags = 0;
+
+	if (in_serving_softirq()) {
+		local_irq_save(flags);
 		kcov_remote_start_usb(id);
+	}
+
+	return flags;
 }
 
-static inline void kcov_remote_stop_softirq(void)
+static inline void kcov_remote_stop_softirq(unsigned long flags)
 {
-	if (in_serving_softirq())
+	if (in_serving_softirq()) {
 		kcov_remote_stop();
+		local_irq_restore(flags);
+	}
 }
 
 #ifdef CONFIG_64BIT
@@ -103,8 +129,11 @@ static inline u64 kcov_common_handle(void)
 }
 static inline void kcov_remote_start_common(u64 id) {}
 static inline void kcov_remote_start_usb(u64 id) {}
-static inline void kcov_remote_start_usb_softirq(u64 id) {}
-static inline void kcov_remote_stop_softirq(void) {}
+static inline unsigned long kcov_remote_start_usb_softirq(u64 id)
+{
+	return 0;
+}
+static inline void kcov_remote_stop_softirq(unsigned long flags) {}
 
 #endif /* CONFIG_KCOV */
 #endif /* _LINUX_KCOV_H */
-- 
cgit v1.2.3


From fe8db0bbe04d31ab17dc60e205c4a3365c92e67c Mon Sep 17 00:00:00 2001
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Date: Fri, 10 May 2024 20:12:42 +0000
Subject: usb: typec: Update sysfs when setting ops

When adding altmode ops, update the sysfs group so that visibility is
also recalculated.

Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Benson Leung <bleung@chromium.org>
Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Signed-off-by: Jameson Thies <jthies@google.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20240510201244.2968152-3-jthies@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/typec.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index b35b427561ab..549275f8ac1b 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -167,6 +167,9 @@ struct typec_port *typec_altmode2port(struct typec_altmode *alt);
 
 void typec_altmode_update_active(struct typec_altmode *alt, bool active);
 
+void typec_altmode_set_ops(struct typec_altmode *alt,
+			   const struct typec_altmode_ops *ops);
+
 enum typec_plug_index {
 	TYPEC_PLUG_SOP_P,
 	TYPEC_PLUG_SOP_PP,
-- 
cgit v1.2.3


From 971187350602d03c4a27c0783ff412502b95720a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 4 Jul 2023 14:17:19 +0100
Subject: driver core: remove devm_device_add_groups()

There is no more in-kernel users of this function, and no driver should
ever be using it, so remove it from the kernel.

Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: "Rafael J. Wysocki" <rafael@kernel.org>
Link: https://lore.kernel.org/r/20230704131715.44454-8-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index fc3bd7116ab9..ace039151cb8 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1220,8 +1220,6 @@ static inline void device_remove_group(struct device *dev,
 	return device_remove_groups(dev, groups);
 }
 
-int __must_check devm_device_add_groups(struct device *dev,
-					const struct attribute_group **groups);
 int __must_check devm_device_add_group(struct device *dev,
 				       const struct attribute_group *grp);
 
-- 
cgit v1.2.3


From 44a45be57f85165761fdabf072f9a97aa026ff61 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Thu, 23 May 2024 13:00:00 +0200
Subject: sysfs: Unbreak the build around sysfs_bin_attr_simple_read()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Günter reports build breakage for m68k "m5208evb_defconfig" plus
CONFIG_BLK_DEV_INITRD=y caused by commit 66bc1a173328 ("treewide:
Use sysfs_bin_attr_simple_read() helper").

The defconfig disables CONFIG_SYSFS, so sysfs_bin_attr_simple_read()
is not compiled into the kernel.  But init/initramfs.c references
that function in the initializer of a struct bin_attribute.

Add an empty static inline to avoid the build breakage.

Fixes: 66bc1a173328 ("treewide: Use sysfs_bin_attr_simple_read() helper")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Closes: https://lore.kernel.org/r/e12b0027-b199-4de7-b83d-668171447ccc@roeck-us.net
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Rafael J. Wysocki <rafael@kernel.org>
Link: https://lore.kernel.org/r/05f4290439a58730738a15b0c99cd8576c4aa0d9.1716461752.git.lukas@wunner.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index a7d725fbf739..c4e64dc11206 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -750,6 +750,15 @@ static inline int sysfs_emit_at(char *buf, int at, const char *fmt, ...)
 {
 	return 0;
 }
+
+static inline ssize_t sysfs_bin_attr_simple_read(struct file *file,
+						 struct kobject *kobj,
+						 struct bin_attribute *attr,
+						 char *buf, loff_t off,
+						 size_t count)
+{
+	return 0;
+}
 #endif /* CONFIG_SYSFS */
 
 static inline int __must_check sysfs_create_file(struct kobject *kobj,
-- 
cgit v1.2.3


From 42675b723b4842bca7bfb0f209aa9a493a10324a Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 3 Jun 2024 15:07:05 -0400
Subject: function_graph: Convert ret_stack to a series of longs

In order to make it possible to have multiple callbacks registered with the
function_graph tracer, the retstack needs to be converted from an array of
ftrace_ret_stack structures to an array of longs. This will allow to store
the list of callbacks on the stack for the return side of the functions.

Link: https://lore.kernel.org/linux-trace-kernel/171509092742.162236.4427737821399314856.stgit@devnote2
Link: https://lore.kernel.org/linux-trace-kernel/20240603190821.073111754@goodmis.org

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 61591ac6eab6..352939dab3a5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1402,7 +1402,7 @@ struct task_struct {
 	int				curr_ret_depth;
 
 	/* Stack of return addresses for return function tracing: */
-	struct ftrace_ret_stack		*ret_stack;
+	unsigned long			*ret_stack;
 
 	/* Timestamp for last schedule: */
 	unsigned long long		ftrace_timestamp;
-- 
cgit v1.2.3


From 7aa1eaef9f4282c9acd39588b1fdc9dda7e73f34 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 3 Jun 2024 15:07:08 -0400
Subject: function_graph: Allow multiple users to attach to function graph

Allow for multiple users to attach to function graph tracer at the same
time. Only 16 simultaneous users can attach to the tracer. This is because
there's an array that stores the pointers to the attached fgraph_ops. When
a function being traced is entered, each of the ftrace_ops entryfunc is
called and if it returns non zero, its index into the array will be added
to the shadow stack.

On exit of the function being traced, the shadow stack will contain the
indexes of the ftrace_ops on the array that want their retfunc to be
called.

Because a function may sleep for a long time (if a task sleeps itself),
the return of the function may be literally days later. If the ftrace_ops
is removed, its place on the array is replaced with a ftrace_ops that
contains the stub functions and that will be called when the function
finally returns.

If another ftrace_ops is added that happens to get the same index into the
array, its return function may be called. But that's actually the way
things current work with the old function graph tracer. If one tracer is
removed and another is added, the new one will get the return calls of the
function traced by the previous one, thus this is not a regression. This
can be fixed by adding a counter to each time the array item is updated and
save that on the shadow stack as well, such that it won't be called if the
index saved does not match the index on the array.

Note, being able to filter functions when both are called is not completely
handled yet, but that shouldn't be too hard to manage.

Co-developed with Masami Hiramatsu:
Link: https://lore.kernel.org/linux-trace-kernel/171509096221.162236.8806372072523195752.stgit@devnote2
Link: https://lore.kernel.org/linux-trace-kernel/20240603190821.555493396@goodmis.org

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 800995c425e0..8f00a7415bd5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1038,6 +1038,7 @@ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace);
 struct fgraph_ops {
 	trace_func_graph_ent_t		entryfunc;
 	trace_func_graph_ret_t		retfunc;
+	int				idx;
 };
 
 /*
@@ -1072,7 +1073,7 @@ function_graph_enter(unsigned long ret, unsigned long func,
 		     unsigned long frame_pointer, unsigned long *retp);
 
 struct ftrace_ret_stack *
-ftrace_graph_get_ret_stack(struct task_struct *task, int idx);
+ftrace_graph_get_ret_stack(struct task_struct *task, int skip);
 
 unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
 				    unsigned long ret, unsigned long *retp);
-- 
cgit v1.2.3


From 37238abe3cb47b8daaa8706c9949f67b2a705cf1 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 3 Jun 2024 15:07:11 -0400
Subject: ftrace/function_graph: Pass fgraph_ops to function graph callbacks

Pass the fgraph_ops structure to the function graph callbacks. This will
allow callbacks to add a descriptor to a fgraph_ops private field that wil
be added in the future and use it for the callbacks. This will be useful
when more than one callback can be registered to the function graph tracer.

Co-developed with Masami Hiramatsu:
Link: https://lore.kernel.org/linux-trace-kernel/171509098588.162236.4787930115997357578.stgit@devnote2
Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.035147698@goodmis.org

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8f00a7415bd5..4d817b85ac0d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1027,11 +1027,15 @@ struct ftrace_graph_ret {
 	unsigned long long rettime;
 } __packed;
 
+struct fgraph_ops;
+
 /* Type of the callback handlers for tracing function graph*/
-typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */
-typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */
+typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *,
+				       struct fgraph_ops *); /* return */
+typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *,
+				      struct fgraph_ops *); /* entry */
 
-extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace);
+extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph_ops *gops);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
-- 
cgit v1.2.3


From 26dda5631d1bb2f254f4c94aa87ee6c92a89cfdb Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 3 Jun 2024 15:07:12 -0400
Subject: ftrace: Allow function_graph tracer to be enabled in instances

Now that function graph tracing can handle more than one user, allow it to
be enabled in the ftrace instances. Note, the filtering of the functions is
still joined by the top level set_ftrace_filter and friends, as well as the
graph and nograph files.

Co-developed with Masami Hiramatsu:
Link: https://lore.kernel.org/linux-trace-kernel/171509099743.162236.1699959255446248163.stgit@devnote2
Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.190630762@goodmis.org

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 4d817b85ac0d..fd656e6d6b7c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1042,6 +1042,7 @@ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph
 struct fgraph_ops {
 	trace_func_graph_ent_t		entryfunc;
 	trace_func_graph_ret_t		retfunc;
+	void				*private;
 	int				idx;
 };
 
-- 
cgit v1.2.3


From ab6b84630382914ffcbab59f4913c9a60971d034 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 3 Jun 2024 15:07:13 -0400
Subject: ftrace: Allow ftrace startup flags to exist without dynamic ftrace

Some of the flags for ftrace_startup() may be exposed even when
CONFIG_DYNAMIC_FTRACE is not configured in. This is fine as the difference
between dynamic ftrace and static ftrace is done within the internals of
ftrace itself. No need to have use cases fail to compile because dynamic
ftrace is disabled.

This change is needed to move some of the logic of what is passed to
ftrace_startup() out of the parameters of ftrace_startup().

Link: https://lore.kernel.org/linux-trace-kernel/171509100890.162236.4362350342549122222.stgit@devnote2
Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.350654104@goodmis.org

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fd656e6d6b7c..586018744785 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -509,6 +509,15 @@ static inline void stack_tracer_disable(void) { }
 static inline void stack_tracer_enable(void) { }
 #endif
 
+enum {
+	FTRACE_UPDATE_CALLS		= (1 << 0),
+	FTRACE_DISABLE_CALLS		= (1 << 1),
+	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
+	FTRACE_START_FUNC_RET		= (1 << 3),
+	FTRACE_STOP_FUNC_RET		= (1 << 4),
+	FTRACE_MAY_SLEEP		= (1 << 5),
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 void ftrace_arch_code_modify_prepare(void);
@@ -603,15 +612,6 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
 void ftrace_free_filter(struct ftrace_ops *ops);
 void ftrace_ops_set_global_filter(struct ftrace_ops *ops);
 
-enum {
-	FTRACE_UPDATE_CALLS		= (1 << 0),
-	FTRACE_DISABLE_CALLS		= (1 << 1),
-	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
-	FTRACE_START_FUNC_RET		= (1 << 3),
-	FTRACE_STOP_FUNC_RET		= (1 << 4),
-	FTRACE_MAY_SLEEP		= (1 << 5),
-};
-
 /*
  * The FTRACE_UPDATE_* enum is used to pass information back
  * from the ftrace_update_record() and ftrace_test_record()
-- 
cgit v1.2.3


From 5fccc7552ccbc521bad61653ee739b1196b1bc53 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Mon, 3 Jun 2024 15:07:14 -0400
Subject: ftrace: Add subops logic to allow one ops to manage many

There are cases where a single system will use a single function callback
to handle multiple users. For example, to allow function_graph tracer to
have multiple users where each can trace their own set of functions, it is
useful to only have one ftrace_ops registered to ftrace that will call a
function by the function_graph tracer to handle the multiplexing with the
different registered  function_graph tracers.

Add a "subop_list" to the ftrace_ops that will hold a list of other
ftrace_ops that the top ftrace_ops will manage.

The function ftrace_startup_subops() that takes the manager ftrace_ops and
a subop ftrace_ops it will manage. If there are no subops with the
ftrace_ops yet, it will copy the ftrace_ops subop filters to the manager
ftrace_ops and register that with ftrace_startup(), and adds the subop to
its subop_list. If the manager ops already has something registered, it
will then merge the new subop filters with what it has and enable the new
functions that covers all the subops it has.

To remove a subop, ftrace_shutdown_subops() is called which will use the
subop_list of the manager ops to rebuild all the functions it needs to
trace, and update the ftrace records to only call the functions it now has
registered. If there are no more functions registered, it will then call
ftrace_shutdown() to disable itself completely.

Note, it is up to the manager ops callback to always make sure that the
subops callbacks are called if its filter matches, as there are times in
the update where the callback could be calling more functions than those
that are currently registered.

This could be updated to handle other systems other than function_graph,
for example, fprobes could use this (but will need an interface to call
ftrace_startup_subops()).

Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.508431129@goodmis.org

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 586018744785..978a1d3b270a 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -334,6 +334,7 @@ struct ftrace_ops {
 	unsigned long			trampoline;
 	unsigned long			trampoline_size;
 	struct list_head		list;
+	struct list_head		subop_list;
 	ftrace_ops_func_t		ops_func;
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 	unsigned long			direct_call;
-- 
cgit v1.2.3


From d9bbfbd14f58d2955cc7a3efa8ae6d4e09ee5995 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Mon, 3 Jun 2024 15:07:15 -0400
Subject: ftrace: Allow subops filtering to be modified

The subops filters use a "manager" ops to enable and disable its filters.
The manager ops can handle more than one subops, and its filter is what
controls what functions get set. Add a ftrace_hash_move_and_update_subops()
function that will update the manager ops when the subops filters change.

Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.673932251@goodmis.org

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 978a1d3b270a..63238a9a9270 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -227,6 +227,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops);
  *             ftrace_enabled.
  * DIRECT - Used by the direct ftrace_ops helper for direct functions
  *            (internal ftrace only, should not be used by others)
+ * SUBOP  - Is controlled by another op in field managed.
  */
 enum {
 	FTRACE_OPS_FL_ENABLED			= BIT(0),
@@ -247,6 +248,7 @@ enum {
 	FTRACE_OPS_FL_TRACE_ARRAY		= BIT(15),
 	FTRACE_OPS_FL_PERMANENT                 = BIT(16),
 	FTRACE_OPS_FL_DIRECT			= BIT(17),
+	FTRACE_OPS_FL_SUBOP			= BIT(18),
 };
 
 #ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
@@ -336,6 +338,7 @@ struct ftrace_ops {
 	struct list_head		list;
 	struct list_head		subop_list;
 	ftrace_ops_func_t		ops_func;
+	struct ftrace_ops		*managed;
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 	unsigned long			direct_call;
 #endif
-- 
cgit v1.2.3


From c132be2c4fcc1150ad0791c2a85dd4c9ad0bd0c8 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 3 Jun 2024 15:07:16 -0400
Subject: function_graph: Have the instances use their own ftrace_ops for
 filtering

Allow for instances to have their own ftrace_ops part of the fgraph_ops
that makes the funtion_graph tracer filter on the set_ftrace_filter file
of the instance and not the top instance.

This uses the new ftrace_startup_subops(), by using graph_ops as the
"manager ops" that defines the callback function and adds the functions
defined by the filters of the ops for each trace instance. The callback
defined by the manager ops will call the registered fgraph ops that were
added to the fgraph_array.

Co-developed with Masami Hiramatsu:
Link: https://lore.kernel.org/linux-trace-kernel/171509102088.162236.15758883237657317789.stgit@devnote2
Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.832946261@goodmis.org

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 63238a9a9270..8f865689e868 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1046,6 +1046,7 @@ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph
 struct fgraph_ops {
 	trace_func_graph_ent_t		entryfunc;
 	trace_func_graph_ret_t		retfunc;
+	struct ftrace_ops		ops; /* for the hash lists */
 	void				*private;
 	int				idx;
 };
-- 
cgit v1.2.3


From df3ec5da6a1e7f6e142680d7c5266d3af187170b Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Mon, 3 Jun 2024 15:07:17 -0400
Subject: function_graph: Add pid tracing back to function graph tracer

Now that the function_graph has a main callback that handles the function
graph subops tracing, it no longer honors the pid filtering of ftrace. Add
back this logic in the function_graph code to update the gops callback for
the entry function to test if it should trace the current task or not.

Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.991720703@goodmis.org

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8f865689e868..e31ec8516de1 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1040,6 +1040,7 @@ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *,
 				      struct fgraph_ops *); /* entry */
 
 extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph_ops *gops);
+bool ftrace_pids_enabled(struct ftrace_ops *ops);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
@@ -1048,6 +1049,7 @@ struct fgraph_ops {
 	trace_func_graph_ret_t		retfunc;
 	struct ftrace_ops		ops; /* for the hash lists */
 	void				*private;
+	trace_func_graph_ent_t		saved_func;
 	int				idx;
 };
 
-- 
cgit v1.2.3


From 4497412a1f7b5d9e0849f125652f2cc58cdba562 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 3 Jun 2024 15:07:19 -0400
Subject: function_graph: Add "task variables" per task for fgraph_ops

Add a "task variables" array on the tasks shadow ret_stack that is the
size of longs for each possible registered fgraph_ops. That's a total
of 16, taking up 8 * 16 = 128 bytes (out of a page size 4k).

This will allow for fgraph_ops to do specific features on a per task basis
having a way to maintain state for each task.

Co-developed with Masami Hiramatsu:
Link: https://lore.kernel.org/linux-trace-kernel/171509104383.162236.12239656156685718550.stgit@devnote2
Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.308806126@goodmis.org

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e31ec8516de1..82cfc8e09cfd 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1089,6 +1089,7 @@ ftrace_graph_get_ret_stack(struct task_struct *task, int skip);
 
 unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
 				    unsigned long ret, unsigned long *retp);
+unsigned long *fgraph_get_task_var(struct fgraph_ops *gops);
 
 /*
  * Sometimes we don't want to trace a function with the function
-- 
cgit v1.2.3


From 12117f3307b63f287756d7ec8cc4f11b94e1206a Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 3 Jun 2024 15:07:20 -0400
Subject: function_graph: Move set_graph_function tests to shadow stack global
 var

The use of the task->trace_recursion for the logic used for the
set_graph_function was a bit of an abuse of that variable. Now that there
exists global vars that are per stack for registered graph traces, use that
instead.

Link: https://lore.kernel.org/linux-trace-kernel/171509105520.162236.10339831553995971290.stgit@devnote2
Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.472955399@goodmis.org

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_recursion.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index 24ea8ac049b4..02e6afc6d7fe 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -44,9 +44,6 @@ enum {
  */
 	TRACE_IRQ_BIT,
 
-	/* Set if the function is in the set_graph_function file */
-	TRACE_GRAPH_BIT,
-
 	/*
 	 * In the very unlikely case that an interrupt came in
 	 * at a start of graph tracing, and we want to trace
@@ -60,7 +57,7 @@ enum {
 	 * that preempted a softirq start of a function that
 	 * preempted normal context!!!! Luckily, it can't be
 	 * greater than 3, so the next two bits are a mask
-	 * of what the depth is when we set TRACE_GRAPH_BIT
+	 * of what the depth is when we set TRACE_GRAPH_FL
 	 */
 
 	TRACE_GRAPH_DEPTH_START_BIT,
-- 
cgit v1.2.3


From 068da098eb504469dc195137ae35eeacfe0c8de9 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 3 Jun 2024 15:07:21 -0400
Subject: function_graph: Move graph depth stored data to shadow stack global
 var

The use of the task->trace_recursion for the logic used for the function
graph depth was a bit of an abuse of that variable. Now that there
exists global vars that are per stack for registered graph traces, use that
instead.

Link: https://lore.kernel.org/linux-trace-kernel/171509106728.162236.2398372644430125344.stgit@devnote2
Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.634870264@goodmis.org

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_recursion.h | 29 -----------------------------
 1 file changed, 29 deletions(-)

(limited to 'include')

diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index 02e6afc6d7fe..fdfb6f66718a 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -44,25 +44,6 @@ enum {
  */
 	TRACE_IRQ_BIT,
 
-	/*
-	 * In the very unlikely case that an interrupt came in
-	 * at a start of graph tracing, and we want to trace
-	 * the function in that interrupt, the depth can be greater
-	 * than zero, because of the preempted start of a previous
-	 * trace. In an even more unlikely case, depth could be 2
-	 * if a softirq interrupted the start of graph tracing,
-	 * followed by an interrupt preempting a start of graph
-	 * tracing in the softirq, and depth can even be 3
-	 * if an NMI came in at the start of an interrupt function
-	 * that preempted a softirq start of a function that
-	 * preempted normal context!!!! Luckily, it can't be
-	 * greater than 3, so the next two bits are a mask
-	 * of what the depth is when we set TRACE_GRAPH_FL
-	 */
-
-	TRACE_GRAPH_DEPTH_START_BIT,
-	TRACE_GRAPH_DEPTH_END_BIT,
-
 	/*
 	 * To implement set_graph_notrace, if this bit is set, we ignore
 	 * function graph tracing of called functions, until the return
@@ -78,16 +59,6 @@ enum {
 #define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(1<<(bit)); } while (0)
 #define trace_recursion_test(bit)	((current)->trace_recursion & (1<<(bit)))
 
-#define trace_recursion_depth() \
-	(((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3)
-#define trace_recursion_set_depth(depth) \
-	do {								\
-		current->trace_recursion &=				\
-			~(3 << TRACE_GRAPH_DEPTH_START_BIT);		\
-		current->trace_recursion |=				\
-			((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT;	\
-	} while (0)
-
 #define TRACE_CONTEXT_BITS	4
 
 #define TRACE_FTRACE_START	TRACE_FTRACE_BIT
-- 
cgit v1.2.3


From b84214890a9bc56f0fe4ec4fc72f2307ed05096d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 3 Jun 2024 15:07:22 -0400
Subject: function_graph: Move graph notrace bit to shadow stack global var

The use of the task->trace_recursion for the logic used for the function
graph no-trace was a bit of an abuse of that variable. Now that there
exists global vars that are per stack for registered graph traces, use
that instead.

Link: https://lore.kernel.org/linux-trace-kernel/171509107907.162236.6564679266777519065.stgit@devnote2
Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.796709456@goodmis.org

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_recursion.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index fdfb6f66718a..ae04054a1be3 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -44,13 +44,6 @@ enum {
  */
 	TRACE_IRQ_BIT,
 
-	/*
-	 * To implement set_graph_notrace, if this bit is set, we ignore
-	 * function graph tracing of called functions, until the return
-	 * function is called to clear it.
-	 */
-	TRACE_GRAPH_NOTRACE_BIT,
-
 	/* Used to prevent recursion recording from recursing. */
 	TRACE_RECORD_RECURSION_BIT,
 };
-- 
cgit v1.2.3


From 91c46b0aa917546432b5b219494859cda0edc39e Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 3 Jun 2024 15:07:23 -0400
Subject: function_graph: Implement fgraph_reserve_data() and
 fgraph_retrieve_data()

Added functions that can be called by a fgraph_ops entryfunc and retfunc to
store state between the entry of the function being traced to the exit of
the same function. The fgraph_ops entryfunc() may call
fgraph_reserve_data() to store up to 32 words onto the task's shadow
ret_stack and this then can be retrieved by fgraph_retrieve_data() called
by the corresponding retfunc().

Co-developed with Masami Hiramatsu:
Link: https://lore.kernel.org/linux-trace-kernel/171509109089.162236.11372474169781184034.stgit@devnote2
Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.959703050@goodmis.org

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 82cfc8e09cfd..9f61556a9491 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1053,6 +1053,9 @@ struct fgraph_ops {
 	int				idx;
 };
 
+void *fgraph_reserve_data(int idx, int size_bytes);
+void *fgraph_retrieve_data(int idx, int *size_bytes);
+
 /*
  * Stack of return addresses for functions
  * of a thread.
-- 
cgit v1.2.3


From 3aee48a8e01f98d3285a0064285b0152cdbb8a9e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 8 May 2024 21:46:57 +0300
Subject: misc: eeprom_93xx46: Hide legacy platform data in the driver

First of all, there is no user for the platform data in the kernel.
Second, it needs a lot of updates to follow the modern standards
of the kernel, including proper Device Tree bindings and device
property handling.

For now, just hide the legacy platform data in the driver's code.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20240508184905.2102633-5-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/eeprom_93xx46.h | 32 --------------------------------
 1 file changed, 32 deletions(-)
 delete mode 100644 include/linux/eeprom_93xx46.h

(limited to 'include')

diff --git a/include/linux/eeprom_93xx46.h b/include/linux/eeprom_93xx46.h
deleted file mode 100644
index 34c2175e6a1e..000000000000
--- a/include/linux/eeprom_93xx46.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Module: eeprom_93xx46
- * platform description for 93xx46 EEPROMs.
- */
-#include <linux/gpio/consumer.h>
-
-struct eeprom_93xx46_platform_data {
-	unsigned char	flags;
-#define EE_ADDR8	0x01		/*  8 bit addr. cfg */
-#define EE_ADDR16	0x02		/* 16 bit addr. cfg */
-#define EE_READONLY	0x08		/* forbid writing */
-#define EE_SIZE1K	0x10		/* 1 kb of data, that is a 93xx46 */
-#define EE_SIZE2K	0x20		/* 2 kb of data, that is a 93xx56 */
-#define EE_SIZE4K	0x40		/* 4 kb of data, that is a 93xx66 */
-
-	unsigned int	quirks;
-/* Single word read transfers only; no sequential read. */
-#define EEPROM_93XX46_QUIRK_SINGLE_WORD_READ		(1 << 0)
-/* Instructions such as EWEN are (addrlen + 2) in length. */
-#define EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH		(1 << 1)
-/* Add extra cycle after address during a read */
-#define EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE		BIT(2)
-
-	/*
-	 * optional hooks to control additional logic
-	 * before and after spi transfer.
-	 */
-	void (*prepare)(void *);
-	void (*finish)(void *);
-	struct gpio_desc *select;
-};
-- 
cgit v1.2.3


From 1968845d358e108cfbfba45538d64b3cbdf04ac2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 31 May 2024 17:51:29 +0300
Subject: driver core: device.h: Group of_node handling declarations and
 definitions

There are a few of_node related APIs defined in the driver core.
Group the respective declarations and definitions in the header.
There is no functional change.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20240531145129.1506733-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index fc3bd7116ab9..56f266429229 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1031,13 +1031,6 @@ static inline void device_lock_assert(struct device *dev)
 	lockdep_assert_held(&dev->mutex);
 }
 
-static inline struct device_node *dev_of_node(struct device *dev)
-{
-	if (!IS_ENABLED(CONFIG_OF) || !dev)
-		return NULL;
-	return dev->of_node;
-}
-
 static inline bool dev_has_sync_state(struct device *dev)
 {
 	if (!dev)
@@ -1144,10 +1137,18 @@ void unlock_device_hotplug(void);
 int lock_device_hotplug_sysfs(void);
 int device_offline(struct device *dev);
 int device_online(struct device *dev);
+
 void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
 void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode);
-void device_set_of_node_from_dev(struct device *dev, const struct device *dev2);
 void device_set_node(struct device *dev, struct fwnode_handle *fwnode);
+void device_set_of_node_from_dev(struct device *dev, const struct device *dev2);
+
+static inline struct device_node *dev_of_node(struct device *dev)
+{
+	if (!IS_ENABLED(CONFIG_OF) || !dev)
+		return NULL;
+	return dev->of_node;
+}
 
 static inline int dev_num_vf(struct device *dev)
 {
-- 
cgit v1.2.3


From fd7179ece035417f44f7ecff086d6df674d8a5bd Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner@baylibre.com>
Date: Thu, 30 May 2024 10:14:08 -0500
Subject: iio: introduce struct iio_scan_type

This gives the channel scan_type a named type so that it can be used
to simplify code in later commits.

Signed-off-by: David Lechner <dlechner@baylibre.com>
Reviewed-by: Nuno Sa <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20240530-iio-add-support-for-multiple-scan-types-v3-1-cbc4acea2cfa@baylibre.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/iio.h | 41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 55e2b22086a1..19de573a944a 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -173,6 +173,27 @@ struct iio_event_spec {
 	unsigned long mask_shared_by_all;
 };
 
+/**
+ * struct iio_scan_type - specification for channel data format in buffer
+ * @sign:		's' or 'u' to specify signed or unsigned
+ * @realbits:		Number of valid bits of data
+ * @storagebits:	Realbits + padding
+ * @shift:		Shift right by this before masking out realbits.
+ * @repeat:		Number of times real/storage bits repeats. When the
+ *			repeat element is more than 1, then the type element in
+ *			sysfs will show a repeat value. Otherwise, the number
+ *			of repetitions is omitted.
+ * @endianness:		little or big endian
+ */
+struct iio_scan_type {
+	char	sign;
+	u8	realbits;
+	u8	storagebits;
+	u8	shift;
+	u8	repeat;
+	enum iio_endian endianness;
+};
+
 /**
  * struct iio_chan_spec - specification of a single channel
  * @type:		What type of measurement is the channel making.
@@ -184,17 +205,6 @@ struct iio_event_spec {
  * @scan_index:		Monotonic index to give ordering in scans when read
  *			from a buffer.
  * @scan_type:		struct describing the scan type
- * @scan_type.sign:		's' or 'u' to specify signed or unsigned
- * @scan_type.realbits:		Number of valid bits of data
- * @scan_type.storagebits:	Realbits + padding
- * @scan_type.shift:		Shift right by this before masking out
- *				realbits.
- * @scan_type.repeat:		Number of times real/storage bits repeats.
- *				When the repeat element is more than 1, then
- *				the type element in sysfs will show a repeat
- *				value. Otherwise, the number of repetitions
- *				is omitted.
- * @scan_type.endianness:	little or big endian
  * @info_mask_separate: What information is to be exported that is specific to
  *			this channel.
  * @info_mask_separate_available: What availability information is to be
@@ -245,14 +255,7 @@ struct iio_chan_spec {
 	int			channel2;
 	unsigned long		address;
 	int			scan_index;
-	struct {
-		char	sign;
-		u8	realbits;
-		u8	storagebits;
-		u8	shift;
-		u8	repeat;
-		enum iio_endian endianness;
-	} scan_type;
+	struct iio_scan_type scan_type;
 	long			info_mask_separate;
 	long			info_mask_separate_available;
 	long			info_mask_shared_by_type;
-- 
cgit v1.2.3


From d8f2bb50845f2797f594ffe3cac9417abff4d7b0 Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner@baylibre.com>
Date: Thu, 30 May 2024 10:14:10 -0500
Subject: iio: add support for multiple scan types per channel

This adds new fields to the iio_channel structure to support multiple
scan types per channel. This is useful for devices that support multiple
resolution modes or other modes that require different data formats of
the raw data.

To make use of this, drivers need to implement the new callback
get_current_scan_type() to resolve the scan type for a given channel
based on the current state of the driver. There is a new scan_type_ext
field in the iio_channel structure that should be used to store the
scan types for any channel that has more than one. There is also a new
flag has_ext_scan_type that acts as a type discriminator for the
scan_type/ext_scan_type union. A union is used so that we don't grow
the size of the iio_channel structure and also makes it clear that
scan_type and ext_scan_type are mutually exclusive.

The buffer code is the only code in the IIO core code that is using the
scan_type field. This patch updates the buffer code to use the new
iio_channel_validate_scan_type() function to ensure it is returning the
correct scan type for the current state of the device when reading the
sysfs attributes. The buffer validation code is also update to validate
any additional scan types that are set in the scan_type_ext field. Part
of that code is refactored to a new function to avoid duplication.

Some userspace tools may need to be updated to re-read the scan type
after writing any other attribute. During testing, we noticed that we
had to restart iiod to get it to re-read the scan type after enabling
oversampling on the ad7380 driver.

Signed-off-by: David Lechner <dlechner@baylibre.com>
Reviewed-by: Nuno Sa <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20240530-iio-add-support-for-multiple-scan-types-v3-3-cbc4acea2cfa@baylibre.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/iio.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 53 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 19de573a944a..894309294182 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -204,7 +204,13 @@ struct iio_scan_type {
  * @address:		Driver specific identifier.
  * @scan_index:		Monotonic index to give ordering in scans when read
  *			from a buffer.
- * @scan_type:		struct describing the scan type
+ * @scan_type:		struct describing the scan type - mutually exclusive
+ *			with ext_scan_type.
+ * @ext_scan_type:	Used in rare cases where there is more than one scan
+ *			format for a channel. When this is used, the flag
+ *			has_ext_scan_type must be set and the driver must
+ *			implement get_current_scan_type in struct iio_info.
+ * @num_ext_scan_type:	Number of elements in ext_scan_type.
  * @info_mask_separate: What information is to be exported that is specific to
  *			this channel.
  * @info_mask_separate_available: What availability information is to be
@@ -248,6 +254,7 @@ struct iio_scan_type {
  *			attributes but not for event codes.
  * @output:		Channel is output.
  * @differential:	Channel is differential.
+ * @has_ext_scan_type:	True if ext_scan_type is used instead of scan_type.
  */
 struct iio_chan_spec {
 	enum iio_chan_type	type;
@@ -255,7 +262,13 @@ struct iio_chan_spec {
 	int			channel2;
 	unsigned long		address;
 	int			scan_index;
-	struct iio_scan_type scan_type;
+	union {
+		struct iio_scan_type scan_type;
+		struct {
+			const struct iio_scan_type *ext_scan_type;
+			unsigned int num_ext_scan_type;
+		};
+	};
 	long			info_mask_separate;
 	long			info_mask_separate_available;
 	long			info_mask_shared_by_type;
@@ -273,6 +286,7 @@ struct iio_chan_spec {
 	unsigned		indexed:1;
 	unsigned		output:1;
 	unsigned		differential:1;
+	unsigned		has_ext_scan_type:1;
 };
 
 
@@ -435,6 +449,9 @@ struct iio_trigger; /* forward declaration */
  *			for better event identification.
  * @validate_trigger:	function to validate the trigger when the
  *			current trigger gets changed.
+ * @get_current_scan_type: must be implemented by drivers that use ext_scan_type
+ *			in the channel spec to return the index of the currently
+ *			active ext_scan type for a channel.
  * @update_scan_mode:	function to configure device and scan buffer when
  *			channels have changed
  * @debugfs_reg_access:	function to read or write register value of device
@@ -519,6 +536,8 @@ struct iio_info {
 
 	int (*validate_trigger)(struct iio_dev *indio_dev,
 				struct iio_trigger *trig);
+	int (*get_current_scan_type)(const struct iio_dev *indio_dev,
+				     const struct iio_chan_spec *chan);
 	int (*update_scan_mode)(struct iio_dev *indio_dev,
 				const unsigned long *scan_mask);
 	int (*debugfs_reg_access)(struct iio_dev *indio_dev,
@@ -804,6 +823,38 @@ static inline bool iio_read_acpi_mount_matrix(struct device *dev,
 }
 #endif
 
+/**
+ * iio_get_current_scan_type - Get the current scan type for a channel
+ * @indio_dev:	the IIO device to get the scan type for
+ * @chan:	the channel to get the scan type for
+ *
+ * Most devices only have one scan type per channel and can just access it
+ * directly without calling this function. Core IIO code and drivers that
+ * implement ext_scan_type in the channel spec should use this function to
+ * get the current scan type for a channel.
+ *
+ * Returns: the current scan type for the channel or error.
+ */
+static inline const struct iio_scan_type
+*iio_get_current_scan_type(const struct iio_dev *indio_dev,
+			   const struct iio_chan_spec *chan)
+{
+	int ret;
+
+	if (chan->has_ext_scan_type) {
+		ret = indio_dev->info->get_current_scan_type(indio_dev, chan);
+		if (ret < 0)
+			return ERR_PTR(ret);
+
+		if (ret >= chan->num_ext_scan_type)
+			return ERR_PTR(-EINVAL);
+
+		return &chan->ext_scan_type[ret];
+	}
+
+	return &chan->scan_type;
+}
+
 ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals);
 
 int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer,
-- 
cgit v1.2.3


From b6e6aca6c2b1b53fb4db4b672eaa2722a75aa6a2 Mon Sep 17 00:00:00 2001
From: Ramona Gradinariu <ramona.bolboaca13@gmail.com>
Date: Mon, 27 May 2024 17:26:12 +0300
Subject: iio: imu: adis_buffer: Add buffer setup API with buffer attributes

Add new API called devm_adis_setup_buffer_and_trigger_with_attrs() which
also takes buffer attributes as a parameter.
Rewrite devm_adis_setup_buffer_and_trigger() implementation such that it
calls devm_adis_setup_buffer_and_trigger_with_attrs() with buffer
attributes parameter NULL

Reviewed-by: Nuno Sa <nuno.sa@analog.com>
Signed-off-by: Ramona Gradinariu <ramona.bolboaca13@gmail.com>
Link: https://lore.kernel.org/r/20240527142618.275897-4-ramona.bolboaca13@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/imu/adis.h | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index 8898966bc0f0..0fe3a2f63033 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -21,6 +21,7 @@
 #define ADIS_REG_PAGE_ID 0x00
 
 struct adis;
+struct iio_dev_attr;
 
 /**
  * struct adis_timeouts - ADIS chip variant timeouts
@@ -515,11 +516,19 @@ int adis_single_conversion(struct iio_dev *indio_dev,
 #define ADIS_ROT_CHAN(mod, addr, si, info_sep, info_all, bits) \
 	ADIS_MOD_CHAN(IIO_ROT, mod, addr, si, info_sep, info_all, bits)
 
+#define devm_adis_setup_buffer_and_trigger(adis, indio_dev, trigger_handler)	\
+	devm_adis_setup_buffer_and_trigger_with_attrs((adis), (indio_dev),	\
+						      (trigger_handler), NULL,	\
+						      NULL)
+
 #ifdef CONFIG_IIO_ADIS_LIB_BUFFER
 
 int
-devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev,
-				   irq_handler_t trigger_handler);
+devm_adis_setup_buffer_and_trigger_with_attrs(struct adis *adis,
+					      struct iio_dev *indio_dev,
+					      irq_handler_t trigger_handler,
+					      const struct iio_buffer_setup_ops *ops,
+					      const struct iio_dev_attr **buffer_attrs);
 
 int devm_adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev);
 
@@ -529,8 +538,11 @@ int adis_update_scan_mode(struct iio_dev *indio_dev,
 #else /* CONFIG_IIO_BUFFER */
 
 static inline int
-devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev,
-				   irq_handler_t trigger_handler)
+devm_adis_setup_buffer_and_trigger_with_attrs(struct adis *adis,
+					      struct iio_dev *indio_dev,
+					      irq_handler_t trigger_handler,
+					      const struct iio_buffer_setup_ops *ops,
+					      const struct iio_dev_attr **buffer_attrs)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 01724ce2d9405b2246bbb69701c74880eb56a34b Mon Sep 17 00:00:00 2001
From: Ramona Gradinariu <ramona.bolboaca13@gmail.com>
Date: Mon, 27 May 2024 17:26:15 +0300
Subject: iio: imu: adis_trigger: Allow level interrupts for FIFO readings

Currently, adis library allows configuration only for edge interrupts,
needed for data ready sampling.
This patch removes the restriction for level interrupts for devices
which have FIFO support.
Furthermore, in case of devices which have FIFO support,
devm_request_threaded_irq is used for interrupt allocation, to avoid
flooding the processor with the FIFO watermark level interrupt, which
is active until enough data has been read from the FIFO.

Reviewed-by: Nuno Sa <nuno.sa@analog.com>
Signed-off-by: Ramona Gradinariu <ramona.bolboaca13@gmail.com>
Link: https://lore.kernel.org/r/20240527142618.275897-7-ramona.bolboaca13@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/imu/adis.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index 0fe3a2f63033..4bb0a53cf7ea 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -85,6 +85,7 @@ struct adis_data {
 	bool unmasked_drdy;
 
 	bool has_paging;
+	bool has_fifo;
 
 	unsigned int burst_reg_cmd;
 	unsigned int burst_len;
-- 
cgit v1.2.3


From 175d1825ca4d2288fee734ada0955a1e36dd50e6 Mon Sep 17 00:00:00 2001
From: Minwoo Im <minwoo.im@samsung.com>
Date: Sat, 1 Jun 2024 06:22:43 +0900
Subject: scsi: ufs: pci: Add support MCQ for QEMU-based UFS

Recently, ufs-mcq feature has been introduced to QEMU hw/ufs device [1].
This patch adds MCQ support for upstream QEMU UFS PCI controller.  This
patch provides mandatory vops callbacks to make UFS controller work
properly on MCQ mode.  Operation and Runtime Config register stride is
fixed to 48bytes which is implemented by qemu.

[1] https://lore.kernel.org/qemu-devel/cover.1716876237.git.jeuk20.kim@samsung.com/

Signed-off-by: Minwoo Im <minwoo.im@samsung.com>
Link: https://lore.kernel.org/r/20240531212244.1593535-2-minwoo.im@samsung.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index df68fb1d4f3f..9e0581115b34 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -1278,6 +1278,7 @@ void ufshcd_update_evt_hist(struct ufs_hba *hba, u32 id, u32 val);
 void ufshcd_hba_stop(struct ufs_hba *hba);
 void ufshcd_schedule_eh_work(struct ufs_hba *hba);
 void ufshcd_mcq_config_mac(struct ufs_hba *hba, u32 max_active_cmds);
+unsigned int ufshcd_mcq_queue_cfg_addr(struct ufs_hba *hba);
 u32 ufshcd_mcq_read_cqis(struct ufs_hba *hba, int i);
 void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i);
 unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
-- 
cgit v1.2.3


From ba098ed9829c5de4d65e5708d08d3508f2e70a7d Mon Sep 17 00:00:00 2001
From: Daisuke Nojiri <dnojiri@chromium.org>
Date: Tue, 4 Jun 2024 10:01:48 -0700
Subject: platform/chrome: Add struct ec_response_get_next_event_v3

Add struct ec_response_get_next_event_v3 to upgrade
EC_CMD_GET_NEXT_EVENT to version 3.

Signed-off-by: Daisuke Nojiri <dnojiri@chromium.org>
Link: https://lore.kernel.org/r/20240604170552.2517189-1-dnojiri@chromium.org
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
---
 include/linux/platform_data/cros_ec_commands.h | 34 ++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'include')

diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h
index ecc47d5fe239..ec598057d1da 100644
--- a/include/linux/platform_data/cros_ec_commands.h
+++ b/include/linux/platform_data/cros_ec_commands.h
@@ -3463,6 +3463,34 @@ union __ec_align_offset1 ec_response_get_next_data_v1 {
 };
 BUILD_ASSERT(sizeof(union ec_response_get_next_data_v1) == 16);
 
+union __ec_align_offset1 ec_response_get_next_data_v3 {
+	uint8_t key_matrix[18];
+
+	/* Unaligned */
+	uint32_t host_event;
+	uint64_t host_event64;
+
+	struct __ec_todo_unpacked {
+		/* For aligning the fifo_info */
+		uint8_t reserved[3];
+		struct ec_response_motion_sense_fifo_info info;
+	} sensor_fifo;
+
+	uint32_t buttons;
+
+	uint32_t switches;
+
+	uint32_t fp_events;
+
+	uint32_t sysrq;
+
+	/* CEC events from enum mkbp_cec_event */
+	uint32_t cec_events;
+
+	uint8_t cec_message[16];
+};
+BUILD_ASSERT(sizeof(union ec_response_get_next_data_v3) == 18);
+
 struct ec_response_get_next_event {
 	uint8_t event_type;
 	/* Followed by event data if any */
@@ -3475,6 +3503,12 @@ struct ec_response_get_next_event_v1 {
 	union ec_response_get_next_data_v1 data;
 } __ec_align1;
 
+struct ec_response_get_next_event_v3 {
+	uint8_t event_type;
+	/* Followed by event data if any */
+	union ec_response_get_next_data_v3 data;
+} __ec_align1;
+
 /* Bit indices for buttons and switches.*/
 /* Buttons */
 #define EC_MKBP_POWER_BUTTON	0
-- 
cgit v1.2.3


From 106d6739823369c734a8fc3b13634274eee4f60e Mon Sep 17 00:00:00 2001
From: Daisuke Nojiri <dnojiri@chromium.org>
Date: Tue, 4 Jun 2024 16:08:25 -0700
Subject: platform/chrome: cros_ec_proto: Upgrade get_next_event to v3

Upgrade EC_CMD_GET_NEXT_EVENT to version 3.

The max supported version will be v3. So, we speak v3 even if the EC
says it supports v4+.

Signed-off-by: Daisuke Nojiri <dnojiri@chromium.org>
Link: https://lore.kernel.org/r/20240604230837.2878737-1-dnojiri@chromium.org
[tzungbi: uint32_t -> u32 per suggested by checkpatch.pl]
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
---
 include/linux/platform_data/cros_ec_proto.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index 1ddc52603f9a..6e9225bdf903 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -185,7 +185,7 @@ struct cros_ec_device {
 	bool host_sleep_v1;
 	struct blocking_notifier_head event_notifier;
 
-	struct ec_response_get_next_event_v1 event_data;
+	struct ec_response_get_next_event_v3 event_data;
 	int event_size;
 	u32 host_event_wake_mask;
 	u32 last_resume_result;
-- 
cgit v1.2.3


From 93bbbcb1e762a49fc18ee1272545b77371595f1e Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@gmail.com>
Date: Sat, 25 May 2024 02:30:39 +0000
Subject: mm/memblock: fix a typo in description of for_each_mem_region()

No functional change.

Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Link: https://lore.kernel.org/r/20240525023040.13509-2-richard.weiyang@gmail.com
Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org>
---
 include/linux/memblock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index e2082240586d..6cf18dc2b4d0 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -565,7 +565,7 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
 }
 
 /**
- * for_each_mem_region - itereate over memory regions
+ * for_each_mem_region - iterate over memory regions
  * @region: loop variable
  */
 #define for_each_mem_region(region)					\
-- 
cgit v1.2.3


From 172e422ffea20a89bfdc672741c1aad6fbb5044e Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sun, 12 May 2024 13:30:07 +0200
Subject: fsnotify: clear PARENT_WATCHED flags lazily

In some setups directories can have many (usually negative) dentries.
Hence __fsnotify_update_child_dentry_flags() function can take a
significant amount of time. Since the bulk of this function happens
under inode->i_lock this causes a significant contention on the lock
when we remove the watch from the directory as the
__fsnotify_update_child_dentry_flags() call from fsnotify_recalc_mask()
races with __fsnotify_update_child_dentry_flags() calls from
__fsnotify_parent() happening on children. This can lead upto softlockup
reports reported by users.

Fix the problem by calling fsnotify_update_children_dentry_flags() to
set PARENT_WATCHED flags only when parent starts watching children.

When parent stops watching children, clear false positive PARENT_WATCHED
flags lazily in __fsnotify_parent() for each accessed child.

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Stephen Brennan <stephen.s.brennan@oracle.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fsnotify_backend.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 4dd6143db271..8be029bc50b1 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -594,12 +594,14 @@ static inline __u32 fsnotify_parent_needed_mask(__u32 mask)
 
 static inline int fsnotify_inode_watches_children(struct inode *inode)
 {
+	__u32 parent_mask = READ_ONCE(inode->i_fsnotify_mask);
+
 	/* FS_EVENT_ON_CHILD is set if the inode may care */
-	if (!(inode->i_fsnotify_mask & FS_EVENT_ON_CHILD))
+	if (!(parent_mask & FS_EVENT_ON_CHILD))
 		return 0;
 	/* this inode might care about child events, does it care about the
 	 * specific set of events that can happen on a child? */
-	return inode->i_fsnotify_mask & FS_EVENTS_POSS_ON_CHILD;
+	return parent_mask & FS_EVENTS_POSS_ON_CHILD;
 }
 
 /*
@@ -613,7 +615,7 @@ static inline void fsnotify_update_flags(struct dentry *dentry)
 	/*
 	 * Serialisation of setting PARENT_WATCHED on the dentries is provided
 	 * by d_lock. If inotify_inode_watched changes after we have taken
-	 * d_lock, the following __fsnotify_update_child_dentry_flags call will
+	 * d_lock, the following fsnotify_set_children_dentry_flags call will
 	 * find our entry, so it will spin until we complete here, and update
 	 * us with the new state.
 	 */
-- 
cgit v1.2.3


From 61e2bbafb00e4b9a5de45e6448a7b6b818658576 Mon Sep 17 00:00:00 2001
From: Jason Xing <kernelxing@tencent.com>
Date: Fri, 31 May 2024 23:46:34 +0800
Subject: net: remove NULL-pointer net parameter in ip_metrics_convert

When I was doing some experiments, I found that when using the first
parameter, namely, struct net, in ip_metrics_convert() always triggers NULL
pointer crash. Then I digged into this part, realizing that we can remove
this one due to its uselessness.

Signed-off-by: Jason Xing <kernelxing@tencent.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h  | 3 +--
 include/net/tcp.h | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 6d735e00d3f3..c5606cadb1a5 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -506,8 +506,7 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
 	return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu);
 }
 
-struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx,
-					int fc_mx_len,
+struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx, int fc_mx_len,
 					struct netlink_ext_ack *extack);
 static inline void ip_fib_metrics_put(struct dst_metrics *fib_metrics)
 {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 08c3b99501cf..a70fc39090fe 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1224,7 +1224,7 @@ extern struct tcp_congestion_ops tcp_reno;
 
 struct tcp_congestion_ops *tcp_ca_find(const char *name);
 struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
-u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca);
+u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca);
 #ifdef CONFIG_INET
 char *tcp_ca_get_name_by_key(u32 key, char *buffer);
 #else
-- 
cgit v1.2.3


From 6f49c3fb563c0a95a838216eaf7d9b02ece44bf5 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Sat, 1 Jun 2024 00:29:17 +0100
Subject: net: caif: remove unused structs

'cfpktq' has been unused since
commit 73d6ac633c6c ("caif: code cleanup").

'caif_packet_funcs' is declared but never defined.

Remove both of them.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/caif_layer.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h
index 51f7bb42a936..0f45d875905f 100644
--- a/include/net/caif/caif_layer.h
+++ b/include/net/caif/caif_layer.h
@@ -11,9 +11,7 @@
 
 struct cflayer;
 struct cfpkt;
-struct cfpktq;
 struct caif_payload_info;
-struct caif_packet_funcs;
 
 #define CAIF_LAYER_NAME_SZ 16
 
-- 
cgit v1.2.3


From 310ec03841a36e3f45fb528f0dfdfe5b9e84b037 Mon Sep 17 00:00:00 2001
From: Barry Song <v-songbaohua@oppo.com>
Date: Wed, 5 Jun 2024 13:26:05 +1200
Subject: dma-buf: align fd_flags and heap_flags with dma_heap_allocation_data

dma_heap_allocation_data defines the UAPI as follows:

 struct dma_heap_allocation_data {
 	__u64 len;
 	__u32 fd;
 	__u32 fd_flags;
 	__u64 heap_flags;
 };

However, dma_heap_buffer_alloc() casts both fd_flags and heap_flags
into unsigned int. We're inconsistent with types in the non UAPI
arguments. This patch fixes it.

Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Acked-by: John Stultz <jstultz@google.com>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240605012605.5341-1-21cnbao@gmail.com
---
 include/uapi/linux/dma-heap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/dma-heap.h b/include/uapi/linux/dma-heap.h
index 6f84fa08e074..a4cf716a49fa 100644
--- a/include/uapi/linux/dma-heap.h
+++ b/include/uapi/linux/dma-heap.h
@@ -19,7 +19,7 @@
 #define DMA_HEAP_VALID_FD_FLAGS (O_CLOEXEC | O_ACCMODE)
 
 /* Currently no heap flags */
-#define DMA_HEAP_VALID_HEAP_FLAGS (0)
+#define DMA_HEAP_VALID_HEAP_FLAGS (0ULL)
 
 /**
  * struct dma_heap_allocation_data - metadata passed from userspace for
-- 
cgit v1.2.3


From 82dc29b9737edf2d13561ebcf6212c0b88c41129 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 2 Jun 2024 16:18:52 +0200
Subject: devlink: Constify the 'table_ops' parameter of
 devl_dpipe_table_register()

"struct devlink_dpipe_table_ops" only contains some function pointers.

Update "struct devlink_dpipe_table" and the 'table_ops' parameter of
devl_dpipe_table_register() so that structures in drivers can be
constified.

Constifying these structures will move some data to a read-only section, so
increase overall security.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 35eb0f884386..db5eff6cb60f 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -352,7 +352,7 @@ struct devlink_dpipe_table {
 	bool resource_valid;
 	u64 resource_id;
 	u64 resource_units;
-	struct devlink_dpipe_table_ops *table_ops;
+	const struct devlink_dpipe_table_ops *table_ops;
 	struct rcu_head rcu;
 };
 
@@ -1751,7 +1751,7 @@ void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index);
 void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index);
 int devl_dpipe_table_register(struct devlink *devlink,
 			      const char *table_name,
-			      struct devlink_dpipe_table_ops *table_ops,
+			      const struct devlink_dpipe_table_ops *table_ops,
 			      void *priv, bool counter_control_extern);
 void devl_dpipe_table_unregister(struct devlink *devlink,
 				 const char *table_name);
-- 
cgit v1.2.3


From 5bbe5872fed4497a192556426d75fd9223c5bfeb Mon Sep 17 00:00:00 2001
From: Xianwei Zhao <xianwei.zhao@amlogic.com>
Date: Wed, 29 May 2024 11:10:33 +0800
Subject: dt-bindings: power: add Amlogic A4 power domains

Add devicetree binding document and related header file for
Amlogic A4 secure power domains.

Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Xianwei Zhao <xianwei.zhao@amlogic.com>
Link: https://lore.kernel.org/r/20240529-a4_secpowerdomain-v2-1-47502fc0eaf3@amlogic.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/dt-bindings/power/amlogic,a4-pwrc.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 include/dt-bindings/power/amlogic,a4-pwrc.h

(limited to 'include')

diff --git a/include/dt-bindings/power/amlogic,a4-pwrc.h b/include/dt-bindings/power/amlogic,a4-pwrc.h
new file mode 100644
index 000000000000..bd2f9c558d22
--- /dev/null
+++ b/include/dt-bindings/power/amlogic,a4-pwrc.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */
+/*
+ * Copyright (C) 2024 Amlogic, Inc. All rights reserved
+ */
+#ifndef _DT_BINDINGS_AMLOGIC_A4_POWER_H
+#define _DT_BINDINGS_AMLOGIC_A4_POWER_H
+
+#define PWRC_A4_AUDIO_ID				0
+#define PWRC_A4_SDIOA_ID				1
+#define PWRC_A4_EMMC_ID					2
+#define PWRC_A4_USB_COMB_ID				3
+#define PWRC_A4_ETH_ID					4
+#define PWRC_A4_VOUT_ID					5
+#define PWRC_A4_AUDIO_PDM_ID				6
+#define PWRC_A4_DMC_ID					7
+#define PWRC_A4_SYS_WRAP_ID				8
+#define PWRC_A4_AO_I2C_S_ID				9
+#define PWRC_A4_AO_UART_ID				10
+#define PWRC_A4_AO_IR_ID				11
+
+#endif
-- 
cgit v1.2.3


From f086edef71be7174a16c1ed67ac65a085cda28b1 Mon Sep 17 00:00:00 2001
From: Kevin Yang <yyd@google.com>
Date: Mon, 3 Jun 2024 21:30:54 +0000
Subject: tcp: add sysctl_tcp_rto_min_us

Adding a sysctl knob to allow user to specify a default
rto_min at socket init time, other than using the hard
coded 200ms default rto_min.

Note that the rto_min route option has the highest precedence
for configuring this setting, followed by the TCP_BPF_RTO_MIN
socket option, followed by the tcp_rto_min_us sysctl.

Signed-off-by: Kevin Yang <yyd@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Yuchung Cheng <ycheng@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index c356c458b340..a91bb971f901 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -170,6 +170,7 @@ struct netns_ipv4 {
 	u8 sysctl_tcp_sack;
 	u8 sysctl_tcp_window_scaling;
 	u8 sysctl_tcp_timestamps;
+	int sysctl_tcp_rto_min_us;
 	u8 sysctl_tcp_recovery;
 	u8 sysctl_tcp_thin_linear_timeouts;
 	u8 sysctl_tcp_slow_start_after_idle;
-- 
cgit v1.2.3


From 6fef518594bcb7e374f809717281bd02894929f8 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <isaku.yamahata@intel.com>
Date: Thu, 25 Apr 2024 15:07:01 -0700
Subject: KVM: x86: Add a capability to configure bus frequency for APIC timer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add KVM_CAP_X86_APIC_BUS_CYCLES_NS capability to configure the APIC
bus clock frequency for APIC timer emulation.
Allow KVM_ENABLE_CAPABILITY(KVM_CAP_X86_APIC_BUS_CYCLES_NS) to set the
frequency in nanoseconds. When using this capability, the user space
VMM should configure CPUID leaf 0x15 to advertise the frequency.

Vishal reported that the TDX guest kernel expects a 25MHz APIC bus
frequency but ends up getting interrupts at a significantly higher rate.

The TDX architecture hard-codes the core crystal clock frequency to
25MHz and mandates exposing it via CPUID leaf 0x15. The TDX architecture
does not allow the VMM to override the value.

In addition, per Intel SDM:
    "The APIC timer frequency will be the processor’s bus clock or core
     crystal clock frequency (when TSC/core crystal clock ratio is
     enumerated in CPUID leaf 0x15) divided by the value specified in
     the divide configuration register."

The resulting 25MHz APIC bus frequency conflicts with the KVM hardcoded
APIC bus frequency of 1GHz.

The KVM doesn't enumerate CPUID leaf 0x15 to the guest unless the user
space VMM sets it using KVM_SET_CPUID. If the CPUID leaf 0x15 is
enumerated, the guest kernel uses it as the APIC bus frequency. If not,
the guest kernel measures the frequency based on other known timers like
the ACPI timer or the legacy PIT. As reported by Vishal the TDX guest
kernel expects a 25MHz timer frequency but gets timer interrupt more
frequently due to the 1GHz frequency used by KVM.

To ensure that the guest doesn't have a conflicting view of the APIC bus
frequency, allow the userspace to tell KVM to use the same frequency that
TDX mandates instead of the default 1Ghz.

Reported-by: Vishal Annapurve <vannapurve@google.com>
Closes: https://lore.kernel.org/lkml/20231006011255.4163884-1-vannapurve@google.com
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Reviewed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Co-developed-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Yuan Yao <yuan.yao@intel.com>
Link: https://lore.kernel.org/r/6748a4c12269e756f0c48680da8ccc5367c31ce7.1714081726.git.reinette.chatre@intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/uapi/linux/kvm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d03842abae57..ec998e6b6555 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -917,6 +917,7 @@ struct kvm_enable_cap {
 #define KVM_CAP_MEMORY_ATTRIBUTES 233
 #define KVM_CAP_GUEST_MEMFD 234
 #define KVM_CAP_VM_TYPES 235
+#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 236
 
 struct kvm_irq_routing_irqchip {
 	__u32 irqchip;
-- 
cgit v1.2.3


From f0dc887f21d18791037c0166f652c67da761f16f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 27 May 2024 17:34:47 -0700
Subject: sched/core: Move preempt_model_*() helpers from sched.h to preempt.h

Move the declarations and inlined implementations of the preempt_model_*()
helpers to preempt.h so that they can be referenced in spinlock.h without
creating a potential circular dependency between spinlock.h and sched.h.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ankur Arora <ankur.a.arora@oracle.com>
Link: https://lkml.kernel.org/r/20240528003521.979836-2-ankur.a.arora@oracle.com
---
 include/linux/preempt.h | 41 +++++++++++++++++++++++++++++++++++++++++
 include/linux/sched.h   | 41 -----------------------------------------
 2 files changed, 41 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 7233e9cf1bab..ce76f1a45722 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -481,4 +481,45 @@ DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable())
 DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace())
 DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
 
+#ifdef CONFIG_PREEMPT_DYNAMIC
+
+extern bool preempt_model_none(void);
+extern bool preempt_model_voluntary(void);
+extern bool preempt_model_full(void);
+
+#else
+
+static inline bool preempt_model_none(void)
+{
+	return IS_ENABLED(CONFIG_PREEMPT_NONE);
+}
+static inline bool preempt_model_voluntary(void)
+{
+	return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY);
+}
+static inline bool preempt_model_full(void)
+{
+	return IS_ENABLED(CONFIG_PREEMPT);
+}
+
+#endif
+
+static inline bool preempt_model_rt(void)
+{
+	return IS_ENABLED(CONFIG_PREEMPT_RT);
+}
+
+/*
+ * Does the preemption model allow non-cooperative preemption?
+ *
+ * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with
+ * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the
+ * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the
+ * PREEMPT_NONE model.
+ */
+static inline bool preempt_model_preemptible(void)
+{
+	return preempt_model_full() || preempt_model_rt();
+}
+
 #endif /* __LINUX_PREEMPT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 61591ac6eab6..90691d99027e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2064,47 +2064,6 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock);
 	__cond_resched_rwlock_write(lock);					\
 })
 
-#ifdef CONFIG_PREEMPT_DYNAMIC
-
-extern bool preempt_model_none(void);
-extern bool preempt_model_voluntary(void);
-extern bool preempt_model_full(void);
-
-#else
-
-static inline bool preempt_model_none(void)
-{
-	return IS_ENABLED(CONFIG_PREEMPT_NONE);
-}
-static inline bool preempt_model_voluntary(void)
-{
-	return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY);
-}
-static inline bool preempt_model_full(void)
-{
-	return IS_ENABLED(CONFIG_PREEMPT);
-}
-
-#endif
-
-static inline bool preempt_model_rt(void)
-{
-	return IS_ENABLED(CONFIG_PREEMPT_RT);
-}
-
-/*
- * Does the preemption model allow non-cooperative preemption?
- *
- * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with
- * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the
- * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the
- * PREEMPT_NONE model.
- */
-static inline bool preempt_model_preemptible(void)
-{
-	return preempt_model_full() || preempt_model_rt();
-}
-
 static __always_inline bool need_resched(void)
 {
 	return unlikely(tif_need_resched());
-- 
cgit v1.2.3


From c793a62823d1ce8f70d9cfc7803e3ea436277cda Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 27 May 2024 17:34:48 -0700
Subject: sched/core: Drop spinlocks on contention iff kernel is preemptible

Use preempt_model_preemptible() to detect a preemptible kernel when
deciding whether or not to reschedule in order to drop a contended
spinlock or rwlock.  Because PREEMPT_DYNAMIC selects PREEMPTION, kernels
built with PREEMPT_DYNAMIC=y will yield contended locks even if the live
preemption model is "none" or "voluntary".  In short, make kernels with
dynamically selected models behave the same as kernels with statically
selected models.

Somewhat counter-intuitively, NOT yielding a lock can provide better
latency for the relevant tasks/processes.  E.g. KVM x86's mmu_lock, a
rwlock, is often contended between an invalidation event (takes mmu_lock
for write) and a vCPU servicing a guest page fault (takes mmu_lock for
read).  For _some_ setups, letting the invalidation task complete even
if there is mmu_lock contention provides lower latency for *all* tasks,
i.e. the invalidation completes sooner *and* the vCPU services the guest
page fault sooner.

But even KVM's mmu_lock behavior isn't uniform, e.g. the "best" behavior
can vary depending on the host VMM, the guest workload, the number of
vCPUs, the number of pCPUs in the host, why there is lock contention, etc.

In other words, simply deleting the CONFIG_PREEMPTION guard (or doing the
opposite and removing contention yielding entirely) needs to come with a
big pile of data proving that changing the status quo is a net positive.

Opportunistically document this side effect of preempt=full, as yielding
contended spinlocks can have significant, user-visible impact.

Fixes: c597bfddc9e9 ("sched: Provide Kconfig support for default dynamic preempt mode")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ankur Arora <ankur.a.arora@oracle.com>
Reviewed-by: Chen Yu <yu.c.chen@intel.com>
Link: https://lore.kernel.org/kvm/ef81ff36-64bb-4cfe-ae9b-e3acf47bff24@proxmox.com
---
 include/linux/spinlock.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 3fcd20de6ca8..63dd8cf3c3c2 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -462,11 +462,10 @@ static __always_inline int spin_is_contended(spinlock_t *lock)
  */
 static inline int spin_needbreak(spinlock_t *lock)
 {
-#ifdef CONFIG_PREEMPTION
+	if (!preempt_model_preemptible())
+		return 0;
+
 	return spin_is_contended(lock);
-#else
-	return 0;
-#endif
 }
 
 /*
@@ -479,11 +478,10 @@ static inline int spin_needbreak(spinlock_t *lock)
  */
 static inline int rwlock_needbreak(rwlock_t *lock)
 {
-#ifdef CONFIG_PREEMPTION
+	if (!preempt_model_preemptible())
+		return 0;
+
 	return rwlock_is_contended(lock);
-#else
-	return 0;
-#endif
 }
 
 /*
-- 
cgit v1.2.3


From dff60734fc7606fabde668ab6a26feacec8787cc Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Tue, 4 Jun 2024 17:52:56 +0200
Subject: vfs: retire user_path_at_empty and drop empty arg from getname_flags

No users after do_readlinkat started doing the job on its own.

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Link: https://lore.kernel.org/r/20240604155257.109500-3-mjguzik@gmail.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h    | 2 +-
 include/linux/namei.h | 8 +-------
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 639885621608..bfc1e6407bf6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2701,7 +2701,7 @@ static inline struct file *file_clone_open(struct file *file)
 }
 extern int filp_close(struct file *, fl_owner_t id);
 
-extern struct filename *getname_flags(const char __user *, int, int *);
+extern struct filename *getname_flags(const char __user *, int);
 extern struct filename *getname_uflags(const char __user *, int);
 extern struct filename *getname(const char __user *);
 extern struct filename *getname_kernel(const char *);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 967aa9ea9f96..8ec8fed3bce8 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -50,13 +50,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
 
 extern int path_pts(struct path *path);
 
-extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty);
-
-static inline int user_path_at(int dfd, const char __user *name, unsigned flags,
-		 struct path *path)
-{
-	return user_path_at_empty(dfd, name, flags, path, NULL);
-}
+extern int user_path_at(int, const char __user *, unsigned, struct path *);
 
 struct dentry *lookup_one_qstr_excl(const struct qstr *name,
 				    struct dentry *base,
-- 
cgit v1.2.3


From 758191c9ea7bcc45dd99398a538ae4ab27c4029e Mon Sep 17 00:00:00 2001
From: Yoray Zack <yorayz@nvidia.com>
Date: Tue, 4 Jun 2024 00:22:17 +0300
Subject: net/mlx5e: SHAMPO, Use KSMs instead of KLMs

KSM Mkey is KLM Mkey with a fixed buffer size. Due to this fact,
it is a faster mechanism than KLM.

SHAMPO feature used KLMs Mkeys for memory mappings of its headers buffer.
As it used KLMs with the same buffer size for each entry,
we can use KSMs instead.

This commit changes the Mkeys that map the SHAMPO headers buffer
from KLMs to KSMs.

Signed-off-by: Yoray Zack <yorayz@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/r/20240603212219.1037656-13-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mlx5/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index d7bb31d9a446..da09bfaa7b81 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -294,6 +294,7 @@ enum {
 #define MLX5_UMR_FLEX_ALIGNMENT 0x40
 #define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt))
 #define MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_klm))
+#define MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_ksm))
 
 #define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8)
 
-- 
cgit v1.2.3


From 99be56171fa9ffea494dfe3d4a7f6e7e51630c2e Mon Sep 17 00:00:00 2001
From: Yoray Zack <yorayz@nvidia.com>
Date: Tue, 4 Jun 2024 00:22:18 +0300
Subject: net/mlx5e: SHAMPO, Re-enable HW-GRO

Add back HW-GRO to the reported features.

As the current implementation of HW-GRO uses KSMs with a
specific fixed buffer size (256B) to map its headers buffer,
we reported the feature only if the NIC is supporting KSM and
the minimum value for buffer size is below the requested one.

iperf3 bandwidth comparison:
+---------+--------+--------+-----------+
| streams | SW GRO | HW GRO | Unit      |
|---------+--------+--------+-----------|
| 1       | 36     | 42     | Gbits/sec |
| 4       | 34     | 39     | Gbits/sec |
| 8       | 31     | 35     | Gbits/sec |
+---------+--------+--------+-----------+

A downstream patch will add skb fragment coalescing which will improve
performance considerably.

Benchmark details:
VM based setup
CPU: Intel(R) Xeon(R) Platinum 8380 CPU, 24 cores
NIC: ConnectX-7 100GbE
iperf3 and irq running on same CPU over a single receive queue

Signed-off-by: Yoray Zack <yorayz@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/r/20240603212219.1037656-14-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mlx5/mlx5_ifc.h | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 5df52e15f7d6..17acd0f3ca8e 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1526,8 +1526,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         ts_cqe_to_dest_cqn[0x1];
 	u8         reserved_at_b3[0x6];
 	u8         go_back_n[0x1];
-	u8         shampo[0x1];
-	u8         reserved_at_bb[0x5];
+	u8         reserved_at_ba[0x6];
 
 	u8         max_sgl_for_optimized_performance[0x8];
 	u8         log_max_cq_sz[0x8];
@@ -1744,7 +1743,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_280[0x10];
 	u8         max_wqe_sz_sq[0x10];
 
-	u8         reserved_at_2a0[0x10];
+	u8         reserved_at_2a0[0xb];
+	u8         shampo[0x1];
+	u8         reserved_at_2ac[0x4];
 	u8         max_wqe_sz_rq[0x10];
 
 	u8         max_flow_counter_31_16[0x10];
@@ -2017,7 +2018,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
 	u8	   reserved_at_250[0x10];
 
 	u8	   reserved_at_260[0x120];
-	u8	   reserved_at_380[0x10];
+	u8	   reserved_at_380[0xb];
+	u8	   min_mkey_log_entity_size_fixed_buffer[0x5];
 	u8	   ec_vf_vport_base[0x10];
 
 	u8	   reserved_at_3a0[0x10];
@@ -2029,7 +2031,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
 	u8	   pcc_ifa2[0x1];
 	u8	   reserved_at_3f1[0xf];
 
-	u8	   reserved_at_400[0x400];
+	u8	   reserved_at_400[0x1];
+	u8	   min_mkey_log_entity_size_fixed_buffer_valid[0x1];
+	u8	   reserved_at_402[0x1e];
+
+	u8	   reserved_at_420[0x3e0];
 };
 
 enum mlx5_ifc_flow_destination_type {
-- 
cgit v1.2.3


From f1180fd2a7c039691b64ebf404c746a74e40b7b0 Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@gmail.com>
Date: Wed, 5 Jun 2024 07:13:39 +0000
Subject: mm/mm_init.c: not always search next deferred_init_pfn from very
 beginning

In function deferred_init_memmap(), we call
deferred_init_mem_pfn_range_in_zone() to get the next deferred_init_pfn.
But we always search it from the very beginning.

Since we save the index in i, we can leverage this to search from i next
time.

[rppt refine the comment]

Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Link: https://lore.kernel.org/all/20240605071339.15330-1-richard.weiyang@gmail.com
Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org>
---
 include/linux/memblock.h | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 6cf18dc2b4d0..45cac33334c8 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -299,25 +299,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
 				  unsigned long *out_spfn,
 				  unsigned long *out_epfn);
-/**
- * for_each_free_mem_pfn_range_in_zone - iterate through zone specific free
- * memblock areas
- * @i: u64 used as loop variable
- * @zone: zone in which all of the memory blocks reside
- * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
- * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
- *
- * Walks over free (memory && !reserved) areas of memblock in a specific
- * zone. Available once memblock and an empty zone is initialized. The main
- * assumption is that the zone start, end, and pgdat have been associated.
- * This way we can use the zone to determine NUMA node, and if a given part
- * of the memblock is valid for the zone.
- */
-#define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end)	\
-	for (i = 0,							\
-	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end);	\
-	     i != U64_MAX;					\
-	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
 
 /**
  * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific
-- 
cgit v1.2.3


From b4cb4a1391dcdc640c4ade003aaf0ee19cc8d509 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 4 Jun 2024 11:16:03 +0000
Subject: net: use unrcu_pointer() helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Toke mentioned unrcu_pointer() existence, allowing
to remove some of the ugly casts we have when using
xchg() for rcu protected pointers.

Also make inet_rcv_compat const.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Toke Høiland-Jørgensen <toke@redhat.com>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20240604111603.45871-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 953c8dc4e259..b30ea0c342a6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2095,7 +2095,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst)
 
 	sk_tx_queue_clear(sk);
 	WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
-	old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst);
+	old_dst = unrcu_pointer(xchg(&sk->sk_dst_cache, RCU_INITIALIZER(dst)));
 	dst_release(old_dst);
 }
 
-- 
cgit v1.2.3


From c34506406dd5cfb352f8c53bb6a1b9535c0905dd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 5 Jun 2024 07:15:51 +0000
Subject: tcp: small changes in reqsk_put() and reqsk_free()

In reqsk_free(), use DEBUG_NET_WARN_ON_ONCE()
instead of WARN_ON_ONCE() for a condition which never fired.

In reqsk_put() directly call __reqsk_free(), there is no
point checking rsk_refcnt again right after a transition to zero.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/request_sock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index ebcb8896bffc..a8f82216c628 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -172,14 +172,14 @@ static inline void __reqsk_free(struct request_sock *req)
 
 static inline void reqsk_free(struct request_sock *req)
 {
-	WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0);
+	DEBUG_NET_WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0);
 	__reqsk_free(req);
 }
 
 static inline void reqsk_put(struct request_sock *req)
 {
 	if (refcount_dec_and_test(&req->rsk_refcnt))
-		reqsk_free(req);
+		__reqsk_free(req);
 }
 
 /*
-- 
cgit v1.2.3


From 6971d21672827a701c5ea180891b7ea6cf06f6a7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 5 Jun 2024 07:15:53 +0000
Subject: tcp: move reqsk_alloc() to inet_connection_sock.c

reqsk_alloc() has a single caller, no need to expose it
in include/net/request_sock.h.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/request_sock.h | 33 ---------------------------------
 1 file changed, 33 deletions(-)

(limited to 'include')

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index a8f82216c628..b07b1cd14e9f 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -128,39 +128,6 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb,
 	return sk;
 }
 
-static inline struct request_sock *
-reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener,
-	    bool attach_listener)
-{
-	struct request_sock *req;
-
-	req = kmem_cache_alloc_noprof(ops->slab, GFP_ATOMIC | __GFP_NOWARN);
-	if (!req)
-		return NULL;
-	req->rsk_listener = NULL;
-	if (attach_listener) {
-		if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) {
-			kmem_cache_free(ops->slab, req);
-			return NULL;
-		}
-		req->rsk_listener = sk_listener;
-	}
-	req->rsk_ops = ops;
-	req_to_sk(req)->sk_prot = sk_listener->sk_prot;
-	sk_node_init(&req_to_sk(req)->sk_node);
-	sk_tx_queue_clear(req_to_sk(req));
-	req->saved_syn = NULL;
-	req->syncookie = 0;
-	req->timeout = 0;
-	req->num_timeout = 0;
-	req->num_retrans = 0;
-	req->sk = NULL;
-	refcount_set(&req->rsk_refcnt, 0);
-
-	return req;
-}
-#define reqsk_alloc(...)	alloc_hooks(reqsk_alloc_noprof(__VA_ARGS__))
-
 static inline void __reqsk_free(struct request_sock *req)
 {
 	req->rsk_ops->destructor(req);
-- 
cgit v1.2.3


From 3ed96977a3c5b0a9b017d626600402be3089d4fc Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 4 Jun 2024 13:54:38 -0400
Subject: drm/mm: Remove unused drm_mm_replace_node

Last caller was removed with commit 078a5b498d6a ("drm/tests:
Remove slow tests").

Cc: Maxime Ripard <mripard@kernel.org>
Acked-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240604175438.48125-1-rodrigo.vivi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/drm/drm_mm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index ac33ba1b18bc..f654874c4ce6 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -463,7 +463,6 @@ static inline int drm_mm_insert_node(struct drm_mm *mm,
 }
 
 void drm_mm_remove_node(struct drm_mm_node *node);
-void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new);
 void drm_mm_init(struct drm_mm *mm, u64 start, u64 size);
 void drm_mm_takedown(struct drm_mm *mm);
 
-- 
cgit v1.2.3


From ababa16fd9bd0e2727a1c31c4fb68d6be053bddc Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lpieralisi@kernel.org>
Date: Thu, 6 Jun 2024 11:42:38 +0200
Subject: irqchip/gic-v3: Enable non-coherent redistributors/ITSes ACPI probing

The GIC architecture specification defines a set of registers for
redistributors and ITSes that control the sharebility and cacheability
attributes of redistributors/ITSes initiator ports on the interconnect
(GICR_[V]PROPBASER, GICR_[V]PENDBASER, GITS_BASER<n>).

Architecturally the GIC provides a means to drive shareability and
cacheability attributes signals but it is not mandatory for designs to
wire up the corresponding interconnect signals that control the
cacheability/shareability of transactions.

Redistributors and ITSes interconnect ports can be connected to
non-coherent interconnects that are not able to manage the
shareability/cacheability attributes; this implicitly makes the
redistributors and ITSes non-coherent observers.

To enable non-coherent GIC designs on ACPI based systems, parse the MADT
GICC/GICR/ITS subtables non-coherent flags to determine whether the
respective components are non-coherent observers and force the
shareability attributes to be programmed into the redistributors and
ITSes registers.

An ACPI global function (acpi_get_madt_revision()) is added to retrieve
the MADT revision, in that it is essential to check the MADT revision
before checking for flags that were added with MADT revision 7 so that
if the kernel is booted with an ACPI MADT table with revision < 7 it
skips parsing the newly added flags (that should be zeroed reserved
values for MADT versions < 7 but they could turn out to be buggy and
should be ignored).

Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Link: https://lore.kernel.org/r/20240606094238.757649-2-lpieralisi@kernel.org
---
 include/linux/acpi.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 28c3fb2bef0d..000d339e1596 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -279,6 +279,9 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id)
 	return phys_id == PHYS_CPUID_INVALID;
 }
 
+
+int __init acpi_get_madt_revision(void);
+
 /* Validate the processor object's proc_id */
 bool acpi_duplicate_processor_id(int proc_id);
 /* Processor _CTS control */
-- 
cgit v1.2.3


From 0ee14725471cea66e03e3cd4f4c582d759de502c Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Thu, 6 Jun 2024 15:46:09 +0100
Subject: mm/util: Swap kmemdup_array() arguments

GCC 14.1 complains about the argument usage of kmemdup_array():

  drivers/soc/tegra/fuse/fuse-tegra.c:130:65: error: 'kmemdup_array' sizes specified with 'sizeof' in the earlier argument and not in the later argument [-Werror=calloc-transposed-args]
    130 |         fuse->lookups = kmemdup_array(fuse->soc->lookups, sizeof(*fuse->lookups),
        |                                                                 ^
  drivers/soc/tegra/fuse/fuse-tegra.c:130:65: note: earlier argument should specify number of elements, later size of each element

The annotation introduced by commit 7d78a7773355 ("string: Add
additional __realloc_size() annotations for "dup" helpers") lets the
compiler think that kmemdup_array() follows the same format as calloc(),
with the number of elements preceding the size of one element. So we
could simply swap the arguments to __realloc_size() to get rid of that
warning, but it seems cleaner to instead have kmemdup_array() follow the
same format as krealloc_array(), memdup_array_user(), calloc() etc.

Fixes: 7d78a7773355 ("string: Add additional __realloc_size() annotations for "dup" helpers")
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20240606144608.97817-2-jean-philippe@linaro.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/string.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/string.h b/include/linux/string.h
index 60168aa2af07..9edace076ddb 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -289,7 +289,7 @@ extern void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp) __realloc_si
 
 extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2);
 extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp);
-extern void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp)
+extern void *kmemdup_array(const void *src, size_t count, size_t element_size, gfp_t gfp)
 		__realloc_size(2, 3);
 
 /* lib/argv_split.c */
-- 
cgit v1.2.3


From 0d11307022978f1f395da587285c06c9cea47288 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Thu, 23 May 2024 19:44:29 +0200
Subject: drm/print: Add missing [drm] prefix to drm based WARN

All drm_device based logging macros, except those related to WARN,
include the [drm] prefix. Fix that.

  [ ] 0000:00:00.0: this is a warning
  [ ] 0000:00:00.0: drm_WARN_ON(true)
vs
  [ ] 0000:00:00.0: [drm] this is a warning
  [ ] 0000:00:00.0: [drm] drm_WARN_ON(true)

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240523174429.800-1-michal.wajdeczko@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/drm/drm_print.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 089950ad8681..112f8830b372 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -632,12 +632,12 @@ void __drm_err(const char *format, ...);
 
 /* Helper for struct drm_device based WARNs */
 #define drm_WARN(drm, condition, format, arg...)			\
-	WARN(condition, "%s %s: " format,				\
+	WARN(condition, "%s %s: [drm] " format,				\
 			dev_driver_string((drm)->dev),			\
 			dev_name((drm)->dev), ## arg)
 
 #define drm_WARN_ONCE(drm, condition, format, arg...)			\
-	WARN_ONCE(condition, "%s %s: " format,				\
+	WARN_ONCE(condition, "%s %s: [drm] " format,			\
 			dev_driver_string((drm)->dev),			\
 			dev_name((drm)->dev), ## arg)
 
-- 
cgit v1.2.3


From 0d5edcc60abe9a02501f01e032bfa2432c1364de Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Thu, 16 May 2024 18:00:15 +0200
Subject: drm/print: Kill ___drm_dbg()

There is no point in maintaining a separate print function, while
there is __drm_dev_dbg() function that can work with a NULL device.

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240516160015.2260-1-michal.wajdeczko@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/drm/drm_print.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 112f8830b372..679a2086fbea 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -527,17 +527,15 @@ void __drm_dev_dbg(struct _ddebug *desc, const struct device *dev,
  * Prefer drm_device based logging over device or prink based logging.
  */
 
-__printf(3, 4)
-void ___drm_dbg(struct _ddebug *desc, enum drm_debug_category category, const char *format, ...);
 __printf(1, 2)
 void __drm_err(const char *format, ...);
 
 #if !defined(CONFIG_DRM_USE_DYNAMIC_DEBUG)
-#define __drm_dbg(cat, fmt, ...)		___drm_dbg(NULL, cat, fmt, ##__VA_ARGS__)
+#define __drm_dbg(cat, fmt, ...)	__drm_dev_dbg(NULL, NULL, cat, fmt, ##__VA_ARGS__)
 #else
 #define __drm_dbg(cat, fmt, ...)					\
-	_dynamic_func_call_cls(cat, fmt, ___drm_dbg,			\
-			       cat, fmt, ##__VA_ARGS__)
+	_dynamic_func_call_cls(cat, fmt, __drm_dev_dbg,			\
+			       NULL, cat, fmt, ##__VA_ARGS__)
 #endif
 
 /* Macros to make printk easier */
-- 
cgit v1.2.3


From c2ef66e9ad882ab4b055a86657c20c61d203f003 Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Fri, 17 May 2024 18:34:05 +0200
Subject: drm/print: Improve drm_dbg_printer

With recent introduction of a generic drm dev printk function, we
can now store and use location where drm_dbg_printer was invoked
and output it's symbolic name like we do for all drm debug prints.

Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240517163406.2348-3-michal.wajdeczko@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/drm/drm_print.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 679a2086fbea..5d9dff5149c9 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -175,6 +175,7 @@ struct drm_printer {
 	void (*printfn)(struct drm_printer *p, struct va_format *vaf);
 	void (*puts)(struct drm_printer *p, const char *str);
 	void *arg;
+	const void *origin;
 	const char *prefix;
 	enum drm_debug_category category;
 };
@@ -332,6 +333,7 @@ static inline struct drm_printer drm_dbg_printer(struct drm_device *drm,
 	struct drm_printer p = {
 		.printfn = __drm_printfn_dbg,
 		.arg = drm,
+		.origin = (const void *)_THIS_IP_, /* it's fine as we will be inlined */
 		.prefix = prefix,
 		.category = category,
 	};
-- 
cgit v1.2.3


From 1d5f0222944fe723b9bfaaeb27e368363644ccab Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Wed, 5 Jun 2024 16:26:45 -0400
Subject: ftrace: Declare function_trace_op in header to quiet sparse warning

Sparse complains that function_trace_op is not static but is not declared
in a header file. It is used only in assembly code. But add it to a header
so that sparse no longer complains:

 kernel/trace/ftrace.c:99:19: warning: symbol 'function_trace_op' was not declared. Should it be static?

Link: https://lore.kernel.org/linux-trace-kernel/20240605202708.289105647@goodmis.org

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9f61556a9491..4135dc171447 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1131,6 +1131,9 @@ extern void ftrace_graph_init_task(struct task_struct *t);
 extern void ftrace_graph_exit_task(struct task_struct *t);
 extern void ftrace_graph_init_idle_task(struct task_struct *t, int cpu);
 
+/* Used by assembly, but to quiet sparse warnings */
+extern struct ftrace_ops *function_trace_op;
+
 static inline void pause_graph_tracing(void)
 {
 	atomic_inc(&current->tracing_graph_pause);
-- 
cgit v1.2.3


From c76494768761aef7630e7e0db820ba7b375964da Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 30 May 2024 11:07:19 -0700
Subject: linux/interrupt.h: allow "guard" notation to disable and reenable IRQ

Drivers often need to first disable an interrupt, carry out some
action, and then reenable the interrupt. Introduce support for the
"guard" notation for this so that the following is possible:

	...

	scoped_cond_guard(mutex_intr, return -EINTR, &data->sysfs_mutex) {
		guard(disable_irq)(&client->irq);

		error = elan_acquire_baseline(data);
		if (error)
			return error;
	}

	...

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/ZljAV6HjkPSEhWSw@google.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/interrupt.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5c9bdd3ffccc..3a36e64119c8 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -5,6 +5,7 @@
 
 #include <linux/kernel.h>
 #include <linux/bitops.h>
+#include <linux/cleanup.h>
 #include <linux/cpumask.h>
 #include <linux/irqreturn.h>
 #include <linux/irqnr.h>
@@ -235,6 +236,9 @@ extern void enable_percpu_irq(unsigned int irq, unsigned int type);
 extern bool irq_percpu_is_enabled(unsigned int irq);
 extern void irq_wake_thread(unsigned int irq, void *dev_id);
 
+DEFINE_LOCK_GUARD_1(disable_irq, int,
+		    disable_irq(*_T->lock), enable_irq(*_T->lock))
+
 extern void disable_nmi_nosync(unsigned int irq);
 extern void disable_percpu_nmi(unsigned int irq);
 extern void enable_nmi(unsigned int irq);
-- 
cgit v1.2.3


From b472b996a43404a912c5cb4f27050022fdbce10c Mon Sep 17 00:00:00 2001
From: Udit Kumar <u-kumar1@ti.com>
Date: Fri, 31 May 2024 22:27:25 +0530
Subject: dt-bindings: net: dp8386x: Add MIT license along with GPL-2.0

Modify license to include dual licensing as GPL-2.0-only OR MIT
license for TI specific phy header files. This allows for Linux
kernel files to be used in other Operating System ecosystems
such as Zephyr or FreeBSD.

While at this, update the GPL-2.0 to be GPL-2.0-only to be in sync
with latest SPDX conventions (GPL-2.0 is deprecated).

While at this, update the TI copyright year to sync with current year
to indicate license change.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Trent Piepho <tpiepho@impinj.com>
Cc: Wadim Egorov <w.egorov@phytec.de>
Cc: Kip Broadhurst <kbroadhurst@ti.com>
Signed-off-by: Udit Kumar <u-kumar1@ti.com>
Acked-by: Wadim Egorov <w.egorov@phytec.de>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/dt-bindings/net/ti-dp83867.h | 4 ++--
 include/dt-bindings/net/ti-dp83869.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/net/ti-dp83867.h b/include/dt-bindings/net/ti-dp83867.h
index 6fc4b445d3a1..b8a4f3ff4a3b 100644
--- a/include/dt-bindings/net/ti-dp83867.h
+++ b/include/dt-bindings/net/ti-dp83867.h
@@ -1,10 +1,10 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
 /*
  * Device Tree constants for the Texas Instruments DP83867 PHY
  *
  * Author: Dan Murphy <dmurphy@ti.com>
  *
- * Copyright:   (C) 2015 Texas Instruments, Inc.
+ * Copyright (C) 2015-2024 Texas Instruments Incorporated - https://www.ti.com/
  */
 
 #ifndef _DT_BINDINGS_TI_DP83867_H
diff --git a/include/dt-bindings/net/ti-dp83869.h b/include/dt-bindings/net/ti-dp83869.h
index 218b1a64e975..917114aad7d0 100644
--- a/include/dt-bindings/net/ti-dp83869.h
+++ b/include/dt-bindings/net/ti-dp83869.h
@@ -1,10 +1,10 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
 /*
  * Device Tree constants for the Texas Instruments DP83869 PHY
  *
  * Author: Dan Murphy <dmurphy@ti.com>
  *
- * Copyright:   (C) 2019 Texas Instruments, Inc.
+ * Copyright (C) 2015-2024 Texas Instruments Incorporated - https://www.ti.com/
  */
 
 #ifndef _DT_BINDINGS_TI_DP83869_H
-- 
cgit v1.2.3


From b4100947a8014e1876872546398275968f77e1ad Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Tue, 28 May 2024 16:07:10 -0500
Subject: crypto: ccp - align psp_platform_access_msg

Align the whitespace so that future messages will also be better
aligned.

Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/psp-platform-access.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h
index c1dc87fc536b..23893b33e48c 100644
--- a/include/linux/psp-platform-access.h
+++ b/include/linux/psp-platform-access.h
@@ -6,8 +6,8 @@
 #include <linux/psp.h>
 
 enum psp_platform_access_msg {
-	PSP_CMD_NONE = 0x0,
-	PSP_I2C_REQ_BUS_CMD = 0x64,
+	PSP_CMD_NONE			= 0x0,
+	PSP_I2C_REQ_BUS_CMD		= 0x64,
 	PSP_DYNAMIC_BOOST_GET_NONCE,
 	PSP_DYNAMIC_BOOST_SET_UID,
 	PSP_DYNAMIC_BOOST_GET_PARAMETER,
-- 
cgit v1.2.3


From 82f9327f774c6e040ba63a887a85fa2e290a233a Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Tue, 28 May 2024 16:07:11 -0500
Subject: crypto: ccp - Add support for getting security attributes on some
 older systems

Older systems will not populate the security attributes in the
capabilities register. The PSP on these systems, however, does have a
command to get the security attributes. Use this command during ccp
startup to populate the attributes if they're missing.

Closes: https://github.com/fwupd/fwupd/issues/5284
Closes: https://github.com/fwupd/fwupd/issues/5675
Closes: https://github.com/fwupd/fwupd/issues/6253
Closes: https://github.com/fwupd/fwupd/issues/7280
Closes: https://github.com/fwupd/fwupd/issues/6323
Closes: https://github.com/fwupd/fwupd/discussions/5433
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/psp-platform-access.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h
index 23893b33e48c..1504fb012c05 100644
--- a/include/linux/psp-platform-access.h
+++ b/include/linux/psp-platform-access.h
@@ -7,6 +7,7 @@
 
 enum psp_platform_access_msg {
 	PSP_CMD_NONE			= 0x0,
+	PSP_CMD_HSTI_QUERY		= 0x14,
 	PSP_I2C_REQ_BUS_CMD		= 0x64,
 	PSP_DYNAMIC_BOOST_GET_NONCE,
 	PSP_DYNAMIC_BOOST_SET_UID,
-- 
cgit v1.2.3


From 46b3ff73afc815f1feb844e6b57e43cc13051544 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 31 May 2024 18:20:03 +0800
Subject: crypto: sm2 - Remove sm2 algorithm

The SM2 algorithm has a single user in the kernel.  However, it's
never been integrated properly with that user: asymmetric_keys.

The crux of the issue is that the way it computes its digest with
sm3 does not fit into the architecture of asymmetric_keys.  As no
solution has been proposed, remove this algorithm.

It can be resubmitted when it is integrated properly into the
asymmetric_keys subsystem.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/sm2.h | 28 ----------------------------
 1 file changed, 28 deletions(-)
 delete mode 100644 include/crypto/sm2.h

(limited to 'include')

diff --git a/include/crypto/sm2.h b/include/crypto/sm2.h
deleted file mode 100644
index 04a92c1013c8..000000000000
--- a/include/crypto/sm2.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * sm2.h - SM2 asymmetric public-key algorithm
- * as specified by OSCCA GM/T 0003.1-2012 -- 0003.5-2012 SM2 and
- * described at https://tools.ietf.org/html/draft-shen-sm2-ecdsa-02
- *
- * Copyright (c) 2020, Alibaba Group.
- * Written by Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
- */
-
-#ifndef _CRYPTO_SM2_H
-#define _CRYPTO_SM2_H
-
-struct shash_desc;
-
-#if IS_REACHABLE(CONFIG_CRYPTO_SM2)
-int sm2_compute_z_digest(struct shash_desc *desc,
-			 const void *key, unsigned int keylen, void *dgst);
-#else
-static inline int sm2_compute_z_digest(struct shash_desc *desc,
-				       const void *key, unsigned int keylen,
-				       void *dgst)
-{
-	return -ENOTSUPP;
-}
-#endif
-
-#endif /* _CRYPTO_SM2_H */
-- 
cgit v1.2.3


From df3fb27a74a4eeb1436129024a7e957c2e83a95e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= <noralf@tronnes.org>
Date: Tue, 4 Jun 2024 15:20:30 +0200
Subject: drm/mipi-dbi: Make bits per word configurable for pixel transfers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MIPI DCS write/set commands have 8 bit parameters except for the
write_memory commands where it depends on the pixel format.
drm_mipi_dbi does currently only support RGB565 which is 16-bit and it
has to make sure that the pixels enters the SPI bus in big endian format
since the MIPI DBI spec doesn't have support for little endian.

drm_mipi_dbi is optimized for DBI interface option 3 which means that the
16-bit bytes are swapped by the upper layer if the SPI bus does not
support 16 bits per word, signified by the swap_bytes member.

In order to support both 16-bit and 24-bit pixel transfers we need a way
to tell the DBI command layer the format of the buffer. Add a
write_memory_bpw member that the upper layer can use to tell how many
bits per word to use for the SPI transfer.

v4:
- Expand the commit message (Dmitry)

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240604-panel-mipi-dbi-rgb666-v4-3-d7c2bcb9b78d@tronnes.org
Signed-off-by: Noralf Trønnes <noralf@tronnes.org>
---
 include/drm/drm_mipi_dbi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_mipi_dbi.h b/include/drm/drm_mipi_dbi.h
index e8e0f8d39f3a..b36596efdcc3 100644
--- a/include/drm/drm_mipi_dbi.h
+++ b/include/drm/drm_mipi_dbi.h
@@ -56,6 +56,11 @@ struct mipi_dbi {
 	 */
 	struct spi_device *spi;
 
+	/**
+	 * @write_memory_bpw: Bits per word used on a MIPI_DCS_WRITE_MEMORY_START transfer
+	 */
+	unsigned int write_memory_bpw;
+
 	/**
 	 * @dc: Optional D/C gpio.
 	 */
-- 
cgit v1.2.3


From 4aebb79021f3e6c2b6fbb92a7d9c5d1e6ad0324a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= <noralf@tronnes.org>
Date: Tue, 4 Jun 2024 15:20:31 +0200
Subject: drm/mipi-dbi: Add support for DRM_FORMAT_RGB888
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DRM_FORMAT_RGB888 is 24 bits per pixel and it would be natural to send it
on the SPI bus using a 24 bits per word transfer. The problem with this
is that not all SPI controllers support 24 bpw.

Since DRM_FORMAT_RGB888 is stored in memory as little endian and the SPI
bus is big endian we use 8 bpw to always get the same pixel format on the
bus: b8g8r8.

The MIPI DCS specification lists the standard commands that can be sent
over the MIPI DBI interface. The set_address_mode (36h) command has one
bit in the parameter that controls RGB/BGR order. This means that the
controller can be configured to receive the pixel as BGR.

RGB888 is rarely supported on these controllers but RGB666 is very common.
All datasheets I have seen do at least support the pixel format option
where each color is sent as one byte and the 6 MSB's are used.

All this put together means that we can send each pixel as b8g8r8 and an
RGB666 capable controller sees this as b6x2g6x2r6x2.

v4:
- s/emulation_format/pixel_format/ (Dmitry)

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240604-panel-mipi-dbi-rgb666-v4-4-d7c2bcb9b78d@tronnes.org
Signed-off-by: Noralf Trønnes <noralf@tronnes.org>
---
 include/drm/drm_mipi_dbi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_mipi_dbi.h b/include/drm/drm_mipi_dbi.h
index b36596efdcc3..f45f9612c0bc 100644
--- a/include/drm/drm_mipi_dbi.h
+++ b/include/drm/drm_mipi_dbi.h
@@ -101,6 +101,11 @@ struct mipi_dbi_dev {
 	 */
 	struct drm_display_mode mode;
 
+	/**
+	 * @pixel_format: Native pixel format (DRM_FORMAT\_\*)
+	 */
+	u32 pixel_format;
+
 	/**
 	 * @tx_buf: Buffer used for transfer (copy clip rect area)
 	 */
-- 
cgit v1.2.3


From f05eda16037f9363297561bd28f318a6d7833d35 Mon Sep 17 00:00:00 2001
From: Yan Zhao <yan.y.zhao@intel.com>
Date: Fri, 8 Mar 2024 17:09:27 -0800
Subject: srcu: Add an API for a memory barrier after SRCU read lock

To avoid redundant memory barriers, add smp_mb__after_srcu_read_lock() to
pair with smp_mb__after_srcu_read_unlock() for use in paths that need to
emit a memory barrier, but already do srcu_read_lock(), which includes a
full memory barrier.  Provide an API, e.g. as opposed to having callers
document the behavior via a comment, as the full memory barrier provided
by srcu_read_lock() is an implementation detail that shouldn't bleed into
random subsystems.

KVM will use smp_mb__after_srcu_read_lock() in it's VM-Exit path to ensure
a memory barrier is emitted, which is necessary to ensure correctness of
mixed memory types on CPUs that support self-snoop.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
[sean: massage changelog]
Tested-by: Xiangfei Ma <xiangfeix.ma@intel.com>
Tested-by: Yongwei Ma <yongwei.ma@intel.com>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org
Link: https://lore.kernel.org/r/20240309010929.1403984-4-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/srcu.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 236610e4a8fa..1cb4527076de 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -343,6 +343,20 @@ static inline void smp_mb__after_srcu_read_unlock(void)
 	/* __srcu_read_unlock has smp_mb() internally so nothing to do here. */
 }
 
+/**
+ * smp_mb__after_srcu_read_lock - ensure full ordering after srcu_read_lock
+ *
+ * Converts the preceding srcu_read_lock into a two-way memory barrier.
+ *
+ * Call this after srcu_read_lock, to guarantee that all memory operations
+ * that occur after smp_mb__after_srcu_read_lock will appear to happen after
+ * the preceding srcu_read_lock.
+ */
+static inline void smp_mb__after_srcu_read_lock(void)
+{
+	/* __srcu_read_lock has smp_mb() internally so nothing to do here. */
+}
+
 DEFINE_LOCK_GUARD_1(srcu, struct srcu_struct,
 		    _T->idx = srcu_read_lock(_T->lock),
 		    srcu_read_unlock(_T->lock, _T->idx),
-- 
cgit v1.2.3


From 6a79a4e187bdd17959ff3e811d5de65c066f89e6 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Thu, 6 Jun 2024 10:33:49 +0300
Subject: libfs: Introduce case-insensitive string comparison helper

generic_ci_match can be used by case-insensitive filesystems to compare
strings under lookup with dirents in a case-insensitive way.  This
function is currently reimplemented by each filesystem supporting
casefolding, so this reduces code duplication in filesystem-specific
code.

[eugen.hristev@collabora.com: rework to first test the exact match, cleanup
and add error message]

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Eugen Hristev <eugen.hristev@collabora.com>
Link: https://lore.kernel.org/r/20240606073353.47130-4-eugen.hristev@collabora.com
Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0283cf366c2a..2b1eeca426a0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3351,6 +3351,10 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
 extern int generic_check_addressable(unsigned, u64);
 
 extern void generic_set_sb_d_ops(struct super_block *sb);
+extern int generic_ci_match(const struct inode *parent,
+			    const struct qstr *name,
+			    const struct qstr *folded_name,
+			    const u8 *de_name, u32 de_name_len);
 
 static inline bool sb_has_encoding(const struct super_block *sb)
 {
-- 
cgit v1.2.3


From 231035f18d6b80e5c28732a20872398116a54ecd Mon Sep 17 00:00:00 2001
From: Wenchao Hao <haowenchao22@gmail.com>
Date: Thu, 6 Jun 2024 16:52:15 +0800
Subject: workqueue: Increase worker desc's length to 32

Commit 31c89007285d ("workqueue.c: Increase workqueue name length")
increased WQ_NAME_LEN from 24 to 32, but forget to increase
WORKER_DESC_LEN, which would cause truncation when setting kworker's
desc from workqueue_struct's name, process_one_work() for example.

Fixes: 31c89007285d ("workqueue.c: Increase workqueue name length")

Signed-off-by: Wenchao Hao <haowenchao22@gmail.com>
CC: Audra Mitchell <audra@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index fb3993894536..d9968bfc8eac 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -95,7 +95,7 @@ enum wq_misc_consts {
 	WORK_BUSY_RUNNING	= 1 << 1,
 
 	/* maximum string length for set_worker_desc() */
-	WORKER_DESC_LEN		= 24,
+	WORKER_DESC_LEN		= 32,
 };
 
 /* Convenience constants - of type 'unsigned long', not 'enum'! */
-- 
cgit v1.2.3


From 11c63e57404e538c5df91f732e5d505860edb660 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Thu, 16 May 2024 12:25:31 +0200
Subject: firmware: add nowarn variant of request_firmware_nowait()

Device drivers with optional firmware may still want to use the
asynchronous firmware loading interface. To avoid printing a
warning into the kernel log when the optional firmware is
absent, add a nowarn variant of this interface.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Link: https://lore.kernel.org/r/20240516102532.213874-1-l.stach@pengutronix.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/firmware.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/firmware.h b/include/linux/firmware.h
index f026f8926d79..aae1b85ffc10 100644
--- a/include/linux/firmware.h
+++ b/include/linux/firmware.h
@@ -98,6 +98,10 @@ static inline bool firmware_request_builtin(struct firmware *fw,
 #if IS_REACHABLE(CONFIG_FW_LOADER)
 int request_firmware(const struct firmware **fw, const char *name,
 		     struct device *device);
+int firmware_request_nowait_nowarn(
+	struct module *module, const char *name,
+	struct device *device, gfp_t gfp, void *context,
+	void (*cont)(const struct firmware *fw, void *context));
 int firmware_request_nowarn(const struct firmware **fw, const char *name,
 			    struct device *device);
 int firmware_request_platform(const struct firmware **fw, const char *name,
@@ -123,6 +127,14 @@ static inline int request_firmware(const struct firmware **fw,
 	return -EINVAL;
 }
 
+static inline int firmware_request_nowait_nowarn(
+	struct module *module, const char *name,
+	struct device *device, gfp_t gfp, void *context,
+	void (*cont)(const struct firmware *fw, void *context))
+{
+	return -EINVAL;
+}
+
 static inline int firmware_request_nowarn(const struct firmware **fw,
 					  const char *name,
 					  struct device *device)
-- 
cgit v1.2.3


From a31a0a3e90b442c1a414be24b062e27624a40a8e Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Tue, 28 May 2024 11:47:16 -0700
Subject: thermal: intel: intel_soc_dts_thermal: Switch to new Intel CPU model
 defines

New CPU #defines encode vendor and family as well as model.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/platform_data/x86/soc.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/platform_data/x86/soc.h b/include/linux/platform_data/x86/soc.h
index a5705189e2ac..f981907a5cb0 100644
--- a/include/linux/platform_data/x86/soc.h
+++ b/include/linux/platform_data/x86/soc.h
@@ -20,7 +20,7 @@
 static inline bool soc_intel_is_##soc(void)			\
 {								\
 	static const struct x86_cpu_id soc##_cpu_ids[] = {	\
-		X86_MATCH_INTEL_FAM6_MODEL(type, NULL),		\
+		X86_MATCH_VFM(type, NULL),			\
 		{}						\
 	};							\
 	const struct x86_cpu_id *id;				\
@@ -31,11 +31,11 @@ static inline bool soc_intel_is_##soc(void)			\
 	return false;						\
 }
 
-SOC_INTEL_IS_CPU(byt, ATOM_SILVERMONT);
-SOC_INTEL_IS_CPU(cht, ATOM_AIRMONT);
-SOC_INTEL_IS_CPU(apl, ATOM_GOLDMONT);
-SOC_INTEL_IS_CPU(glk, ATOM_GOLDMONT_PLUS);
-SOC_INTEL_IS_CPU(cml, KABYLAKE_L);
+SOC_INTEL_IS_CPU(byt, INTEL_ATOM_SILVERMONT);
+SOC_INTEL_IS_CPU(cht, INTEL_ATOM_AIRMONT);
+SOC_INTEL_IS_CPU(apl, INTEL_ATOM_GOLDMONT);
+SOC_INTEL_IS_CPU(glk, INTEL_ATOM_GOLDMONT_PLUS);
+SOC_INTEL_IS_CPU(cml, INTEL_KABYLAKE_L);
 
 #undef SOC_INTEL_IS_CPU
 
-- 
cgit v1.2.3


From 2a1251e3dbb2995100b6f351c2452228895386a5 Mon Sep 17 00:00:00 2001
From: Konstantin Taranov <kotaranov@microsoft.com>
Date: Fri, 7 Jun 2024 03:08:17 -0700
Subject: RDMA/mana_ib: Process QP error events in mana_ib

Process QP fatal events from the error event queue.
For that, find the QP, using QPN from the event, and then call its
event_handler. To find the QPs, store created RC QPs in an xarray.

Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
Link: https://lore.kernel.org/r/1717754897-19858-1-git-send-email-kotaranov@linux.microsoft.com
Reviewed-by: Wei Hu <weh@microsoft.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/net/mana/gdma.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 27684135bb4d..44c797d5f899 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -60,6 +60,7 @@ enum gdma_eqe_type {
 	GDMA_EQE_HWC_INIT_DONE		= 131,
 	GDMA_EQE_HWC_SOC_RECONFIG	= 132,
 	GDMA_EQE_HWC_SOC_RECONFIG_DATA	= 133,
+	GDMA_EQE_RNIC_QP_FATAL		= 176,
 };
 
 enum {
-- 
cgit v1.2.3


From 195fb517ee25bfefde9c74ecd86348eccbd6d2e4 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Sun, 9 Jun 2024 17:39:24 -0700
Subject: cpu: Move CPU hotplug function declarations into their own header

Avoid upcoming #include hell when <linux/cachinfo.h> wants to use
lockdep_assert_cpus_held() and creates a #include loop that would
break the build for arch/riscv.

  [ bp: s/cpu/CPU/g ]

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20240610003927.341707-2-tony.luck@intel.com
---
 include/linux/cpu.h       | 33 +--------------------------------
 include/linux/cpuhplock.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 32 deletions(-)
 create mode 100644 include/linux/cpuhplock.h

(limited to 'include')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 861c3bfc5f17..a8926d0a28cd 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -18,6 +18,7 @@
 #include <linux/compiler.h>
 #include <linux/cpumask.h>
 #include <linux/cpuhotplug.h>
+#include <linux/cpuhplock.h>
 #include <linux/cpu_smt.h>
 
 struct device;
@@ -132,38 +133,6 @@ static inline int add_cpu(unsigned int cpu) { return 0;}
 #endif /* CONFIG_SMP */
 extern const struct bus_type cpu_subsys;
 
-extern int lockdep_is_cpus_held(void);
-
-#ifdef CONFIG_HOTPLUG_CPU
-extern void cpus_write_lock(void);
-extern void cpus_write_unlock(void);
-extern void cpus_read_lock(void);
-extern void cpus_read_unlock(void);
-extern int  cpus_read_trylock(void);
-extern void lockdep_assert_cpus_held(void);
-extern void cpu_hotplug_disable(void);
-extern void cpu_hotplug_enable(void);
-void clear_tasks_mm_cpumask(int cpu);
-int remove_cpu(unsigned int cpu);
-int cpu_device_down(struct device *dev);
-extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu);
-
-#else /* CONFIG_HOTPLUG_CPU */
-
-static inline void cpus_write_lock(void) { }
-static inline void cpus_write_unlock(void) { }
-static inline void cpus_read_lock(void) { }
-static inline void cpus_read_unlock(void) { }
-static inline int  cpus_read_trylock(void) { return true; }
-static inline void lockdep_assert_cpus_held(void) { }
-static inline void cpu_hotplug_disable(void) { }
-static inline void cpu_hotplug_enable(void) { }
-static inline int remove_cpu(unsigned int cpu) { return -EPERM; }
-static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { }
-#endif	/* !CONFIG_HOTPLUG_CPU */
-
-DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock())
-
 #ifdef CONFIG_PM_SLEEP_SMP
 extern int freeze_secondary_cpus(int primary);
 extern void thaw_secondary_cpus(void);
diff --git a/include/linux/cpuhplock.h b/include/linux/cpuhplock.h
new file mode 100644
index 000000000000..386abc482264
--- /dev/null
+++ b/include/linux/cpuhplock.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * include/linux/cpuhplock.h - CPU hotplug locking
+ *
+ * Locking functions for CPU hotplug.
+ */
+#ifndef _LINUX_CPUHPLOCK_H_
+#define _LINUX_CPUHPLOCK_H_
+
+#include <linux/cleanup.h>
+#include <linux/errno.h>
+
+struct device;
+
+extern int lockdep_is_cpus_held(void);
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern void cpus_write_lock(void);
+extern void cpus_write_unlock(void);
+extern void cpus_read_lock(void);
+extern void cpus_read_unlock(void);
+extern int  cpus_read_trylock(void);
+extern void lockdep_assert_cpus_held(void);
+extern void cpu_hotplug_disable(void);
+extern void cpu_hotplug_enable(void);
+void clear_tasks_mm_cpumask(int cpu);
+int remove_cpu(unsigned int cpu);
+int cpu_device_down(struct device *dev);
+extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu);
+
+#else /* CONFIG_HOTPLUG_CPU */
+
+static inline void cpus_write_lock(void) { }
+static inline void cpus_write_unlock(void) { }
+static inline void cpus_read_lock(void) { }
+static inline void cpus_read_unlock(void) { }
+static inline int  cpus_read_trylock(void) { return true; }
+static inline void lockdep_assert_cpus_held(void) { }
+static inline void cpu_hotplug_disable(void) { }
+static inline void cpu_hotplug_enable(void) { }
+static inline int remove_cpu(unsigned int cpu) { return -EPERM; }
+static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { }
+#endif	/* !CONFIG_HOTPLUG_CPU */
+
+DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock())
+
+#endif /* _LINUX_CPUHPLOCK_H_ */
-- 
cgit v1.2.3


From ddefcfdeb5a2238cbcb07b80dda9ac3136735b1e Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Sun, 9 Jun 2024 17:39:25 -0700
Subject: cpu: Drop "extern" from function declarations in cpuhplock.h

This file was created with a direct cut and paste from cpu.h so
kept the legacy declaration style.

But the Linux coding standard for function declarations in header
files is to avoid use of "extern".

Drop "extern" from all function declarations.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20240610003927.341707-3-tony.luck@intel.com
---
 include/linux/cpuhplock.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpuhplock.h b/include/linux/cpuhplock.h
index 386abc482264..431560bbd045 100644
--- a/include/linux/cpuhplock.h
+++ b/include/linux/cpuhplock.h
@@ -15,18 +15,18 @@ struct device;
 extern int lockdep_is_cpus_held(void);
 
 #ifdef CONFIG_HOTPLUG_CPU
-extern void cpus_write_lock(void);
-extern void cpus_write_unlock(void);
-extern void cpus_read_lock(void);
-extern void cpus_read_unlock(void);
-extern int  cpus_read_trylock(void);
-extern void lockdep_assert_cpus_held(void);
-extern void cpu_hotplug_disable(void);
-extern void cpu_hotplug_enable(void);
+void cpus_write_lock(void);
+void cpus_write_unlock(void);
+void cpus_read_lock(void);
+void cpus_read_unlock(void);
+int  cpus_read_trylock(void);
+void lockdep_assert_cpus_held(void);
+void cpu_hotplug_disable(void);
+void cpu_hotplug_enable(void);
 void clear_tasks_mm_cpumask(int cpu);
 int remove_cpu(unsigned int cpu);
 int cpu_device_down(struct device *dev);
-extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu);
+void smp_shutdown_nonboot_cpus(unsigned int primary_cpu);
 
 #else /* CONFIG_HOTPLUG_CPU */
 
-- 
cgit v1.2.3


From 685cb1674060c2cb1b9da051a12933c082b8e874 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Sun, 9 Jun 2024 17:39:26 -0700
Subject: cacheinfo: Add function to get cacheinfo for a given CPU and cache
 level

Resctrl open codes a search for information about a given cache level in
a couple of places (and more are on the way).

Provide a new inline function get_cpu_cacheinfo_level() in
<linux/cacheinfo.h> to do the search and return a pointer to the
cacheinfo structure.

Add lockdep_assert_cpus_held() to enforce the comment that cpuhp lock
must be held.

Simplify the existing get_cpu_cacheinfo_id() by using this new function
to do the search.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lore.kernel.org/r/20240610003927.341707-4-tony.luck@intel.com
---
 include/linux/cacheinfo.h | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 2cb15fe4fe12..3dde175f4108 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -3,6 +3,7 @@
 #define _LINUX_CACHEINFO_H
 
 #include <linux/bitops.h>
+#include <linux/cpuhplock.h>
 #include <linux/cpumask.h>
 #include <linux/smp.h>
 
@@ -113,23 +114,37 @@ int acpi_get_cache_info(unsigned int cpu,
 const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf);
 
 /*
- * Get the id of the cache associated with @cpu at level @level.
+ * Get the cacheinfo structure for the cache associated with @cpu at
+ * level @level.
  * cpuhp lock must be held.
  */
-static inline int get_cpu_cacheinfo_id(int cpu, int level)
+static inline struct cacheinfo *get_cpu_cacheinfo_level(int cpu, int level)
 {
 	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
 	int i;
 
+	lockdep_assert_cpus_held();
+
 	for (i = 0; i < ci->num_leaves; i++) {
 		if (ci->info_list[i].level == level) {
 			if (ci->info_list[i].attributes & CACHE_ID)
-				return ci->info_list[i].id;
-			return -1;
+				return &ci->info_list[i];
+			return NULL;
 		}
 	}
 
-	return -1;
+	return NULL;
+}
+
+/*
+ * Get the id of the cache associated with @cpu at level @level.
+ * cpuhp lock must be held.
+ */
+static inline int get_cpu_cacheinfo_id(int cpu, int level)
+{
+	struct cacheinfo *ci = get_cpu_cacheinfo_level(cpu, level);
+
+	return ci ? ci->id : -1;
 }
 
 #ifdef CONFIG_ARM64
-- 
cgit v1.2.3


From 62096c48394b49e326056a62780fb67b60edde91 Mon Sep 17 00:00:00 2001
From: Ming Qian <ming.qian@nxp.com>
Date: Mon, 6 May 2024 17:49:16 +0900
Subject: media: v4l2-ctrls: Add average QP control

Add a control V4L2_CID_MPEG_VIDEO_AVERAGE_QP to report the average QP
value of the current encoded frame. The value applies to the last
dequeued capture buffer.

Signed-off-by: Ming Qian <ming.qian@nxp.com>
Reviewed-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Sebastian Fricke <sebastian.fricke@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/uapi/linux/v4l2-controls.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 99c3f5e99da7..974fd254e573 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -898,6 +898,8 @@ enum v4l2_mpeg_video_av1_level {
 	V4L2_MPEG_VIDEO_AV1_LEVEL_7_3 = 23
 };
 
+#define V4L2_CID_MPEG_VIDEO_AVERAGE_QP  (V4L2_CID_CODEC_BASE + 657)
+
 /*  MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */
 #define V4L2_CID_CODEC_CX2341X_BASE				(V4L2_CTRL_CLASS_CODEC | 0x1000)
 #define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE		(V4L2_CID_CODEC_CX2341X_BASE+0)
-- 
cgit v1.2.3


From 97d833ceb27dc19f8777d63f90be4a27b5daeedf Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 6 Jun 2024 16:49:42 +0200
Subject: mlxsw: spectrum_acl_erp: Fix object nesting warning

ACLs in Spectrum-2 and newer ASICs can reside in the algorithmic TCAM
(A-TCAM) or in the ordinary circuit TCAM (C-TCAM). The former can
contain more ACLs (i.e., tc filters), but the number of masks in each
region (i.e., tc chain) is limited.

In order to mitigate the effects of the above limitation, the device
allows filters to share a single mask if their masks only differ in up
to 8 consecutive bits. For example, dst_ip/25 can be represented using
dst_ip/24 with a delta of 1 bit. The C-TCAM does not have a limit on the
number of masks being used (and therefore does not support mask
aggregation), but can contain a limited number of filters.

The driver uses the "objagg" library to perform the mask aggregation by
passing it objects that consist of the filter's mask and whether the
filter is to be inserted into the A-TCAM or the C-TCAM since filters in
different TCAMs cannot share a mask.

The set of created objects is dependent on the insertion order of the
filters and is not necessarily optimal. Therefore, the driver will
periodically ask the library to compute a more optimal set ("hints") by
looking at all the existing objects.

When the library asks the driver whether two objects can be aggregated
the driver only compares the provided masks and ignores the A-TCAM /
C-TCAM indication. This is the right thing to do since the goal is to
move as many filters as possible to the A-TCAM. The driver also forbids
two identical masks from being aggregated since this can only happen if
one was intentionally put in the C-TCAM to avoid a conflict in the
A-TCAM.

The above can result in the following set of hints:

H1: {mask X, A-TCAM} -> H2: {mask Y, A-TCAM} // X is Y + delta
H3: {mask Y, C-TCAM} -> H4: {mask Z, A-TCAM} // Y is Z + delta

After getting the hints from the library the driver will start migrating
filters from one region to another while consulting the computed hints
and instructing the device to perform a lookup in both regions during
the transition.

Assuming a filter with mask X is being migrated into the A-TCAM in the
new region, the hints lookup will return H1. Since H2 is the parent of
H1, the library will try to find the object associated with it and
create it if necessary in which case another hints lookup (recursive)
will be performed. This hints lookup for {mask Y, A-TCAM} will either
return H2 or H3 since the driver passes the library an object comparison
function that ignores the A-TCAM / C-TCAM indication.

This can eventually lead to nested objects which are not supported by
the library [1].

Fix by removing the object comparison function from both the driver and
the library as the driver was the only user. That way the lookup will
only return exact matches.

I do not have a reliable reproducer that can reproduce the issue in a
timely manner, but before the fix the issue would reproduce in several
minutes and with the fix it does not reproduce in over an hour.

Note that the current usefulness of the hints is limited because they
include the C-TCAM indication and represent aggregation that cannot
actually happen. This will be addressed in net-next.

[1]
WARNING: CPU: 0 PID: 153 at lib/objagg.c:170 objagg_obj_parent_assign+0xb5/0xd0
Modules linked in:
CPU: 0 PID: 153 Comm: kworker/0:18 Not tainted 6.9.0-rc6-custom-g70fbc2c1c38b #42
Hardware name: Mellanox Technologies Ltd. MSN3700C/VMOD0008, BIOS 5.11 10/10/2018
Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work
RIP: 0010:objagg_obj_parent_assign+0xb5/0xd0
[...]
Call Trace:
 <TASK>
 __objagg_obj_get+0x2bb/0x580
 objagg_obj_get+0xe/0x80
 mlxsw_sp_acl_erp_mask_get+0xb5/0xf0
 mlxsw_sp_acl_atcam_entry_add+0xe8/0x3c0
 mlxsw_sp_acl_tcam_entry_create+0x5e/0xa0
 mlxsw_sp_acl_tcam_vchunk_migrate_one+0x16b/0x270
 mlxsw_sp_acl_tcam_vregion_rehash_work+0xbe/0x510
 process_one_work+0x151/0x370

Fixes: 9069a3817d82 ("lib: objagg: implement optimization hints assembly and use hints for object creation")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Amit Cohen <amcohen@nvidia.com>
Tested-by: Alexander Zubkov <green@qrator.net>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/objagg.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/objagg.h b/include/linux/objagg.h
index 78021777df46..6df5b887dc54 100644
--- a/include/linux/objagg.h
+++ b/include/linux/objagg.h
@@ -8,7 +8,6 @@ struct objagg_ops {
 	size_t obj_size;
 	bool (*delta_check)(void *priv, const void *parent_obj,
 			    const void *obj);
-	int (*hints_obj_cmp)(const void *obj1, const void *obj2);
 	void * (*delta_create)(void *priv, void *parent_obj, void *obj);
 	void (*delta_destroy)(void *priv, void *delta_priv);
 	void * (*root_create)(void *priv, void *obj, unsigned int root_id);
-- 
cgit v1.2.3


From 96f3b978736356ba0e5a7d923681765c7ea9b12b Mon Sep 17 00:00:00 2001
From: Dmitry Rokosov <ddrokosov@salutedevices.com>
Date: Wed, 15 May 2024 21:47:25 +0300
Subject: dt-bindings: clock: meson: a1: pll: introduce new syspll bindings

The 'syspll' PLL is a general-purpose PLL designed specifically for the
CPU clock. It is capable of producing output frequencies within the
range of 768MHz to 1536MHz.

The 'syspll_in' source clock is an optional parent connection from the
peripherals clock controller.

Signed-off-by: Dmitry Rokosov <ddrokosov@salutedevices.com>
Acked-by: Rob Herring (Arm) <robh@kernel.org>
Link: https://lore.kernel.org/r/20240515185103.20256-3-ddrokosov@salutedevices.com
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 include/dt-bindings/clock/amlogic,a1-pll-clkc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/amlogic,a1-pll-clkc.h b/include/dt-bindings/clock/amlogic,a1-pll-clkc.h
index 2b660c0f2c9f..0dfc5e78a2d5 100644
--- a/include/dt-bindings/clock/amlogic,a1-pll-clkc.h
+++ b/include/dt-bindings/clock/amlogic,a1-pll-clkc.h
@@ -21,5 +21,6 @@
 #define CLKID_FCLK_DIV5		8
 #define CLKID_FCLK_DIV7		9
 #define CLKID_HIFI_PLL		10
+#define CLKID_SYS_PLL		11
 
 #endif /* __A1_PLL_CLKC_H */
-- 
cgit v1.2.3


From 41056416ed538d1a05dbba57060c02acdc5154e7 Mon Sep 17 00:00:00 2001
From: Dmitry Rokosov <ddrokosov@salutedevices.com>
Date: Wed, 15 May 2024 21:47:27 +0300
Subject: dt-bindings: clock: meson: a1: peripherals: support sys_pll input

The 'sys_pll' input is an optional clock that can be used to generate
'sys_pll_div16', which serves as one of the sources for the GEN clock.

Signed-off-by: Dmitry Rokosov <ddrokosov@salutedevices.com>
Acked-by: Rob Herring (Arm) <robh@kernel.org>
Link: https://lore.kernel.org/r/20240515185103.20256-5-ddrokosov@salutedevices.com
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 include/dt-bindings/clock/amlogic,a1-peripherals-clkc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/amlogic,a1-peripherals-clkc.h b/include/dt-bindings/clock/amlogic,a1-peripherals-clkc.h
index 06f198ee7623..2ce1a06dc735 100644
--- a/include/dt-bindings/clock/amlogic,a1-peripherals-clkc.h
+++ b/include/dt-bindings/clock/amlogic,a1-peripherals-clkc.h
@@ -164,5 +164,6 @@
 #define CLKID_DMC_SEL		151
 #define CLKID_DMC_DIV		152
 #define CLKID_DMC_SEL2		153
+#define CLKID_SYS_PLL_DIV16	154
 
 #endif /* __A1_PERIPHERALS_CLKC_H */
-- 
cgit v1.2.3


From b334b924c9b709bc969644fb5c406f5c9d01dceb Mon Sep 17 00:00:00 2001
From: Valentin Schneider <vschneid@redhat.com>
Date: Thu, 6 Jun 2024 17:11:37 +0200
Subject: net: tcp/dccp: prepare for tw_timer un-pinning

The TCP timewait timer is proving to be problematic for setups where
scheduler CPU isolation is achieved at runtime via cpusets (as opposed to
statically via isolcpus=domains).

What happens there is a CPU goes through tcp_time_wait(), arming the
time_wait timer, then gets isolated. TCP_TIMEWAIT_LEN later, the timer
fires, causing interference for the now-isolated CPU. This is conceptually
similar to the issue described in commit e02b93124855 ("workqueue: Unbind
kworkers before sending them to exit()")

Move inet_twsk_schedule() to within inet_twsk_hashdance(), with the ehash
lock held. Expand the lock's critical section from inet_twsk_kill() to
inet_twsk_deschedule_put(), serializing the scheduling vs descheduling of
the timer. IOW, this prevents the following race:

			     tcp_time_wait()
			       inet_twsk_hashdance()
  inet_twsk_deschedule_put()
    del_timer_sync()
			       inet_twsk_schedule()

Thanks to Paolo Abeni for suggesting to leverage the ehash lock.

This also restores a comment from commit ec94c2696f0b ("tcp/dccp: avoid
one atomic operation for timewait hashdance") as inet_twsk_hashdance() had
a "Step 1" and "Step 3" comment, but the "Step 2" had gone missing.

inet_twsk_deschedule_put() now acquires the ehash spinlock to synchronize
with inet_twsk_hashdance_schedule().

To ease possible regression search, actual un-pin is done in next patch.

Link: https://lore.kernel.org/all/ZPhpfMjSiHVjQkTk@localhost.localdomain/
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_timewait_sock.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 2a536eea9424..5b43d220243d 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -93,8 +93,10 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
 					   struct inet_timewait_death_row *dr,
 					   const int state);
 
-void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
-			 struct inet_hashinfo *hashinfo);
+void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw,
+				  struct sock *sk,
+				  struct inet_hashinfo *hashinfo,
+				  int timeo);
 
 void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo,
 			  bool rearm);
-- 
cgit v1.2.3


From f81d0dd2fde35fd1acc30b3f4de6aaf57d514551 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 6 Jun 2024 17:11:39 +0200
Subject: tcp: move inet_twsk_schedule helper out of header

Its no longer used outside inet_timewait_sock.c, so move it there.

Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_timewait_sock.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 5b43d220243d..f88b68269012 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -101,11 +101,6 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw,
 void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo,
 			  bool rearm);
 
-static inline void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo)
-{
-	__inet_twsk_schedule(tw, timeo, false);
-}
-
 static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo)
 {
 	__inet_twsk_schedule(tw, timeo, true);
-- 
cgit v1.2.3


From 0b7e448119428e1dcb854abb5855f66966fb82dc Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Tue, 28 May 2024 14:29:33 -0500
Subject: ACPI: utils: introduce acpi_get_local_u64_address()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ACPI _ADR is a 64-bit value. We changed the definitions in commit
ca6f998cf9a2 ("ACPI: bus: change _ADR representation to 64 bits") but
some helpers still assume the value is a 32-bit value.

This patch adds a new helper to extract the full 64-bits. The existing
32-bit helper is kept for backwards-compatibility and cases where the
_ADR is known to fit in a 32-bit value.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20240528192936.16180-2-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/acpi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 28c3fb2bef0d..65e7177bcb02 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -761,6 +761,7 @@ static inline u64 acpi_arch_get_root_pointer(void)
 }
 #endif
 
+int acpi_get_local_u64_address(acpi_handle handle, u64 *addr);
 int acpi_get_local_address(acpi_handle handle, u32 *addr);
 const char *acpi_get_subsystem_id(acpi_handle handle);
 
-- 
cgit v1.2.3


From e289df82344fecc104ad8326b9ab6da612b9c899 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 31 May 2024 22:42:40 +0300
Subject: spi: Rework per message DMA mapped flag to be per transfer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The granularity of DMA mappings is transfer and moreover,
the direction is also important as it can be unidirect.

The current cur_msg_mapped flag doesn't fit well the DMA mapping
and syncing calls and we have tons of checks around on top of it.
So, instead of doing that rework the code to use per transfer per
direction flag to show if it's DMA mapped or not.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Neil Armstrong <neil.armstrong@linaro.org> # on SM8650-QRD
Link: https://lore.kernel.org/r/20240531194723.1761567-9-andriy.shevchenko@linux.intel.com
Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index e8e1e798924f..b4a89db4c855 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -447,7 +447,6 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
  * @cur_msg_need_completion: Flag used internally to opportunistically skip
  *	the @cur_msg_completion. This flag is used to signal the context that
  *	is running spi_finalize_current_message() that it needs to complete()
- * @cur_msg_mapped: message has been mapped for DMA
  * @fallback: fallback to PIO if DMA transfer return failure with
  *	SPI_TRANS_FAIL_NO_START.
  * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs.
@@ -708,7 +707,6 @@ struct spi_controller {
 	bool				running;
 	bool				rt;
 	bool				auto_runtime_pm;
-	bool				cur_msg_mapped;
 	bool                            fallback;
 	bool				last_cs_mode_high;
 	s8				last_cs[SPI_CS_CNT_MAX];
@@ -981,6 +979,8 @@ struct spi_res {
  *      transfer this transfer. Set to 0 if the SPI bus driver does
  *      not support it.
  * @transfer_list: transfers are sequenced through @spi_message.transfers
+ * @tx_sg_mapped: If true, the @tx_sg is mapped for DMA
+ * @rx_sg_mapped: If true, the @rx_sg is mapped for DMA
  * @tx_sg: Scatterlist for transmit, currently not for client use
  * @rx_sg: Scatterlist for receive, currently not for client use
  * @ptp_sts_word_pre: The word (subject to bits_per_word semantics) offset
@@ -1077,10 +1077,13 @@ struct spi_transfer {
 #define SPI_TRANS_FAIL_IO	BIT(1)
 	u16		error;
 
-	dma_addr_t	tx_dma;
-	dma_addr_t	rx_dma;
+	bool		tx_sg_mapped;
+	bool		rx_sg_mapped;
+
 	struct sg_table tx_sg;
 	struct sg_table rx_sg;
+	dma_addr_t	tx_dma;
+	dma_addr_t	rx_dma;
 
 	unsigned	dummy_data:1;
 	unsigned	cs_off:1;
-- 
cgit v1.2.3


From 24d07f114e4ec7608659a4ef18307f76739c72a8 Mon Sep 17 00:00:00 2001
From: Jocelyn Falempe <jfalempe@redhat.com>
Date: Mon, 3 Jun 2024 11:47:26 +0200
Subject: drm/panic: Add a set_pixel() callback to drm_scanout_buffer

This allows drivers to draw the pixel, and handle tiling, or specific
color formats.

v2:
 * Use fg_color for blit() functions (Javier Martinez Canillas)

Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240603095343.39588-3-jfalempe@redhat.com
---
 include/drm/drm_panic.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_panic.h b/include/drm/drm_panic.h
index 822dbb1aa9d6..73bb3f3d9ed9 100644
--- a/include/drm/drm_panic.h
+++ b/include/drm/drm_panic.h
@@ -50,6 +50,15 @@ struct drm_scanout_buffer {
 	 * @pitch: Length in bytes between the start of two consecutive lines.
 	 */
 	unsigned int pitch[DRM_FORMAT_MAX_PLANES];
+
+	/**
+	 * @set_pixel: Optional function, to set a pixel color on the
+	 * framebuffer. It allows to handle special tiling format inside the
+	 * driver.
+	 */
+	void (*set_pixel)(struct drm_scanout_buffer *sb, unsigned int x,
+			  unsigned int y, u32 color);
+
 };
 
 /**
-- 
cgit v1.2.3


From 1f020495458396766496ee067130f507a4d718e4 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 31 May 2024 22:37:46 +0200
Subject: drm/bridge: Drop drm_bridge_chain_mode_fixup

There are no users left of drm_bridge_chain_mode_fixup() and we
do not want to have this function available, so drop it.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Robert Foss <rfoss@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240531-bridge_chain_mode-v1-2-8b49e36c5dd3@ravnborg.org
---
 include/drm/drm_bridge.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h
index 4baca0d9107b..5cf41f92d1f0 100644
--- a/include/drm/drm_bridge.h
+++ b/include/drm/drm_bridge.h
@@ -855,9 +855,6 @@ drm_bridge_chain_get_first_bridge(struct drm_encoder *encoder)
 #define drm_for_each_bridge_in_chain(encoder, bridge)			\
 	list_for_each_entry(bridge, &(encoder)->bridge_chain, chain_node)
 
-bool drm_bridge_chain_mode_fixup(struct drm_bridge *bridge,
-				 const struct drm_display_mode *mode,
-				 struct drm_display_mode *adjusted_mode);
 enum drm_mode_status
 drm_bridge_chain_mode_valid(struct drm_bridge *bridge,
 			    const struct drm_display_info *info,
-- 
cgit v1.2.3


From 5fbf57a937f418fe204f9dbb7735e91984f4ee6a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 6 Jun 2024 12:29:06 -0700
Subject: net: netlink: remove the cb_mutex "injection" from netlink core

Back in 2007, in commit af65bdfce98d ("[NETLINK]: Switch cb_lock spinlock
to mutex and allow to override it") netlink core was extended to allow
subsystems to replace the dump mutex lock with its own lock.

The mechanism was used by rtnetlink to take rtnl_lock but it isn't
sufficiently flexible for other users. Over the 17 years since
it was added no other user appeared. Since rtnetlink needs conditional
locking now, and doesn't use it either, axe this feature complete.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 5df7340d4dab..b332c2048c75 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -47,7 +47,6 @@ struct netlink_kernel_cfg {
 	unsigned int	groups;
 	unsigned int	flags;
 	void		(*input)(struct sk_buff *skb);
-	struct mutex	*cb_mutex;
 	int		(*bind)(struct net *net, int group);
 	void		(*unbind)(struct net *net, int group);
 	void            (*release) (struct sock *sk, unsigned long *groups);
-- 
cgit v1.2.3


From c6ae073f5903f6c6439d0ac855836a4da5c0a701 Mon Sep 17 00:00:00 2001
From: Gal Pressman <gal@nvidia.com>
Date: Thu, 6 Jun 2024 23:32:48 +0300
Subject: geneve: Fix incorrect inner network header offset when
 innerprotoinherit is set

When innerprotoinherit is set, the tunneled packets do not have an inner
Ethernet header.
Change 'maclen' to not always assume the header length is ETH_HLEN, as
there might not be a MAC header.

This resolves issues with drivers (e.g. mlx5, in
mlx5e_tx_tunnel_accel()) who rely on the skb inner network header offset
to be correct, and use it for TX offloads.

Fixes: d8a6213d70ac ("geneve: fix header validation in geneve[6]_xmit_skb")
Signed-off-by: Gal Pressman <gal@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 9a6a08ec7713..1db2417b8ff5 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -461,9 +461,10 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb)
 
 /* Variant of pskb_inet_may_pull().
  */
-static inline bool skb_vlan_inet_prepare(struct sk_buff *skb)
+static inline bool skb_vlan_inet_prepare(struct sk_buff *skb,
+					 bool inner_proto_inherit)
 {
-	int nhlen = 0, maclen = ETH_HLEN;
+	int nhlen = 0, maclen = inner_proto_inherit ? 0 : ETH_HLEN;
 	__be16 type = skb->protocol;
 
 	/* Essentially this is skb_protocol(skb, true)
-- 
cgit v1.2.3


From 806a5198c05987b748b50f3d0c0cfb3d417381a4 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Mon, 20 May 2024 16:03:07 -0400
Subject: Bluetooth: L2CAP: Fix rejecting L2CAP_CONN_PARAM_UPDATE_REQ

This removes the bogus check for max > hcon->le_conn_max_interval since
the later is just the initial maximum conn interval not the maximum the
stack could support which is really 3200=4000ms.

In order to pass GAP/CONN/CPUP/BV-05-C one shall probably enter values
of the following fields in IXIT that would cause hci_check_conn_params
to fail:

TSPX_conn_update_int_min
TSPX_conn_update_int_max
TSPX_conn_update_peripheral_latency
TSPX_conn_update_supervision_timeout

Link: https://github.com/bluez/bluez/issues/847
Fixes: e4b019515f95 ("Bluetooth: Enforce validation on max value of connection interval")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci_core.h | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 9231396fe96f..c43716edf205 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -2113,18 +2113,46 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency,
 {
 	u16 max_latency;
 
-	if (min > max || min < 6 || max > 3200)
+	if (min > max) {
+		BT_WARN("min %d > max %d", min, max);
 		return -EINVAL;
+	}
+
+	if (min < 6) {
+		BT_WARN("min %d < 6", min);
+		return -EINVAL;
+	}
+
+	if (max > 3200) {
+		BT_WARN("max %d > 3200", max);
+		return -EINVAL;
+	}
+
+	if (to_multiplier < 10) {
+		BT_WARN("to_multiplier %d < 10", to_multiplier);
+		return -EINVAL;
+	}
 
-	if (to_multiplier < 10 || to_multiplier > 3200)
+	if (to_multiplier > 3200) {
+		BT_WARN("to_multiplier %d > 3200", to_multiplier);
 		return -EINVAL;
+	}
 
-	if (max >= to_multiplier * 8)
+	if (max >= to_multiplier * 8) {
+		BT_WARN("max %d >= to_multiplier %d * 8", max, to_multiplier);
 		return -EINVAL;
+	}
 
 	max_latency = (to_multiplier * 4 / max) - 1;
-	if (latency > 499 || latency > max_latency)
+	if (latency > 499) {
+		BT_WARN("latency %d > 499", latency);
 		return -EINVAL;
+	}
+
+	if (latency > max_latency) {
+		BT_WARN("latency %d > max_latency %d", latency, max_latency);
+		return -EINVAL;
+	}
 
 	return 0;
 }
-- 
cgit v1.2.3


From 8a74e4eaa72c14f997df6d820effb6aac400d470 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 10 Jun 2024 10:20:53 +0200
Subject: PCI: switchtec: Make switchtec_class constant

Now that the driver core allows for struct class to be in read-only memory,
we should make all 'class' structures declared at build time placing them
into read-only memory, instead of having to be dynamically allocated at
runtime.

Link: https://lore.kernel.org/r/2024061053-online-unwound-b173@gregkh
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-By: Logan Gunthorpe <logang@deltatee.com>
Cc: Kurt Schwemmer <kurt.schwemmer@microsemi.com>
Cc: Jon Mason <jdmason@kudzu.us>
Cc: Allen Hubbe <allenbh@gmail.com>
---
 include/linux/switchtec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h
index 8d8fac1626bd..cdb58d61c152 100644
--- a/include/linux/switchtec.h
+++ b/include/linux/switchtec.h
@@ -521,6 +521,6 @@ static inline struct switchtec_dev *to_stdev(struct device *dev)
 	return container_of(dev, struct switchtec_dev, dev);
 }
 
-extern struct class *switchtec_class;
+extern const struct class switchtec_class;
 
 #endif
-- 
cgit v1.2.3


From 1d4ce389da2b84e0e24aad5e83fe9c742adc3f99 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 7 Jun 2024 14:22:33 -0700
Subject: ice: add and use roundup_u64 instead of open coding equivalent

In ice_ptp_cfg_clkout(), the ice driver needs to calculate the nearest next
second of a current time value specified in nanoseconds. It implements this
using div64_u64, because the time value is a u64. It could use div_u64
since NSEC_PER_SEC is smaller than 32-bits.

Ideally this would be implemented directly with roundup(), but that can't
work on all platforms due to a division which requires using the specific
macros and functions due to platform restrictions, and to ensure that the
most appropriate and fast instructions are used.

The kernel doesn't currently provide any 64-bit equivalents for doing
roundup. Attempting to use roundup() on a 32-bit platform will result in a
link failure due to not having a direct 64-bit division.

The closest equivalent for this is DIV64_U64_ROUND_UP, which does a
division always rounding up. However, this only computes the division, and
forces use of the div64_u64 in cases where the divisor is a 32bit value and
could make use of div_u64.

Introduce DIV_U64_ROUND_UP based on div_u64, and then use it to implement
roundup_u64 which takes a u64 input value and a u32 rounding value.

The name roundup_u64 matches the naming scheme of div_u64, and future
patches could implement roundup64_u64 if they need to round by a multiple
that is greater than 32-bits.

Replace the logic in ice_ptp.c which does this equivalent with the newly
added roundup_u64.

Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20240607-next-2024-06-03-intel-next-batch-v3-2-d1470cee3347@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/math64.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/linux/math64.h b/include/linux/math64.h
index d34def7f9a8c..6aaccc1626ab 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -297,6 +297,19 @@ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div);
 #define DIV64_U64_ROUND_UP(ll, d)	\
 	({ u64 _tmp = (d); div64_u64((ll) + _tmp - 1, _tmp); })
 
+/**
+ * DIV_U64_ROUND_UP - unsigned 64bit divide with 32bit divisor rounded up
+ * @ll: unsigned 64bit dividend
+ * @d: unsigned 32bit divisor
+ *
+ * Divide unsigned 64bit dividend by unsigned 32bit divisor
+ * and round up.
+ *
+ * Return: dividend / divisor rounded up
+ */
+#define DIV_U64_ROUND_UP(ll, d)		\
+	({ u32 _tmp = (d); div_u64((ll) + _tmp - 1, _tmp); })
+
 /**
  * DIV64_U64_ROUND_CLOSEST - unsigned 64bit divide with 64bit divisor rounded to nearest integer
  * @dividend: unsigned 64bit dividend
@@ -342,4 +355,19 @@ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div);
 		div_s64((__x - (__d / 2)), __d);	\
 }							\
 )
+
+/**
+ * roundup_u64 - Round up a 64bit value to the next specified 32bit multiple
+ * @x: the value to up
+ * @y: 32bit multiple to round up to
+ *
+ * Rounds @x to the next multiple of @y. For 32bit @x values, see roundup and
+ * the faster round_up() for powers of 2.
+ *
+ * Return: rounded up value.
+ */
+static inline u64 roundup_u64(u64 x, u32 y)
+{
+	return DIV_U64_ROUND_UP(x, y) * y;
+}
 #endif /* _LINUX_MATH64_H */
-- 
cgit v1.2.3


From 5f7fb89a115d53b4a10bf7ba2733e78df281e98d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Mon, 10 Jun 2024 23:09:36 -0400
Subject: function_graph: Everyone uses HAVE_FUNCTION_GRAPH_RET_ADDR_PTR,
 remove it

All architectures that implement function graph also implements
HAVE_FUNCTION_GRAPH_RET_ADDR_PTR. Remove it, as it is no longer a
differentiator.

Link: https://lore.kernel.org/linux-trace-kernel/20240611031737.982047614@goodmis.org

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: "Naveen N. Rao" <naveen.n.rao@linux.ibm.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 4135dc171447..845c2ab0bc1c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1071,9 +1071,7 @@ struct ftrace_ret_stack {
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
 	unsigned long fp;
 #endif
-#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 	unsigned long *retp;
-#endif
 };
 
 /*
-- 
cgit v1.2.3


From e8343410ddf08fc36a9b9cc7c51a4e53a262d4c6 Mon Sep 17 00:00:00 2001
From: Jai Luthra <j-luthra@ti.com>
Date: Tue, 11 Jun 2024 18:02:55 +0530
Subject: ALSA: dmaengine: Synchronize dma channel after drop()

Sometimes the stream may be stopped due to XRUN events, in which case
the userspace can call snd_pcm_drop() and snd_pcm_prepare() to stop and
start the stream again.

In these cases, we must wait for the DMA channel to synchronize before
marking the stream as prepared for playback, as the DMA channel gets
stopped by drop() without any synchronization. Make sure the ALSA core
synchronizes the DMA channel by adding a sync_stop() hook.

Reviewed-by: Peter Ujfalusi <peter.ujfalusi@gmail.com>
Signed-off-by: Jai Luthra <j-luthra@ti.com>
Link: https://lore.kernel.org/r/20240611-asoc_next-v3-1-fcfd84b12164@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/dmaengine_pcm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h
index c11aaf8079fb..f6baa9a01868 100644
--- a/include/sound/dmaengine_pcm.h
+++ b/include/sound/dmaengine_pcm.h
@@ -36,6 +36,7 @@ snd_pcm_uframes_t snd_dmaengine_pcm_pointer_no_residue(struct snd_pcm_substream
 int snd_dmaengine_pcm_open(struct snd_pcm_substream *substream,
 	struct dma_chan *chan);
 int snd_dmaengine_pcm_close(struct snd_pcm_substream *substream);
+int snd_dmaengine_pcm_sync_stop(struct snd_pcm_substream *substream);
 
 int snd_dmaengine_pcm_open_request_chan(struct snd_pcm_substream *substream,
 	dma_filter_fn filter_fn, void *filter_data);
-- 
cgit v1.2.3


From 85542adb65ecd7cc0e442e8befef74f2ed07f5f6 Mon Sep 17 00:00:00 2001
From: Thomas Prescher <thomas.prescher@cyberus-technology.de>
Date: Wed, 8 May 2024 15:25:01 +0200
Subject: KVM: x86: Add KVM_RUN_X86_GUEST_MODE kvm_run flag

When a vCPU is interrupted by a signal while running a nested guest,
KVM will exit to userspace with L2 state. However, userspace has no
way to know whether it sees L1 or L2 state (besides calling
KVM_GET_STATS_FD, which does not have a stable ABI).

This causes multiple problems:

The simplest one is L2 state corruption when userspace marks the sregs
as dirty. See this mailing list thread [1] for a complete discussion.

Another problem is that if userspace decides to continue by emulating
instructions, it will unknowingly emulate with L2 state as if L1
doesn't exist, which can be considered a weird guest escape.

Introduce a new flag, KVM_RUN_X86_GUEST_MODE, in the kvm_run data
structure, which is set when the vCPU exited while running a nested
guest.  Also introduce a new capability, KVM_CAP_X86_GUEST_MODE, to
advertise the functionality to userspace.

[1] https://lore.kernel.org/kvm/20240416123558.212040-1-julian.stecklina@cyberus-technology.de/T/#m280aadcb2e10ae02c191a7dc4ed4b711a74b1f55

Signed-off-by: Thomas Prescher <thomas.prescher@cyberus-technology.de>
Signed-off-by: Julian Stecklina <julian.stecklina@cyberus-technology.de>
Link: https://lore.kernel.org/r/20240508132502.184428-1-julian.stecklina@cyberus-technology.de
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/uapi/linux/kvm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index ec998e6b6555..a6ac00ec77ad 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -918,6 +918,7 @@ struct kvm_enable_cap {
 #define KVM_CAP_GUEST_MEMFD 234
 #define KVM_CAP_VM_TYPES 235
 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 236
+#define KVM_CAP_X86_GUEST_MODE 237
 
 struct kvm_irq_routing_irqchip {
 	__u32 irqchip;
-- 
cgit v1.2.3


From f328a26eeb5380bc74e58cb9c3280a4908452df7 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Mon, 3 Jun 2024 17:55:55 -0400
Subject: dlm: introduce DLM_LSFL_SOFTIRQ_SAFE

Introduce a new external lockspace flag DLM_LSFL_SOFTIRQ_SAFE.  A
lockspace user will set this flag if it can handle dlm running the
callback functions from softirq context.  When not set, dlm will
continue to run callback functions from the dlm_callback workqueue.
The new lockspace flag cannot be used for user space lockspaces, so
a uapi placeholder definition is used for the new flag value.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 include/linux/dlm.h      | 17 ++++++++++++++++-
 include/uapi/linux/dlm.h |  2 ++
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dlm.h b/include/linux/dlm.h
index c58c4f790c04..bacda9898f2b 100644
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -35,6 +35,9 @@ struct dlm_lockspace_ops {
 			      int num_slots, int our_slot, uint32_t generation);
 };
 
+/* only relevant for kernel lockspaces, will be removed in future */
+#define DLM_LSFL_SOFTIRQ __DLM_LSFL_RESERVED0
+
 /*
  * dlm_new_lockspace
  *
@@ -55,6 +58,11 @@ struct dlm_lockspace_ops {
  *   used to select the directory node.  Must be the same on all nodes.
  * DLM_LSFL_NEWEXCL
  *   dlm_new_lockspace() should return -EEXIST if the lockspace exists.
+ * DLM_LSFL_SOFTIRQ
+ *   dlm request callbacks (ast, bast) are softirq safe. Flag should be
+ *   preferred by users. Will be default in some future. If set the
+ *   strongest context for ast, bast callback is softirq as it avoids
+ *   an additional context switch.
  *
  * lvblen: length of lvb in bytes.  Must be multiple of 8.
  *   dlm_new_lockspace() returns an error if this does not match
@@ -121,7 +129,14 @@ int dlm_release_lockspace(dlm_lockspace_t *lockspace, int force);
  * call.
  *
  * AST routines should not block (at least not for long), but may make
- * any locking calls they please.
+ * any locking calls they please. If DLM_LSFL_SOFTIRQ for kernel
+ * users of dlm_new_lockspace() is passed the ast and bast callbacks
+ * can be processed in softirq context. Also some of the callback
+ * contexts are in the same context as the DLM lock request API, users
+ * must not hold locks while calling dlm lock request API and trying
+ * to acquire this lock in the callback again, this will end in a
+ * lock recursion. For newer implementation the DLM_LSFL_SOFTIRQ
+ * should be used.
  */
 
 int dlm_lock(dlm_lockspace_t *lockspace,
diff --git a/include/uapi/linux/dlm.h b/include/uapi/linux/dlm.h
index e7e905fb0bb2..4eaf835780b0 100644
--- a/include/uapi/linux/dlm.h
+++ b/include/uapi/linux/dlm.h
@@ -71,6 +71,8 @@ struct dlm_lksb {
 /* DLM_LSFL_TIMEWARN is deprecated and reserved. DO NOT USE! */
 #define DLM_LSFL_TIMEWARN	0x00000002
 #define DLM_LSFL_NEWEXCL     	0x00000008
+/* currently reserved due in-kernel use */
+#define __DLM_LSFL_RESERVED0	0x00000010
 
 
 #endif /* _UAPI__DLM_DOT_H__ */
-- 
cgit v1.2.3


From 10b8e0fd3f7234a38db2c8d2c8dec0bd6eeede44 Mon Sep 17 00:00:00 2001
From: Amelie Delaunay <amelie.delaunay@foss.st.com>
Date: Fri, 31 May 2024 17:07:10 +0200
Subject: dmaengine: add channel device name to channel registration

Channel device name is used for sysfs, but also by dmatest filter function.

With dynamic channel registration, channels can be registered after dma
controller registration. Users may want to have specific channel names.

If name is NULL, the channel name relies on previous implementation,
dma<controller_device_id>chan<channel_device_id>.

Signed-off-by: Amelie Delaunay <amelie.delaunay@foss.st.com>
Link: https://lore.kernel.org/r/20240531150712.2503554-11-amelie.delaunay@foss.st.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/dmaengine.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 752dbde4cec1..73537fddbb52 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -1575,7 +1575,8 @@ int dma_async_device_register(struct dma_device *device);
 int dmaenginem_async_device_register(struct dma_device *device);
 void dma_async_device_unregister(struct dma_device *device);
 int dma_async_device_channel_register(struct dma_device *device,
-				      struct dma_chan *chan);
+				      struct dma_chan *chan,
+				      const char *name);
 void dma_async_device_channel_unregister(struct dma_device *device,
 					 struct dma_chan *chan);
 void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
-- 
cgit v1.2.3


From 3ff1180a39fbc43ae69d4238e6922c57e3278910 Mon Sep 17 00:00:00 2001
From: Andrew Davis <afd@ti.com>
Date: Mon, 10 Jun 2024 08:53:13 -0500
Subject: gpiolib: Remove data-less gpiochip_add() function

GPIO chips should be added with driver-private data associated with the
chip. If none is needed, NULL can be used. All users already do this
except one, fix that here. With no more users of the base gpiochip_add()
we can drop this function so no more users show up later.

Signed-off-by: Andrew Davis <afd@ti.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20240610135313.142571-1-afd@ti.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 include/linux/gpio/driver.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 0032bb6e7d8f..6d31388dde0a 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -632,10 +632,6 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 	devm_gpiochip_add_data_with_key(dev, gc, data, NULL, NULL)
 #endif /* CONFIG_LOCKDEP */
 
-static inline int gpiochip_add(struct gpio_chip *gc)
-{
-	return gpiochip_add_data(gc, NULL);
-}
 void gpiochip_remove(struct gpio_chip *gc);
 int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc,
 				    void *data, struct lock_class_key *lock_key,
-- 
cgit v1.2.3


From fbe4a7e881d4408bfabbb4fd538f10fd686cd8ab Mon Sep 17 00:00:00 2001
From: Yi Wang <foxywang@tencent.com>
Date: Mon, 6 May 2024 18:17:49 +0800
Subject: KVM: Setup empty IRQ routing when creating a VM

Setup empty IRQ routing during VM creation so that x86 and s390 don't need
to set empty/dummy IRQ routing during KVM_CREATE_IRQCHIP (in future
patches).  Initializing IRQ routing before there are any potential readers
allows KVM to avoid the synchronize_srcu() in kvm_set_irq_routing(), which
can introduces 20+ milliseconds of latency in the VM creation path.

Ensuring that all VMs have non-NULL IRQ routing also hardens KVM against
misbehaving userspace VMMs, e.g. RISC-V dynamically instantiates its
interrupt controller, but doesn't override kvm_arch_intc_initialized() or
kvm_arch_irqfd_allowed(), and so can likely reach kvm_irq_map_gsi()
without fully initialized IRQ routing.

Signed-off-by: Yi Wang <foxywang@tencent.com>
Acked-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Link: https://lore.kernel.org/r/20240506101751.3145407-2-foxywang@tencent.com
[sean: init refcount after IRQ routing, fix stub, massage changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/kvm_host.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c80fe03a1fa4..eaa70e9b1218 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2094,6 +2094,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
 			const struct kvm_irq_routing_entry *entries,
 			unsigned nr,
 			unsigned flags);
+int kvm_init_irq_routing(struct kvm *kvm);
 int kvm_set_routing_entry(struct kvm *kvm,
 			  struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue);
@@ -2103,6 +2104,11 @@ void kvm_free_irq_routing(struct kvm *kvm);
 
 static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 
+static inline int kvm_init_irq_routing(struct kvm *kvm)
+{
+	return 0;
+}
+
 #endif
 
 int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
-- 
cgit v1.2.3


From d1ae567fb8b559401a9f65290bbb0cef5e987bfe Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 21 May 2024 18:40:08 -0700
Subject: KVM: Add a flag to track if a loaded vCPU is scheduled out

Add a kvm_vcpu.scheduled_out flag to track if a vCPU is in the process of
being scheduled out (vCPU put path), or if the vCPU is being reloaded
after being scheduled out (vCPU load path).  In the short term, this will
allow dropping kvm_arch_sched_in(), as arch code can query scheduled_out
during kvm_arch_vcpu_load().

Longer term, scheduled_out opens up other potential optimizations, without
creating subtle/brittle dependencies.  E.g. it allows KVM to keep guest
state (that is managed via kvm_arch_vcpu_{load,put}()) loaded across
kvm_sched_{out,in}(), if KVM knows the state isn't accessed by the host
kernel.  Forcing arch code to coordinate between kvm_arch_sched_{in,out}()
and kvm_arch_vcpu_{load,put}() is awkward, not reusable, and relies on the
exact ordering of calls into arch code.

Adding scheduled_out also obviates the need for a kvm_arch_sched_out()
hook, e.g. if arch code needs to do something novel when putting vCPU
state.

And even if KVM never uses scheduled_out for anything beyond dropping
kvm_arch_sched_in(), just being able to remove all of the arch stubs makes
it worth adding the flag.

Link: https://lore.kernel.org/all/20240430224431.490139-1-seanjc@google.com
Cc: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Acked-by: Kai Huang <kai.huang@intel.com>
Link: https://lore.kernel.org/r/20240522014013.1672962-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index eaa70e9b1218..5e04c6cae34a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -380,6 +380,7 @@ struct kvm_vcpu {
 #endif
 	bool preempted;
 	bool ready;
+	bool scheduled_out;
 	struct kvm_vcpu_arch arch;
 	struct kvm_vcpu_stat stat;
 	char stats_id[KVM_STATS_NAME_SIZE];
-- 
cgit v1.2.3


From 2a27c431400797e0044872283d1971aa372fcd3a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 21 May 2024 18:40:11 -0700
Subject: KVM: Delete the now unused kvm_arch_sched_in()

Delete kvm_arch_sched_in() now that all implementations are nops.

Reviewed-by: Bibo Mao <maobibo@loongson.cn>
Acked-by: Kai Huang <kai.huang@intel.com>
Link: https://lore.kernel.org/r/20240522014013.1672962-5-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/kvm_host.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5e04c6cae34a..7b9d2633a931 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1495,8 +1495,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 					struct kvm_guest_debug *dbg);
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu);
 
-void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id);
-- 
cgit v1.2.3


From 190fec72df4a5d4d98b1e783c333f471e5e5f344 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 12 Jun 2024 08:44:27 +0900
Subject: uprobe: Wire up uretprobe system call

Wiring up uretprobe system call, which comes in following changes.
We need to do the wiring before, because the uretprobe implementation
needs the syscall number.

Note at the moment uretprobe syscall is supported only for native
64-bit process.

Link: https://lore.kernel.org/all/20240611112158.40795-3-jolsa@kernel.org/

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 include/linux/syscalls.h          | 2 ++
 include/uapi/asm-generic/unistd.h | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 9104952d323d..494f5e0f61f7 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -973,6 +973,8 @@ asmlinkage long sys_lsm_list_modules(u64 *ids, u32 *size, u32 flags);
 /* x86 */
 asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on);
 
+asmlinkage long sys_uretprobe(void);
+
 /* pciconfig: alpha, arm, arm64, ia64, sparc */
 asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn,
 				unsigned long off, unsigned long len,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index d983c48a3b6a..2378f88d5ad4 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -845,8 +845,11 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
 #define __NR_mseal 462
 __SYSCALL(__NR_mseal, sys_mseal)
 
+#define __NR_uretprobe 463
+__SYSCALL(__NR_uretprobe, sys_uretprobe)
+
 #undef __NR_syscalls
-#define __NR_syscalls 463
+#define __NR_syscalls 464
 
 /*
  * 32 bit systems traditionally used different
-- 
cgit v1.2.3


From ff474a78cef5cb5f32be52fe25b78441327a2e7c Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 12 Jun 2024 08:44:28 +0900
Subject: uprobe: Add uretprobe syscall to speed up return probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adding uretprobe syscall instead of trap to speed up return probe.

At the moment the uretprobe setup/path is:

  - install entry uprobe

  - when the uprobe is hit, it overwrites probed function's return address
    on stack with address of the trampoline that contains breakpoint
    instruction

  - the breakpoint trap code handles the uretprobe consumers execution and
    jumps back to original return address

This patch replaces the above trampoline's breakpoint instruction with new
ureprobe syscall call. This syscall does exactly the same job as the trap
with some more extra work:

  - syscall trampoline must save original value for rax/r11/rcx registers
    on stack - rax is set to syscall number and r11/rcx are changed and
    used by syscall instruction

  - the syscall code reads the original values of those registers and
    restore those values in task's pt_regs area

  - only caller from trampoline exposed in '[uprobes]' is allowed,
    the process will receive SIGILL signal otherwise

Even with some extra work, using the uretprobes syscall shows speed
improvement (compared to using standard breakpoint):

  On Intel (11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz)

  current:
    uretprobe-nop  :    1.498 ± 0.000M/s
    uretprobe-push :    1.448 ± 0.001M/s
    uretprobe-ret  :    0.816 ± 0.001M/s

  with the fix:
    uretprobe-nop  :    1.969 ± 0.002M/s  < 31% speed up
    uretprobe-push :    1.910 ± 0.000M/s  < 31% speed up
    uretprobe-ret  :    0.934 ± 0.000M/s  < 14% speed up

  On Amd (AMD Ryzen 7 5700U)

  current:
    uretprobe-nop  :    0.778 ± 0.001M/s
    uretprobe-push :    0.744 ± 0.001M/s
    uretprobe-ret  :    0.540 ± 0.001M/s

  with the fix:
    uretprobe-nop  :    0.860 ± 0.001M/s  < 10% speed up
    uretprobe-push :    0.818 ± 0.001M/s  < 10% speed up
    uretprobe-ret  :    0.578 ± 0.000M/s  <  7% speed up

The performance test spawns a thread that runs loop which triggers
uprobe with attached bpf program that increments the counter that
gets printed in results above.

The uprobe (and uretprobe) kind is determined by which instruction
is being patched with breakpoint instruction. That's also important
for uretprobes, because uprobe is installed for each uretprobe.

The performance test is part of bpf selftests:
  tools/testing/selftests/bpf/run_bench_uprobes.sh

Note at the moment uretprobe syscall is supported only for native
64-bit process, compat process still uses standard breakpoint.

Note that when shadow stack is enabled the uretprobe syscall returns
via iret, which is slower than return via sysret, but won't cause the
shadow stack violation.

Link: https://lore.kernel.org/all/20240611112158.40795-4-jolsa@kernel.org/

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 include/linux/uprobes.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index f46e0ca0169c..b503fafb7fb3 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -138,6 +138,9 @@ extern bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check c
 extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
 extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
 					 void *src, unsigned long len);
+extern void uprobe_handle_trampoline(struct pt_regs *regs);
+extern void *arch_uprobe_trampoline(unsigned long *psize);
+extern unsigned long uprobe_get_trampoline_vaddr(void);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
-- 
cgit v1.2.3


From 90e6f08915ec6efe46570420412a65050ec826b2 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Tue, 11 Jun 2024 17:34:35 +0900
Subject: scsi: mpi3mr: Fix ATA NCQ priority support

The function mpi3mr_qcmd() of the mpi3mr driver is able to indicate to
the HBA if a read or write command directed at an ATA device should be
translated to an NCQ read/write command with the high prioiryt bit set
when the request uses the RT priority class and the user has enabled NCQ
priority through sysfs.

However, unlike the mpt3sas driver, the mpi3mr driver does not define
the sas_ncq_prio_supported and sas_ncq_prio_enable sysfs attributes, so
the ncq_prio_enable field of struct mpi3mr_sdev_priv_data is never
actually set and NCQ Priority cannot ever be used.

Fix this by defining these missing atributes to allow a user to check if
an ATA device supports NCQ priority and to enable/disable the use of NCQ
priority. To do this, lift the function scsih_ncq_prio_supp() out of the
mpt3sas driver and make it the generic SCSI SAS transport function
sas_ata_ncq_prio_supported(). Nothing in that function is hardware
specific, so this function can be used in both the mpt3sas driver and
the mpi3mr driver.

Reported-by: Scott McCoy <scott.mccoy@wdc.com>
Fixes: 023ab2a9b4ed ("scsi: mpi3mr: Add support for queue command processing")
Cc: stable@vger.kernel.org
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Link: https://lore.kernel.org/r/20240611083435.92961-1-dlemoal@kernel.org
Reviewed-by: Niklas Cassel <cassel@kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/scsi/scsi_transport_sas.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h
index 0e75b9277c8c..e3b6ce3cbf88 100644
--- a/include/scsi/scsi_transport_sas.h
+++ b/include/scsi/scsi_transport_sas.h
@@ -200,6 +200,8 @@ unsigned int sas_is_tlr_enabled(struct scsi_device *);
 void sas_disable_tlr(struct scsi_device *);
 void sas_enable_tlr(struct scsi_device *);
 
+bool sas_ata_ncq_prio_supported(struct scsi_device *sdev);
+
 extern struct sas_rphy *sas_end_device_alloc(struct sas_port *);
 extern struct sas_rphy *sas_expander_alloc(struct sas_port *, enum sas_device_type);
 void sas_rphy_free(struct sas_rphy *);
-- 
cgit v1.2.3


From fa59dc2f6fc616fde3941f62ccd4adcaa21ccd47 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@codeconstruct.com.au>
Date: Fri, 7 Jun 2024 18:25:24 +0800
Subject: net: core,vrf: Change pcpu_dstat fields to u64_stats_t

The pcpu_sw_netstats and pcpu_lstats structs both contain a set of
u64_stats_t fields for individual stats, but pcpu_dstats uses u64s
instead.

Make this consistent by using u64_stats_t across all stats types.

The per-cpu dstats are only used by the vrf driver at present, so update
that driver as part of this change.

Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20240607-dstats-v3-1-cc781fe116f7@codeconstruct.com.au
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d20c6c99eb88..f148a01dd1d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2731,12 +2731,12 @@ struct pcpu_sw_netstats {
 } __aligned(4 * sizeof(u64));
 
 struct pcpu_dstats {
-	u64			rx_packets;
-	u64			rx_bytes;
-	u64			rx_drops;
-	u64			tx_packets;
-	u64			tx_bytes;
-	u64			tx_drops;
+	u64_stats_t		rx_packets;
+	u64_stats_t		rx_bytes;
+	u64_stats_t		rx_drops;
+	u64_stats_t		tx_packets;
+	u64_stats_t		tx_bytes;
+	u64_stats_t		tx_drops;
 	struct u64_stats_sync	syncp;
 } __aligned(8 * sizeof(u64));
 
-- 
cgit v1.2.3


From 144ba8580bcb82b2686c3d1a043299d844b9a682 Mon Sep 17 00:00:00 2001
From: Kory Maincent <kory.maincent@bootlin.com>
Date: Mon, 10 Jun 2024 10:34:26 +0200
Subject: net: pse-pd: Use EOPNOTSUPP error code instead of ENOTSUPP

ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP as reported by
checkpatch script.

Fixes: 18ff0bcda6d1 ("ethtool: add interface to interact with Ethernet Power Equipment")
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://lore.kernel.org/r/20240610083426.740660-1-kory.maincent@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/pse-pd/pse.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h
index 6d07c95dabb9..6eec24ffa866 100644
--- a/include/linux/pse-pd/pse.h
+++ b/include/linux/pse-pd/pse.h
@@ -167,14 +167,14 @@ static inline int pse_ethtool_get_status(struct pse_control *psec,
 					 struct netlink_ext_ack *extack,
 					 struct pse_control_status *status)
 {
-	return -ENOTSUPP;
+	return -EOPNOTSUPP;
 }
 
 static inline int pse_ethtool_set_config(struct pse_control *psec,
 					 struct netlink_ext_ack *extack,
 					 const struct pse_control_config *config)
 {
-	return -ENOTSUPP;
+	return -EOPNOTSUPP;
 }
 
 static inline bool pse_has_podl(struct pse_control *psec)
-- 
cgit v1.2.3


From 3966a668bfeef49e265e4975a2cdc55fb931036d Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <0x7f454c46@gmail.com>
Date: Fri, 7 Jun 2024 00:25:55 +0100
Subject: net/tcp: Use static_branch_tcp_{md5,ao} to drop ifdefs

It's possible to clean-up some ifdefs by hiding that
tcp_{md5,ao}_needed static branch is defined and compiled only
under related configs, since commit 4c8530dc7d7d ("net/tcp: Only produce
AO/MD5 logs if there are any keys").

Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a70fc39090fe..e5427b05129b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2386,21 +2386,15 @@ static inline void tcp_get_current_key(const struct sock *sk,
 
 static inline bool tcp_key_is_md5(const struct tcp_key *key)
 {
-#ifdef CONFIG_TCP_MD5SIG
-	if (static_branch_unlikely(&tcp_md5_needed.key) &&
-	    key->type == TCP_KEY_MD5)
-		return true;
-#endif
+	if (static_branch_tcp_md5())
+		return key->type == TCP_KEY_MD5;
 	return false;
 }
 
 static inline bool tcp_key_is_ao(const struct tcp_key *key)
 {
-#ifdef CONFIG_TCP_AO
-	if (static_branch_unlikely(&tcp_ao_needed.key) &&
-	    key->type == TCP_KEY_AO)
-		return true;
-#endif
+	if (static_branch_tcp_ao())
+		return key->type == TCP_KEY_AO;
 	return false;
 }
 
-- 
cgit v1.2.3


From 72863087f635367323693b9ab83c3107e0353c5f Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <0x7f454c46@gmail.com>
Date: Fri, 7 Jun 2024 00:25:56 +0100
Subject: net/tcp: Add a helper tcp_ao_hdr_maclen()

It's going to be used more in TCP-AO tracepoints.

Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp_ao.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
index 5d8e9ed2c005..198e02004ad2 100644
--- a/include/net/tcp_ao.h
+++ b/include/net/tcp_ao.h
@@ -19,6 +19,11 @@ struct tcp_ao_hdr {
 	u8	rnext_keyid;
 };
 
+static inline u8 tcp_ao_hdr_maclen(const struct tcp_ao_hdr *aoh)
+{
+	return aoh->length - sizeof(struct tcp_ao_hdr);
+}
+
 struct tcp_ao_counters {
 	atomic64_t	pkt_good;
 	atomic64_t	pkt_bad;
-- 
cgit v1.2.3


From 811efc06e5f30a57030451b2d1998aa81273baf8 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <0x7f454c46@gmail.com>
Date: Fri, 7 Jun 2024 00:25:57 +0100
Subject: net/tcp: Move tcp_inbound_hash() from headers

Two reasons:
1. It's grown up enough
2. In order to not do header spaghetti by including
   <trace/events/tcp.h>, which is necessary for TCP tracepoints.

While at it, unexport and make static tcp_inbound_ao_hash().

Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 78 +++----------------------------------------------------
 1 file changed, 4 insertions(+), 74 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index e5427b05129b..2aac11e7e1cc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1863,12 +1863,6 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk,
 	return __tcp_md5_do_lookup(sk, 0, addr, family, true);
 }
 
-enum skb_drop_reason
-tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
-		     const void *saddr, const void *daddr,
-		     int family, int l3index, const __u8 *hash_location);
-
-
 #define tcp_twsk_md5_key(twsk)	((twsk)->tw_md5_key)
 #else
 static inline struct tcp_md5sig_key *
@@ -1885,13 +1879,6 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk,
 	return NULL;
 }
 
-static inline enum skb_drop_reason
-tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
-		     const void *saddr, const void *daddr,
-		     int family, int l3index, const __u8 *hash_location)
-{
-	return SKB_NOT_DROPPED_YET;
-}
 #define tcp_twsk_md5_key(twsk)	NULL
 #endif
 
@@ -2806,66 +2793,9 @@ static inline bool tcp_ao_required(struct sock *sk, const void *saddr,
 	return false;
 }
 
-/* Called with rcu_read_lock() */
-static inline enum skb_drop_reason
-tcp_inbound_hash(struct sock *sk, const struct request_sock *req,
-		 const struct sk_buff *skb,
-		 const void *saddr, const void *daddr,
-		 int family, int dif, int sdif)
-{
-	const struct tcphdr *th = tcp_hdr(skb);
-	const struct tcp_ao_hdr *aoh;
-	const __u8 *md5_location;
-	int l3index;
-
-	/* Invalid option or two times meet any of auth options */
-	if (tcp_parse_auth_options(th, &md5_location, &aoh)) {
-		tcp_hash_fail("TCP segment has incorrect auth options set",
-			      family, skb, "");
-		return SKB_DROP_REASON_TCP_AUTH_HDR;
-	}
-
-	if (req) {
-		if (tcp_rsk_used_ao(req) != !!aoh) {
-			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD);
-			tcp_hash_fail("TCP connection can't start/end using TCP-AO",
-				      family, skb, "%s",
-				      !aoh ? "missing AO" : "AO signed");
-			return SKB_DROP_REASON_TCP_AOFAILURE;
-		}
-	}
-
-	/* sdif set, means packet ingressed via a device
-	 * in an L3 domain and dif is set to the l3mdev
-	 */
-	l3index = sdif ? dif : 0;
-
-	/* Fast path: unsigned segments */
-	if (likely(!md5_location && !aoh)) {
-		/* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid
-		 * for the remote peer. On TCP-AO established connection
-		 * the last key is impossible to remove, so there's
-		 * always at least one current_key.
-		 */
-		if (tcp_ao_required(sk, saddr, family, l3index, true)) {
-			tcp_hash_fail("AO hash is required, but not found",
-					family, skb, "L3 index %d", l3index);
-			return SKB_DROP_REASON_TCP_AONOTFOUND;
-		}
-		if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) {
-			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
-			tcp_hash_fail("MD5 Hash not found",
-				      family, skb, "L3 index %d", l3index);
-			return SKB_DROP_REASON_TCP_MD5NOTFOUND;
-		}
-		return SKB_NOT_DROPPED_YET;
-	}
-
-	if (aoh)
-		return tcp_inbound_ao_hash(sk, skb, family, req, l3index, aoh);
-
-	return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family,
-				    l3index, md5_location);
-}
+enum skb_drop_reason tcp_inbound_hash(struct sock *sk,
+		const struct request_sock *req, const struct sk_buff *skb,
+		const void *saddr, const void *daddr,
+		int family, int dif, int sdif);
 
 #endif	/* _TCP_H */
-- 
cgit v1.2.3


From 96be3dcd013df6aa79acf32e739c0a35b89a4f50 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <0x7f454c46@gmail.com>
Date: Fri, 7 Jun 2024 00:25:58 +0100
Subject: net/tcp: Add tcp-md5 and tcp-ao tracepoints

Instead of forcing userspace to parse dmesg (that's what currently is
happening, at least in codebase of my current company), provide a better
way, that can be enabled/disabled in runtime.

Currently, there are already tcp events, add hashing related ones there,
too. Rasdaemon currently exercises net_dev_xmit_timeout,
devlink_health_report, but it'll be trivial to teach it to deal with
failed hashes. Otherwise, BGP may trace/log them itself. Especially
exciting for possible investigations is key rotation (RNext_key
requests).

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/tcp.h | 317 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 317 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 49b5ee091cf6..1c8bd8e186b8 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -411,6 +411,323 @@ TRACE_EVENT(tcp_cong_state_set,
 		  __entry->cong_state)
 );
 
+DECLARE_EVENT_CLASS(tcp_hash_event,
+
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
+
+	TP_ARGS(sk, skb),
+
+	TP_STRUCT__entry(
+		__field(__u64, net_cookie)
+		__field(const void *, skbaddr)
+		__field(const void *, skaddr)
+		__field(int, state)
+
+		/* sockaddr_in6 is always bigger than sockaddr_in */
+		__array(__u8, saddr, sizeof(struct sockaddr_in6))
+		__array(__u8, daddr, sizeof(struct sockaddr_in6))
+		__field(int, l3index)
+
+		__field(__u16, sport)
+		__field(__u16, dport)
+		__field(__u16, family)
+
+		__field(bool, fin)
+		__field(bool, syn)
+		__field(bool, rst)
+		__field(bool, psh)
+		__field(bool, ack)
+	),
+
+	TP_fast_assign(
+		const struct tcphdr *th = (const struct tcphdr *)skb->data;
+
+		__entry->net_cookie = sock_net(sk)->net_cookie;
+		__entry->skbaddr = skb;
+		__entry->skaddr = sk;
+		__entry->state = sk->sk_state;
+
+		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+		TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr);
+		__entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0;
+
+		/* For filtering use */
+		__entry->sport = ntohs(th->source);
+		__entry->dport = ntohs(th->dest);
+		__entry->family = sk->sk_family;
+
+		__entry->fin = th->fin;
+		__entry->syn = th->syn;
+		__entry->rst = th->rst;
+		__entry->psh = th->psh;
+		__entry->ack = th->ack;
+	),
+
+	TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c]",
+		  __entry->net_cookie,
+		  show_tcp_state_name(__entry->state),
+		  show_family_name(__entry->family),
+		  __entry->saddr, __entry->daddr,
+		  __entry->l3index,
+		  __entry->fin ? 'F' : ' ',
+		  __entry->syn ? 'S' : ' ',
+		  __entry->rst ? 'R' : ' ',
+		  __entry->psh ? 'P' : ' ',
+		  __entry->ack ? '.' : ' ')
+);
+
+DEFINE_EVENT(tcp_hash_event, tcp_hash_bad_header,
+
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
+	TP_ARGS(sk, skb)
+);
+
+DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_required,
+
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
+	TP_ARGS(sk, skb)
+);
+
+DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_unexpected,
+
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
+	TP_ARGS(sk, skb)
+);
+
+DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_mismatch,
+
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
+	TP_ARGS(sk, skb)
+);
+
+DEFINE_EVENT(tcp_hash_event, tcp_hash_ao_required,
+
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
+	TP_ARGS(sk, skb)
+);
+
+DECLARE_EVENT_CLASS(tcp_ao_event,
+
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb,
+		 const __u8 keyid, const __u8 rnext, const __u8 maclen),
+
+	TP_ARGS(sk, skb, keyid, rnext, maclen),
+
+	TP_STRUCT__entry(
+		__field(__u64, net_cookie)
+		__field(const void *, skbaddr)
+		__field(const void *, skaddr)
+		__field(int, state)
+
+		/* sockaddr_in6 is always bigger than sockaddr_in */
+		__array(__u8, saddr, sizeof(struct sockaddr_in6))
+		__array(__u8, daddr, sizeof(struct sockaddr_in6))
+		__field(int, l3index)
+
+		__field(__u16, sport)
+		__field(__u16, dport)
+		__field(__u16, family)
+
+		__field(bool, fin)
+		__field(bool, syn)
+		__field(bool, rst)
+		__field(bool, psh)
+		__field(bool, ack)
+
+		__field(__u8, keyid)
+		__field(__u8, rnext)
+		__field(__u8, maclen)
+	),
+
+	TP_fast_assign(
+		const struct tcphdr *th = (const struct tcphdr *)skb->data;
+
+		__entry->net_cookie = sock_net(sk)->net_cookie;
+		__entry->skbaddr = skb;
+		__entry->skaddr = sk;
+		__entry->state = sk->sk_state;
+
+		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+		TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr);
+		__entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0;
+
+		/* For filtering use */
+		__entry->sport = ntohs(th->source);
+		__entry->dport = ntohs(th->dest);
+		__entry->family = sk->sk_family;
+
+		__entry->fin = th->fin;
+		__entry->syn = th->syn;
+		__entry->rst = th->rst;
+		__entry->psh = th->psh;
+		__entry->ack = th->ack;
+
+		__entry->keyid = keyid;
+		__entry->rnext = rnext;
+		__entry->maclen = maclen;
+	),
+
+	TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u",
+		  __entry->net_cookie,
+		  show_tcp_state_name(__entry->state),
+		  show_family_name(__entry->family),
+		  __entry->saddr, __entry->daddr,
+		  __entry->l3index,
+		  __entry->fin ? 'F' : ' ',
+		  __entry->syn ? 'S' : ' ',
+		  __entry->rst ? 'R' : ' ',
+		  __entry->psh ? 'P' : ' ',
+		  __entry->ack ? '.' : ' ',
+		  __entry->keyid, __entry->rnext, __entry->maclen)
+);
+
+DEFINE_EVENT(tcp_ao_event, tcp_ao_handshake_failure,
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb,
+		 const __u8 keyid, const __u8 rnext, const __u8 maclen),
+	TP_ARGS(sk, skb, keyid, rnext, maclen)
+);
+
+DEFINE_EVENT(tcp_ao_event, tcp_ao_wrong_maclen,
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb,
+		 const __u8 keyid, const __u8 rnext, const __u8 maclen),
+	TP_ARGS(sk, skb, keyid, rnext, maclen)
+);
+
+DEFINE_EVENT(tcp_ao_event, tcp_ao_mismatch,
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb,
+		 const __u8 keyid, const __u8 rnext, const __u8 maclen),
+	TP_ARGS(sk, skb, keyid, rnext, maclen)
+);
+
+DEFINE_EVENT(tcp_ao_event, tcp_ao_key_not_found,
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb,
+		 const __u8 keyid, const __u8 rnext, const __u8 maclen),
+	TP_ARGS(sk, skb, keyid, rnext, maclen)
+);
+
+DEFINE_EVENT(tcp_ao_event, tcp_ao_rnext_request,
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb,
+		 const __u8 keyid, const __u8 rnext, const __u8 maclen),
+	TP_ARGS(sk, skb, keyid, rnext, maclen)
+);
+
+DECLARE_EVENT_CLASS(tcp_ao_event_sk,
+
+	TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext),
+
+	TP_ARGS(sk, keyid, rnext),
+
+	TP_STRUCT__entry(
+		__field(__u64, net_cookie)
+		__field(const void *, skaddr)
+		__field(int, state)
+
+		/* sockaddr_in6 is always bigger than sockaddr_in */
+		__array(__u8, saddr, sizeof(struct sockaddr_in6))
+		__array(__u8, daddr, sizeof(struct sockaddr_in6))
+
+		__field(__u16, sport)
+		__field(__u16, dport)
+		__field(__u16, family)
+
+		__field(__u8, keyid)
+		__field(__u8, rnext)
+	),
+
+	TP_fast_assign(
+		const struct inet_sock *inet = inet_sk(sk);
+
+		__entry->net_cookie = sock_net(sk)->net_cookie;
+		__entry->skaddr = sk;
+		__entry->state = sk->sk_state;
+
+		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+		TP_STORE_ADDR_PORTS(__entry, inet, sk);
+
+		/* For filtering use */
+		__entry->sport = ntohs(inet->inet_sport);
+		__entry->dport = ntohs(inet->inet_dport);
+		__entry->family = sk->sk_family;
+
+		__entry->keyid = keyid;
+		__entry->rnext = rnext;
+	),
+
+	TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc keyid=%u rnext=%u",
+		  __entry->net_cookie,
+		  show_tcp_state_name(__entry->state),
+		  show_family_name(__entry->family),
+		  __entry->saddr, __entry->daddr,
+		  __entry->keyid, __entry->rnext)
+);
+
+DEFINE_EVENT(tcp_ao_event_sk, tcp_ao_synack_no_key,
+	TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext),
+	TP_ARGS(sk, keyid, rnext)
+);
+
+DECLARE_EVENT_CLASS(tcp_ao_event_sne,
+
+	TP_PROTO(const struct sock *sk, __u32 new_sne),
+
+	TP_ARGS(sk, new_sne),
+
+	TP_STRUCT__entry(
+		__field(__u64, net_cookie)
+		__field(const void *, skaddr)
+		__field(int, state)
+
+		/* sockaddr_in6 is always bigger than sockaddr_in */
+		__array(__u8, saddr, sizeof(struct sockaddr_in6))
+		__array(__u8, daddr, sizeof(struct sockaddr_in6))
+
+		__field(__u16, sport)
+		__field(__u16, dport)
+		__field(__u16, family)
+
+		__field(__u32, new_sne)
+	),
+
+	TP_fast_assign(
+		const struct inet_sock *inet = inet_sk(sk);
+
+		__entry->net_cookie = sock_net(sk)->net_cookie;
+		__entry->skaddr = sk;
+		__entry->state = sk->sk_state;
+
+		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+		TP_STORE_ADDR_PORTS(__entry, inet, sk);
+
+		/* For filtering use */
+		__entry->sport = ntohs(inet->inet_sport);
+		__entry->dport = ntohs(inet->inet_dport);
+		__entry->family = sk->sk_family;
+
+		__entry->new_sne = new_sne;
+	),
+
+	TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc sne=%u",
+		  __entry->net_cookie,
+		  show_tcp_state_name(__entry->state),
+		  show_family_name(__entry->family),
+		  __entry->saddr, __entry->daddr,
+		  __entry->new_sne)
+);
+
+DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_snd_sne_update,
+	TP_PROTO(const struct sock *sk, __u32 new_sne),
+	TP_ARGS(sk, new_sne)
+);
+
+DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_rcv_sne_update,
+	TP_PROTO(const struct sock *sk, __u32 new_sne),
+	TP_ARGS(sk, new_sne)
+);
+
 #endif /* _TRACE_TCP_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 78b1b27db91c7a94297a8b6a665fe7e86dfc5750 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <0x7f454c46@gmail.com>
Date: Fri, 7 Jun 2024 00:25:59 +0100
Subject: net/tcp: Remove tcp_hash_fail()

Now there are tracepoints, that cover all functionality of
tcp_hash_fail(), but also wire up missing places
They are also faster, can be disabled and provide filtering.

This potentially may create a regression if a userspace depends on dmesg
logs. Fingers crossed, let's see if anyone complains in reality.

Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp_ao.h | 37 -------------------------------------
 1 file changed, 37 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
index 198e02004ad2..1d46460d0fef 100644
--- a/include/net/tcp_ao.h
+++ b/include/net/tcp_ao.h
@@ -149,43 +149,6 @@ extern struct static_key_false_deferred tcp_ao_needed;
 #define static_branch_tcp_ao()	false
 #endif
 
-static inline bool tcp_hash_should_produce_warnings(void)
-{
-	return static_branch_tcp_md5() || static_branch_tcp_ao();
-}
-
-#define tcp_hash_fail(msg, family, skb, fmt, ...)			\
-do {									\
-	const struct tcphdr *th = tcp_hdr(skb);				\
-	char hdr_flags[6];						\
-	char *f = hdr_flags;						\
-									\
-	if (!tcp_hash_should_produce_warnings())			\
-		break;							\
-	if (th->fin)							\
-		*f++ = 'F';						\
-	if (th->syn)							\
-		*f++ = 'S';						\
-	if (th->rst)							\
-		*f++ = 'R';						\
-	if (th->psh)							\
-		*f++ = 'P';						\
-	if (th->ack)							\
-		*f++ = '.';						\
-	*f = 0;								\
-	if ((family) == AF_INET) {					\
-		net_info_ratelimited("%s for %pI4.%d->%pI4.%d [%s] " fmt "\n", \
-				msg, &ip_hdr(skb)->saddr, ntohs(th->source), \
-				&ip_hdr(skb)->daddr, ntohs(th->dest),	\
-				hdr_flags, ##__VA_ARGS__);		\
-	} else {							\
-		net_info_ratelimited("%s for [%pI6c].%d->[%pI6c].%d [%s]" fmt "\n", \
-				msg, &ipv6_hdr(skb)->saddr, ntohs(th->source), \
-				&ipv6_hdr(skb)->daddr, ntohs(th->dest),	\
-				hdr_flags, ##__VA_ARGS__);		\
-	}								\
-} while (0)
-
 #ifdef CONFIG_TCP_AO
 /* TCP-AO structures and functions */
 struct tcp4_ao_context {
-- 
cgit v1.2.3


From 249ebf3f65f8530beb2cbfb91bff1d83ba88d23c Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Wed, 5 Jun 2024 14:38:49 +0200
Subject: power: sequencing: implement the pwrseq core

Implement the power sequencing subsystem allowing devices to share
complex powering-up and down procedures. It's split into the consumer
and provider parts but does not implement any new DT bindings so that
the actual power sequencing is never revealed in the DT representation.

Tested-by: Amit Pundir <amit.pundir@linaro.org>
Tested-by: Neil Armstrong <neil.armstrong@linaro.org> # on SM8550-QRD, SM8650-QRD & SM8650-HDK
Tested-by: Caleb Connolly <caleb.connolly@linaro.org> # OnePlus 8T
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Link: https://lore.kernel.org/r/20240605123850.24857-2-brgl@bgdev.pl
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 include/linux/pwrseq/consumer.h | 56 ++++++++++++++++++++++++++++++
 include/linux/pwrseq/provider.h | 75 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 131 insertions(+)
 create mode 100644 include/linux/pwrseq/consumer.h
 create mode 100644 include/linux/pwrseq/provider.h

(limited to 'include')

diff --git a/include/linux/pwrseq/consumer.h b/include/linux/pwrseq/consumer.h
new file mode 100644
index 000000000000..7d583b4f266e
--- /dev/null
+++ b/include/linux/pwrseq/consumer.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2024 Linaro Ltd.
+ */
+
+#ifndef __POWER_SEQUENCING_CONSUMER_H__
+#define __POWER_SEQUENCING_CONSUMER_H__
+
+#include <linux/err.h>
+
+struct device;
+struct pwrseq_desc;
+
+#if IS_ENABLED(CONFIG_POWER_SEQUENCING)
+
+struct pwrseq_desc * __must_check
+pwrseq_get(struct device *dev, const char *target);
+void pwrseq_put(struct pwrseq_desc *desc);
+
+struct pwrseq_desc * __must_check
+devm_pwrseq_get(struct device *dev, const char *target);
+
+int pwrseq_power_on(struct pwrseq_desc *desc);
+int pwrseq_power_off(struct pwrseq_desc *desc);
+
+#else /* CONFIG_POWER_SEQUENCING */
+
+static inline struct pwrseq_desc * __must_check
+pwrseq_get(struct device *dev, const char *target)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline void pwrseq_put(struct pwrseq_desc *desc)
+{
+}
+
+static inline struct pwrseq_desc * __must_check
+devm_pwrseq_get(struct device *dev, const char *target)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline int pwrseq_power_on(struct pwrseq_desc *desc)
+{
+	return -ENOSYS;
+}
+
+static inline int pwrseq_power_off(struct pwrseq_desc *desc)
+{
+	return -ENOSYS;
+}
+
+#endif /* CONFIG_POWER_SEQUENCING */
+
+#endif /* __POWER_SEQUENCING_CONSUMER_H__ */
diff --git a/include/linux/pwrseq/provider.h b/include/linux/pwrseq/provider.h
new file mode 100644
index 000000000000..cbc3607cbfcf
--- /dev/null
+++ b/include/linux/pwrseq/provider.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2024 Linaro Ltd.
+ */
+
+#ifndef __POWER_SEQUENCING_PROVIDER_H__
+#define __POWER_SEQUENCING_PROVIDER_H__
+
+struct device;
+struct module;
+struct pwrseq_device;
+
+typedef int (*pwrseq_power_state_func)(struct pwrseq_device *);
+typedef int (*pwrseq_match_func)(struct pwrseq_device *, struct device *);
+
+/**
+ * struct pwrseq_unit_data - Configuration of a single power sequencing
+ *                           unit.
+ * @name: Name of the unit.
+ * @deps: Units that must be enabled before this one and disabled after it
+ *        in the order they come in this array. Must be NULL-terminated.
+ * @enable: Callback running the part of the power-on sequence provided by
+ *          this unit.
+ * @disable: Callback running the part of the power-off sequence provided
+ *           by this unit.
+ */
+struct pwrseq_unit_data {
+	const char *name;
+	const struct pwrseq_unit_data **deps;
+	pwrseq_power_state_func enable;
+	pwrseq_power_state_func disable;
+};
+
+/**
+ * struct pwrseq_target_data - Configuration of a power sequencing target.
+ * @name: Name of the target.
+ * @unit: Final unit that this target must reach in order to be considered
+ *        enabled.
+ * @post_enable: Callback run after the target unit has been enabled, *after*
+ *               the state lock has been released. It's useful for implementing
+ *               boot-up delays without blocking other users from powering up
+ *               using the same power sequencer.
+ */
+struct pwrseq_target_data {
+	const char *name;
+	const struct pwrseq_unit_data *unit;
+	pwrseq_power_state_func post_enable;
+};
+
+/**
+ * struct pwrseq_config - Configuration used for registering a new provider.
+ * @parent: Parent device for the sequencer. Must be set.
+ * @owner: Module providing this device.
+ * @drvdata: Private driver data.
+ * @match: Provider callback used to match the consumer device to the sequencer.
+ * @targets: Array of targets for this power sequencer. Must be NULL-terminated.
+ */
+struct pwrseq_config {
+	struct device *parent;
+	struct module *owner;
+	void *drvdata;
+	pwrseq_match_func match;
+	const struct pwrseq_target_data **targets;
+};
+
+struct pwrseq_device *
+pwrseq_device_register(const struct pwrseq_config *config);
+void pwrseq_device_unregister(struct pwrseq_device *pwrseq);
+struct pwrseq_device *
+devm_pwrseq_device_register(struct device *dev,
+			    const struct pwrseq_config *config);
+
+void *pwrseq_device_get_drvdata(struct pwrseq_device *pwrseq);
+
+#endif /* __POWER_SEQUENCING_PROVIDER_H__ */
-- 
cgit v1.2.3


From 000d1940c90984a9a2af9c02bc17e3ca0d87f71d Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Fri, 7 Jun 2024 16:22:58 +0300
Subject: drm/connector: hdmi: allow disabling Audio Infoframe

Add drm_atomic_helper_connector_hdmi_disable_audio_infoframe(), an API
to allow the driver disable sending the Audio Infoframe. This is to be
used by the drivers if setup of the infoframes is not tightly coupled
with the audio functionality and just disabling the audio playback
doesn't stop the HDMI hardware from sending the Infoframe.

Acked-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240607-bridge-hdmi-connector-v5-1-ab384e6021af@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 include/drm/display/drm_hdmi_state_helper.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/drm/display/drm_hdmi_state_helper.h b/include/drm/display/drm_hdmi_state_helper.h
index eb162ff24de0..285f366cf716 100644
--- a/include/drm/display/drm_hdmi_state_helper.h
+++ b/include/drm/display/drm_hdmi_state_helper.h
@@ -16,6 +16,7 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector,
 
 int drm_atomic_helper_connector_hdmi_update_audio_infoframe(struct drm_connector *connector,
 							    struct hdmi_audio_infoframe *frame);
+int drm_atomic_helper_connector_hdmi_disable_audio_infoframe(struct drm_connector *connector);
 int drm_atomic_helper_connector_hdmi_update_infoframes(struct drm_connector *connector,
 						       struct drm_atomic_state *state);
 
-- 
cgit v1.2.3


From 6b4468b0c6ba37a16795da567b58dc80bc7fb439 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Fri, 7 Jun 2024 16:23:00 +0300
Subject: drm/bridge-connector: implement glue code for HDMI connector

In order to let bridge chains implement HDMI connector infrastructure,
add necessary glue code to the drm_bridge_connector. In case there is a
bridge that sets DRM_BRIDGE_OP_HDMI, drm_bridge_connector will register
itself as a HDMI connector and provide proxy drm_connector_hdmi_funcs
implementation.

Note, to simplify implementation, there can be only one bridge in a
chain that sets DRM_BRIDGE_OP_HDMI. Setting more than one is considered
an error. This limitation can be lifted later, if the need arises.

Acked-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240607-bridge-hdmi-connector-v5-3-ab384e6021af@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 include/drm/drm_bridge.h | 81 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h
index 5cf41f92d1f0..75019d16be64 100644
--- a/include/drm/drm_bridge.h
+++ b/include/drm/drm_bridge.h
@@ -630,6 +630,52 @@ struct drm_bridge_funcs {
 	 */
 	void (*hpd_disable)(struct drm_bridge *bridge);
 
+	/**
+	 * @hdmi_tmds_char_rate_valid:
+	 *
+	 * Check whether a particular TMDS character rate is supported by the
+	 * driver.
+	 *
+	 * This callback is optional and should only be implemented by the
+	 * bridges that take part in the HDMI connector implementation. Bridges
+	 * that implement it shall set the DRM_BRIDGE_OP_HDMI flag in their
+	 * &drm_bridge->ops.
+	 *
+	 * Returns:
+	 *
+	 * Either &drm_mode_status.MODE_OK or one of the failure reasons
+	 * in &enum drm_mode_status.
+	 */
+	enum drm_mode_status
+	(*hdmi_tmds_char_rate_valid)(const struct drm_bridge *bridge,
+				     const struct drm_display_mode *mode,
+				     unsigned long long tmds_rate);
+
+	/**
+	 * @hdmi_clear_infoframe:
+	 *
+	 * This callback clears the infoframes in the hardware during commit.
+	 * It will be called multiple times, once for every disabled infoframe
+	 * type.
+	 *
+	 * This callback is optional but it must be implemented by bridges that
+	 * set the DRM_BRIDGE_OP_HDMI flag in their &drm_bridge->ops.
+	 */
+	int (*hdmi_clear_infoframe)(struct drm_bridge *bridge,
+				    enum hdmi_infoframe_type type);
+	/**
+	 * @hdmi_write_infoframe:
+	 *
+	 * Program the infoframe into the hardware. It will be called multiple
+	 * times, once for every updated infoframe type.
+	 *
+	 * This callback is optional but it must be implemented by bridges that
+	 * set the DRM_BRIDGE_OP_HDMI flag in their &drm_bridge->ops.
+	 */
+	int (*hdmi_write_infoframe)(struct drm_bridge *bridge,
+				    enum hdmi_infoframe_type type,
+				    const u8 *buffer, size_t len);
+
 	/**
 	 * @debugfs_init:
 	 *
@@ -705,6 +751,16 @@ enum drm_bridge_ops {
 	 * this flag shall implement the &drm_bridge_funcs->get_modes callback.
 	 */
 	DRM_BRIDGE_OP_MODES = BIT(3),
+	/**
+	 * @DRM_BRIDGE_OP_HDMI: The bridge provides HDMI connector operations,
+	 * including infoframes support. Bridges that set this flag must
+	 * implement the &drm_bridge_funcs->write_infoframe callback.
+	 *
+	 * Note: currently there can be at most one bridge in a chain that sets
+	 * this bit. This is to simplify corresponding glue code in connector
+	 * drivers.
+	 */
+	DRM_BRIDGE_OP_HDMI = BIT(4),
 };
 
 /**
@@ -773,6 +829,31 @@ struct drm_bridge {
 	 * @hpd_cb.
 	 */
 	void *hpd_data;
+
+	/**
+	 * @vendor: Vendor of the product to be used for the SPD InfoFrame
+	 * generation. This is required if @DRM_BRIDGE_OP_HDMI is set.
+	 */
+	const char *vendor;
+
+	/**
+	 * @product: Name of the product to be used for the SPD InfoFrame
+	 * generation. This is required if @DRM_BRIDGE_OP_HDMI is set.
+	 */
+	const char *product;
+
+	/**
+	 * @supported_formats: Bitmask of @hdmi_colorspace listing supported
+	 * output formats. This is only relevant if @DRM_BRIDGE_OP_HDMI is set.
+	 */
+	unsigned int supported_formats;
+
+	/**
+	 * @max_bpc: Maximum bits per char the HDMI bridge supports. Allowed
+	 * values are 8, 10 and 12. This is only relevant if
+	 * @DRM_BRIDGE_OP_HDMI is set.
+	 */
+	unsigned int max_bpc;
 };
 
 static inline struct drm_bridge *
-- 
cgit v1.2.3


From 8682ad36870790ad6a9a03ac237c8d87d34b26d5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 23 May 2024 12:09:43 +0200
Subject: wifi: cfg80211: use BIT() for flag enums

Use BIT(x) instead of 1<<x, in part because it's mostly
missing spaces anyway, in part because it reads nicer.

Reviewed-by: Miriam Rachel Korenblit <miriam.rachel.korenblit@intel.com>
Reviewed-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://msgid.link/20240523120945.c21598fbf49c.Ib8f26c5e9f508aee19fdfa1fd4b5995f084c46d4@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 102 ++++++++++++++++++++++++-------------------------
 1 file changed, 51 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5da9bb0ac6a4..051d4eb227bf 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -127,31 +127,31 @@ struct wiphy;
  *	even if it is otherwise disabled.
  */
 enum ieee80211_channel_flags {
-	IEEE80211_CHAN_DISABLED		= 1<<0,
-	IEEE80211_CHAN_NO_IR		= 1<<1,
-	IEEE80211_CHAN_PSD		= 1<<2,
-	IEEE80211_CHAN_RADAR		= 1<<3,
-	IEEE80211_CHAN_NO_HT40PLUS	= 1<<4,
-	IEEE80211_CHAN_NO_HT40MINUS	= 1<<5,
-	IEEE80211_CHAN_NO_OFDM		= 1<<6,
-	IEEE80211_CHAN_NO_80MHZ		= 1<<7,
-	IEEE80211_CHAN_NO_160MHZ	= 1<<8,
-	IEEE80211_CHAN_INDOOR_ONLY	= 1<<9,
-	IEEE80211_CHAN_IR_CONCURRENT	= 1<<10,
-	IEEE80211_CHAN_NO_20MHZ		= 1<<11,
-	IEEE80211_CHAN_NO_10MHZ		= 1<<12,
-	IEEE80211_CHAN_NO_HE		= 1<<13,
-	IEEE80211_CHAN_1MHZ		= 1<<14,
-	IEEE80211_CHAN_2MHZ		= 1<<15,
-	IEEE80211_CHAN_4MHZ		= 1<<16,
-	IEEE80211_CHAN_8MHZ		= 1<<17,
-	IEEE80211_CHAN_16MHZ		= 1<<18,
-	IEEE80211_CHAN_NO_320MHZ	= 1<<19,
-	IEEE80211_CHAN_NO_EHT		= 1<<20,
-	IEEE80211_CHAN_DFS_CONCURRENT	= 1<<21,
-	IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = 1<<22,
-	IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = 1<<23,
-	IEEE80211_CHAN_CAN_MONITOR	= 1<<24,
+	IEEE80211_CHAN_DISABLED			= BIT(0),
+	IEEE80211_CHAN_NO_IR			= BIT(1),
+	IEEE80211_CHAN_PSD			= BIT(2),
+	IEEE80211_CHAN_RADAR			= BIT(3),
+	IEEE80211_CHAN_NO_HT40PLUS		= BIT(4),
+	IEEE80211_CHAN_NO_HT40MINUS		= BIT(5),
+	IEEE80211_CHAN_NO_OFDM			= BIT(6),
+	IEEE80211_CHAN_NO_80MHZ			= BIT(7),
+	IEEE80211_CHAN_NO_160MHZ		= BIT(8),
+	IEEE80211_CHAN_INDOOR_ONLY		= BIT(9),
+	IEEE80211_CHAN_IR_CONCURRENT		= BIT(10),
+	IEEE80211_CHAN_NO_20MHZ			= BIT(11),
+	IEEE80211_CHAN_NO_10MHZ			= BIT(12),
+	IEEE80211_CHAN_NO_HE			= BIT(13),
+	IEEE80211_CHAN_1MHZ			= BIT(14),
+	IEEE80211_CHAN_2MHZ			= BIT(15),
+	IEEE80211_CHAN_4MHZ			= BIT(16),
+	IEEE80211_CHAN_8MHZ			= BIT(17),
+	IEEE80211_CHAN_16MHZ			= BIT(18),
+	IEEE80211_CHAN_NO_320MHZ		= BIT(19),
+	IEEE80211_CHAN_NO_EHT			= BIT(20),
+	IEEE80211_CHAN_DFS_CONCURRENT		= BIT(21),
+	IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT	= BIT(22),
+	IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT	= BIT(23),
+	IEEE80211_CHAN_CAN_MONITOR		= BIT(24),
 };
 
 #define IEEE80211_CHAN_NO_HT40 \
@@ -229,13 +229,13 @@ struct ieee80211_channel {
  * @IEEE80211_RATE_SUPPORTS_10MHZ: Rate can be used in 10 MHz mode
  */
 enum ieee80211_rate_flags {
-	IEEE80211_RATE_SHORT_PREAMBLE	= 1<<0,
-	IEEE80211_RATE_MANDATORY_A	= 1<<1,
-	IEEE80211_RATE_MANDATORY_B	= 1<<2,
-	IEEE80211_RATE_MANDATORY_G	= 1<<3,
-	IEEE80211_RATE_ERP_G		= 1<<4,
-	IEEE80211_RATE_SUPPORTS_5MHZ	= 1<<5,
-	IEEE80211_RATE_SUPPORTS_10MHZ	= 1<<6,
+	IEEE80211_RATE_SHORT_PREAMBLE	= BIT(0),
+	IEEE80211_RATE_MANDATORY_A	= BIT(1),
+	IEEE80211_RATE_MANDATORY_B	= BIT(2),
+	IEEE80211_RATE_MANDATORY_G	= BIT(3),
+	IEEE80211_RATE_ERP_G		= BIT(4),
+	IEEE80211_RATE_SUPPORTS_5MHZ	= BIT(5),
+	IEEE80211_RATE_SUPPORTS_10MHZ	= BIT(6),
 };
 
 /**
@@ -1957,9 +1957,9 @@ struct rate_info {
  * @BSS_PARAM_FLAGS_SHORT_SLOT_TIME: whether short slot time is enabled
  */
 enum bss_param_flags {
-	BSS_PARAM_FLAGS_CTS_PROT	= 1<<0,
-	BSS_PARAM_FLAGS_SHORT_PREAMBLE	= 1<<1,
-	BSS_PARAM_FLAGS_SHORT_SLOT_TIME	= 1<<2,
+	BSS_PARAM_FLAGS_CTS_PROT	= BIT(0),
+	BSS_PARAM_FLAGS_SHORT_PREAMBLE	= BIT(1),
+	BSS_PARAM_FLAGS_SHORT_SLOT_TIME	= BIT(2),
 };
 
 /**
@@ -2266,13 +2266,13 @@ static inline int cfg80211_get_station(struct net_device *dev,
  * @MONITOR_FLAG_ACTIVE: active monitor, ACKs frames on its MAC address
  */
 enum monitor_flags {
-	MONITOR_FLAG_CHANGED		= 1<<__NL80211_MNTR_FLAG_INVALID,
-	MONITOR_FLAG_FCSFAIL		= 1<<NL80211_MNTR_FLAG_FCSFAIL,
-	MONITOR_FLAG_PLCPFAIL		= 1<<NL80211_MNTR_FLAG_PLCPFAIL,
-	MONITOR_FLAG_CONTROL		= 1<<NL80211_MNTR_FLAG_CONTROL,
-	MONITOR_FLAG_OTHER_BSS		= 1<<NL80211_MNTR_FLAG_OTHER_BSS,
-	MONITOR_FLAG_COOK_FRAMES	= 1<<NL80211_MNTR_FLAG_COOK_FRAMES,
-	MONITOR_FLAG_ACTIVE		= 1<<NL80211_MNTR_FLAG_ACTIVE,
+	MONITOR_FLAG_CHANGED		= BIT(__NL80211_MNTR_FLAG_INVALID),
+	MONITOR_FLAG_FCSFAIL		= BIT(NL80211_MNTR_FLAG_FCSFAIL),
+	MONITOR_FLAG_PLCPFAIL		= BIT(NL80211_MNTR_FLAG_PLCPFAIL),
+	MONITOR_FLAG_CONTROL		= BIT(NL80211_MNTR_FLAG_CONTROL),
+	MONITOR_FLAG_OTHER_BSS		= BIT(NL80211_MNTR_FLAG_OTHER_BSS),
+	MONITOR_FLAG_COOK_FRAMES	= BIT(NL80211_MNTR_FLAG_COOK_FRAMES),
+	MONITOR_FLAG_ACTIVE		= BIT(NL80211_MNTR_FLAG_ACTIVE),
 };
 
 /**
@@ -3399,15 +3399,15 @@ enum cfg80211_connect_params_changed {
  * @WIPHY_PARAM_TXQ_QUANTUM: TXQ scheduler quantum
  */
 enum wiphy_params_flags {
-	WIPHY_PARAM_RETRY_SHORT		= 1 << 0,
-	WIPHY_PARAM_RETRY_LONG		= 1 << 1,
-	WIPHY_PARAM_FRAG_THRESHOLD	= 1 << 2,
-	WIPHY_PARAM_RTS_THRESHOLD	= 1 << 3,
-	WIPHY_PARAM_COVERAGE_CLASS	= 1 << 4,
-	WIPHY_PARAM_DYN_ACK		= 1 << 5,
-	WIPHY_PARAM_TXQ_LIMIT		= 1 << 6,
-	WIPHY_PARAM_TXQ_MEMORY_LIMIT	= 1 << 7,
-	WIPHY_PARAM_TXQ_QUANTUM		= 1 << 8,
+	WIPHY_PARAM_RETRY_SHORT		= BIT(0),
+	WIPHY_PARAM_RETRY_LONG		= BIT(1),
+	WIPHY_PARAM_FRAG_THRESHOLD	= BIT(2),
+	WIPHY_PARAM_RTS_THRESHOLD	= BIT(3),
+	WIPHY_PARAM_COVERAGE_CLASS	= BIT(4),
+	WIPHY_PARAM_DYN_ACK		= BIT(5),
+	WIPHY_PARAM_TXQ_LIMIT		= BIT(6),
+	WIPHY_PARAM_TXQ_MEMORY_LIMIT	= BIT(7),
+	WIPHY_PARAM_TXQ_QUANTUM		= BIT(8),
 };
 
 #define IEEE80211_DEFAULT_AIRTIME_WEIGHT	256
-- 
cgit v1.2.3


From 92a5df6687da755edd4bfe1acb714f00f56d2e06 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 23 May 2024 12:09:44 +0200
Subject: wifi: ieee80211: remove unused enum ieee80211_client_reg_power

This has never been used, and it's really not directly
representing the spec, so shouldn't have been here in
the first place. Remove it.

Reviewed-by: Miriam Rachel Korenblit <miriam.rachel.korenblit@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://msgid.link/20240523120945.32ed8fc1522d.Id4480d162e1921478e33d145890dc16c263b57bf@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 769008a51809..456a55bd6c6b 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2426,24 +2426,6 @@ enum ieee80211_ap_reg_power {
 		IEEE80211_REG_AP_POWER_AFTER_LAST - 1,
 };
 
-/**
- * enum ieee80211_client_reg_power - regulatory power for a client
- *
- * @IEEE80211_REG_UNSET_CLIENT: Client has no regulatory power mode
- * @IEEE80211_REG_DEFAULT_CLIENT: Default Client
- * @IEEE80211_REG_SUBORDINATE_CLIENT: Subordinate Client
- * @IEEE80211_REG_CLIENT_POWER_AFTER_LAST: internal
- * @IEEE80211_REG_CLIENT_POWER_MAX: maximum value
- */
-enum ieee80211_client_reg_power {
-	IEEE80211_REG_UNSET_CLIENT,
-	IEEE80211_REG_DEFAULT_CLIENT,
-	IEEE80211_REG_SUBORDINATE_CLIENT,
-	IEEE80211_REG_CLIENT_POWER_AFTER_LAST,
-	IEEE80211_REG_CLIENT_POWER_MAX =
-		IEEE80211_REG_CLIENT_POWER_AFTER_LAST - 1,
-};
-
 /* 802.11ax HE MAC capabilities */
 #define IEEE80211_HE_MAC_CAP0_HTC_HE				0x01
 #define IEEE80211_HE_MAC_CAP0_TWT_REQ				0x02
-- 
cgit v1.2.3


From 0a9314ad5f45aeb10326dce33c5d70b85f74ef2d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 23 May 2024 12:09:45 +0200
Subject: wifi: cfg80211: move enum ieee80211_ap_reg_power to cfg80211

This really shouldn't have been in ieee80211.h, since it
doesn't directly represent the spec. Move it to cfg80211
rather than mac80211 since upcoming changes will use it
there.

Reviewed-by: Miriam Rachel Korenblit <miriam.rachel.korenblit@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://msgid.link/20240523120945.962b16c831cd.I5745962525b1b176c5b90d37b3720fc100eee406@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 20 --------------------
 include/net/cfg80211.h    | 15 +++++++++++++++
 2 files changed, 15 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 456a55bd6c6b..30cef3b940eb 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2406,26 +2406,6 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 			      int mcs, bool ext_nss_bw_capable,
 			      unsigned int max_vht_nss);
 
-/**
- * enum ieee80211_ap_reg_power - regulatory power for a Access Point
- *
- * @IEEE80211_REG_UNSET_AP: Access Point has no regulatory power mode
- * @IEEE80211_REG_LPI_AP: Indoor Access Point
- * @IEEE80211_REG_SP_AP: Standard power Access Point
- * @IEEE80211_REG_VLP_AP: Very low power Access Point
- * @IEEE80211_REG_AP_POWER_AFTER_LAST: internal
- * @IEEE80211_REG_AP_POWER_MAX: maximum value
- */
-enum ieee80211_ap_reg_power {
-	IEEE80211_REG_UNSET_AP,
-	IEEE80211_REG_LPI_AP,
-	IEEE80211_REG_SP_AP,
-	IEEE80211_REG_VLP_AP,
-	IEEE80211_REG_AP_POWER_AFTER_LAST,
-	IEEE80211_REG_AP_POWER_MAX =
-		IEEE80211_REG_AP_POWER_AFTER_LAST - 1,
-};
-
 /* 802.11ax HE MAC capabilities */
 #define IEEE80211_HE_MAC_CAP0_HTC_HE				0x01
 #define IEEE80211_HE_MAC_CAP0_TWT_REQ				0x02
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 051d4eb227bf..fb7d76513e1c 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -6081,6 +6081,21 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy,
 void wiphy_delayed_work_flush(struct wiphy *wiphy,
 			      struct wiphy_delayed_work *dwork);
 
+/**
+ * enum ieee80211_ap_reg_power - regulatory power for an Access Point
+ *
+ * @IEEE80211_REG_UNSET_AP: Access Point has no regulatory power mode
+ * @IEEE80211_REG_LPI_AP: Indoor Access Point
+ * @IEEE80211_REG_SP_AP: Standard power Access Point
+ * @IEEE80211_REG_VLP_AP: Very low power Access Point
+ */
+enum ieee80211_ap_reg_power {
+	IEEE80211_REG_UNSET_AP,
+	IEEE80211_REG_LPI_AP,
+	IEEE80211_REG_SP_AP,
+	IEEE80211_REG_VLP_AP,
+};
+
 /**
  * struct wireless_dev - wireless device state
  *
-- 
cgit v1.2.3


From 9fd171a71b9d4cd8855891c0ba7e2a152139b41a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 23 May 2024 12:09:48 +0200
Subject: wifi: cfg80211: refactor regulatory beaconing checking

There are two functions exported now, with different settings,
refactor to just export a single function that take a struct
with different settings. This will make it easier to add more
parameters.

Reviewed-by: Miriam Rachel Korenblit <miriam.rachel.korenblit@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://msgid.link/20240523120945.d44c34dadfc2.I59b4403108e0dbf7fc6ae8f7522e1af520cffb1c@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 54 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 48 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index fb7d76513e1c..45ffeb110d36 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -8800,6 +8800,31 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy,
 					sig_dbm);
 }
 
+/**
+ * struct cfg80211_beaconing_check_config - beacon check configuration
+ * @iftype: the interface type to check for
+ * @relax: allow IR-relaxation conditions to apply (e.g. another
+ *	interface connected already on the same channel)
+ *	NOTE: If this is set, wiphy mutex must be held.
+ */
+struct cfg80211_beaconing_check_config {
+	enum nl80211_iftype iftype;
+	bool relax;
+};
+
+/**
+ * cfg80211_reg_check_beaconing - check if beaconing is allowed
+ * @wiphy: the wiphy
+ * @chandef: the channel definition
+ * @cfg: additional parameters for the checking
+ *
+ * Return: %true if there is no secondary channel or the secondary channel(s)
+ * can be used for beaconing (i.e. is not a radar channel etc.)
+ */
+bool cfg80211_reg_check_beaconing(struct wiphy *wiphy,
+				  struct cfg80211_chan_def *chandef,
+				  struct cfg80211_beaconing_check_config *cfg);
+
 /**
  * cfg80211_reg_can_beacon - check if beaconing is allowed
  * @wiphy: the wiphy
@@ -8809,9 +8834,17 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy,
  * Return: %true if there is no secondary channel or the secondary channel(s)
  * can be used for beaconing (i.e. is not a radar channel etc.)
  */
-bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
-			     struct cfg80211_chan_def *chandef,
-			     enum nl80211_iftype iftype);
+static inline bool
+cfg80211_reg_can_beacon(struct wiphy *wiphy,
+			struct cfg80211_chan_def *chandef,
+			enum nl80211_iftype iftype)
+{
+	struct cfg80211_beaconing_check_config config = {
+		.iftype = iftype,
+	};
+
+	return cfg80211_reg_check_beaconing(wiphy, chandef, &config);
+}
 
 /**
  * cfg80211_reg_can_beacon_relax - check if beaconing is allowed with relaxation
@@ -8826,9 +8859,18 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
  *
  * Context: Requires the wiphy mutex to be held.
  */
-bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
-				   struct cfg80211_chan_def *chandef,
-				   enum nl80211_iftype iftype);
+static inline bool
+cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
+			      struct cfg80211_chan_def *chandef,
+			      enum nl80211_iftype iftype)
+{
+	struct cfg80211_beaconing_check_config config = {
+		.iftype = iftype,
+		.relax = true,
+	};
+
+	return cfg80211_reg_check_beaconing(wiphy, chandef, &config);
+}
 
 /**
  * cfg80211_ch_switch_notify - update wdev channel and notify userspace
-- 
cgit v1.2.3


From c1d8bd8d777d55f6708ca6e47c54dbe9f66f9bbb Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 23 May 2024 12:09:49 +0200
Subject: wifi: cfg80211: add regulatory flag to allow VLP AP operation

Add a regulatory flag to allow VLP AP operation even on
channels otherwise marked NO_IR, which may be possible
in some regulatory domains/countries.

Note that this requires checking also when the beacon is
changed, since that may change the regulatory power type.

Reviewed-by: Miriam Rachel Korenblit <miriam.rachel.korenblit@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://msgid.link/20240523120945.63792ce19790.Ie2a02750d283b78fbf3c686b10565fb0388889e2@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 6 ++++++
 include/uapi/linux/nl80211.h | 6 ++++++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 45ffeb110d36..6f992aff74ae 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -125,6 +125,8 @@ struct wiphy;
  * @IEEE80211_CHAN_CAN_MONITOR: This channel can be used for monitor
  *	mode even in the presence of other (regulatory) restrictions,
  *	even if it is otherwise disabled.
+ * @IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP: Allow using this channel for AP operation
+ *	with very low power (VLP), even if otherwise set to NO_IR.
  */
 enum ieee80211_channel_flags {
 	IEEE80211_CHAN_DISABLED			= BIT(0),
@@ -152,6 +154,7 @@ enum ieee80211_channel_flags {
 	IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT	= BIT(22),
 	IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT	= BIT(23),
 	IEEE80211_CHAN_CAN_MONITOR		= BIT(24),
+	IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP	= BIT(25),
 };
 
 #define IEEE80211_CHAN_NO_HT40 \
@@ -8806,9 +8809,12 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy,
  * @relax: allow IR-relaxation conditions to apply (e.g. another
  *	interface connected already on the same channel)
  *	NOTE: If this is set, wiphy mutex must be held.
+ * @reg_power: &enum ieee80211_ap_reg_power value indicating the
+ *	advertised/used 6 GHz regulatory power setting
  */
 struct cfg80211_beaconing_check_config {
 	enum nl80211_iftype iftype;
+	enum ieee80211_ap_reg_power reg_power;
 	bool relax;
 };
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index f917bc6c9b6f..6ae3997061b6 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4277,6 +4277,8 @@ enum nl80211_wmm_rule {
  * @NL80211_FREQUENCY_ATTR_CAN_MONITOR: This channel can be used in monitor
  *	mode despite other (regulatory) restrictions, even if the channel is
  *	otherwise completely disabled.
+ * @NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP: This channel can be used for a
+ *	very low power (VLP) AP, despite being NO_IR.
  * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number
  *	currently defined
  * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use
@@ -4320,6 +4322,7 @@ enum nl80211_frequency_attr {
 	NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT,
 	NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT,
 	NL80211_FREQUENCY_ATTR_CAN_MONITOR,
+	NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP,
 
 	/* keep last */
 	__NL80211_FREQUENCY_ATTR_AFTER_LAST,
@@ -4529,6 +4532,8 @@ enum nl80211_sched_scan_match_attr {
  *	Should be used together with %NL80211_RRF_DFS only.
  * @NL80211_RRF_NO_6GHZ_VLP_CLIENT: Client connection to VLP AP not allowed
  * @NL80211_RRF_NO_6GHZ_AFC_CLIENT: Client connection to AFC AP not allowed
+ * @NL80211_RRF_ALLOW_6GHZ_VLP_AP: Very low power (VLP) AP can be permitted
+ *	despite NO_IR configuration.
  */
 enum nl80211_reg_rule_flags {
 	NL80211_RRF_NO_OFDM		= 1<<0,
@@ -4553,6 +4558,7 @@ enum nl80211_reg_rule_flags {
 	NL80211_RRF_DFS_CONCURRENT	= 1<<21,
 	NL80211_RRF_NO_6GHZ_VLP_CLIENT	= 1<<22,
 	NL80211_RRF_NO_6GHZ_AFC_CLIENT	= 1<<23,
+	NL80211_RRF_ALLOW_6GHZ_VLP_AP	= 1<<24,
 };
 
 #define NL80211_RRF_PASSIVE_SCAN	NL80211_RRF_NO_IR
-- 
cgit v1.2.3


From 4565d2652a37e438e4cd729e2a8dfeffe34c958c Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Wed, 12 Jun 2024 10:20:17 +0200
Subject: PCI/pwrctl: Add PCI power control core code

Some PCI devices must be powered-on before they can be detected on the
bus. Introduce a simple framework reusing the existing PCI OF
infrastructure.

The way this works is: a DT node representing a PCI device connected to
the port can be matched against its power control platform driver. If
the match succeeds, the driver is responsible for powering-up the device
and calling pci_pwrctl_device_set_ready() which will trigger a PCI bus
rescan as well as subscribe to PCI bus notifications.

When the device is detected and created, we'll make it consume the same
DT node that the platform device did. When the device is bound, we'll
create a device link between it and the parent power control device.

Tested-by: Amit Pundir <amit.pundir@linaro.org>
Tested-by: Neil Armstrong <neil.armstrong@linaro.org> # on SM8550-QRD, SM8650-QRD & SM8650-HDK
Tested-by: Caleb Connolly <caleb.connolly@linaro.org> # OnePlus 8T
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20240612082019.19161-5-brgl@bgdev.pl
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 include/linux/pci-pwrctl.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 include/linux/pci-pwrctl.h

(limited to 'include')

diff --git a/include/linux/pci-pwrctl.h b/include/linux/pci-pwrctl.h
new file mode 100644
index 000000000000..45e9cfe740e4
--- /dev/null
+++ b/include/linux/pci-pwrctl.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2024 Linaro Ltd.
+ */
+
+#ifndef __PCI_PWRCTL_H__
+#define __PCI_PWRCTL_H__
+
+#include <linux/notifier.h>
+
+struct device;
+struct device_link;
+
+/*
+ * This is a simple framework for solving the issue of PCI devices that require
+ * certain resources (regulators, GPIOs, clocks) to be enabled before the
+ * device can actually be detected on the PCI bus.
+ *
+ * The idea is to reuse the platform bus to populate OF nodes describing the
+ * PCI device and its resources, let these platform devices probe and enable
+ * relevant resources and then trigger a rescan of the PCI bus allowing for the
+ * same device (with a second associated struct device) to be registered with
+ * the PCI subsystem.
+ *
+ * To preserve a correct hierarchy for PCI power management and device reset,
+ * we create a device link between the power control platform device (parent)
+ * and the supplied PCI device (child).
+ */
+
+/**
+ * struct pci_pwrctl - PCI device power control context.
+ * @dev: Address of the power controlling device.
+ *
+ * An object of this type must be allocated by the PCI power control device and
+ * passed to the pwrctl subsystem to trigger a bus rescan and setup a device
+ * link with the device once it's up.
+ */
+struct pci_pwrctl {
+	struct device *dev;
+
+	/* Private: don't use. */
+	struct notifier_block nb;
+	struct device_link *link;
+};
+
+int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl);
+void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl);
+int devm_pci_pwrctl_device_set_ready(struct device *dev,
+				     struct pci_pwrctl *pwrctl);
+
+#endif /* __PCI_PWRCTL_H__ */
-- 
cgit v1.2.3


From 7180f8d91fcbf252de572d9ffacc945effed0060 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Tue, 11 Jun 2024 19:38:22 +0200
Subject: vfs: add rcu-based find_inode variants for iget ops

This avoids one inode hash lock acquire in the common case on inode
creation, in effect significantly reducing contention.

On the stock kernel said lock is typically taken twice:
1. once to check if the inode happens to already be present
2. once to add it to the hash

The back-to-back lock/unlock pattern is known to degrade performance
significantly, which is further exacerbated if the hash is heavily
populated (long chains to walk, extending hold time). Arguably hash
sizing and hashing algo need to be revisited, but that's beyond the
scope of this patch.

With the acquire from step 1 eliminated with RCU lookup throughput
increases significantly at the scale of 20 cores (benchmark results at
the bottom).

So happens the hash already supports RCU-based operation, but lookups on
inode insertions didn't take advantage of it.

This of course has its limits as the global lock is still a bottleneck.
There was a patchset posted which introduced fine-grained locking[1] but
it appears staled. Apart from that doubt was expressed whether a
handrolled hash implementation is appropriate to begin with, suggesting
replacement with rhashtables. Nobody committed to carrying [1] across
the finish line or implementing anything better, thus the bandaid below.

iget_locked consumers (notably ext4) get away without any changes
because inode comparison method is built-in.

iget5_locked consumers pass a custom callback. Since removal of locking
adds more problems (inode can be changing) it's not safe to assume all
filesystems happen to cope.  Thus iget5_locked_rcu gets added, requiring
manual conversion of interested filesystems.

In order to reduce code duplication find_inode and find_inode_fast grow
an argument indicating whether inode hash lock is held, which is passed
down in case sleeping is necessary. They always rcu_read_lock, which is
redundant but harmless. Doing it conditionally reduces readability for
no real gain that I can see. RCU-alike restrictions were already put on
callbacks due to the hash spinlock being held.

Benchmarking:
There is a real cache-busting workload scanning millions of files in
parallel (it's a backup appliance), where the initial lookup is
guaranteed to fail resulting in the two lock acquires on stock kernel
(and one with the patch at hand).

Implemented below is a synthetic benchmark providing the same behavior.
[I shall note the workload is not running on Linux, instead it was
causing trouble elsewhere. Benchmark below was used while addressing
said problems and was found to adequately represent the real workload.]

Total real time fluctuates by 1-2s.

With 20 threads each walking a dedicated 1000 dirs * 1000 files
directory tree to stat(2) on a 32 core + 24GB RAM vm:

ext4 (needed mkfs.ext4 -N 24000000):
before: 3.77s user 890.90s system 1939% cpu 46.118 total
after:  3.24s user 397.73s system 1858% cpu 21.581 total (-53%)

That's 20 million files to visit, while the machine can only cache about
15 million at a time (obtained from ext4_inode_cache object count in
/proc/slabinfo). Since each terminal inode is only visited once per run
this amounts to 0% hit ratio for the dentry cache and the hash table
(there are however hits for the intermediate directories).

On repeated runs the kernel caches the last ~15 mln, meaning there is ~5
mln of uncached inodes which are going to be visited first, evicting the
previously cached state as it happens.

Lack of hits can be trivially verified with bpftrace, like so:
bpftrace -e 'kretprobe:find_inode_fast { @[kstack(), retval != 0] = count(); }'\
-c "/bin/sh walktrees /testfs 20"

Best ran more than once.

Expected results after "warmup":
[snip]
@[
    __ext4_iget+275
    ext4_lookup+224
    __lookup_slow+130
    walk_component+219
    link_path_walk.part.0.constprop.0+614
    path_lookupat+62
    filename_lookup+204
    vfs_statx+128
    vfs_fstatat+131
    __do_sys_newfstatat+38
    do_syscall_64+87
    entry_SYSCALL_64_after_hwframe+118
, 1]: 20000
@[
    __ext4_iget+275
    ext4_lookup+224
    __lookup_slow+130
    walk_component+219
    path_lookupat+106
    filename_lookup+204
    vfs_statx+128
    vfs_fstatat+131
    __do_sys_newfstatat+38
    do_syscall_64+87
    entry_SYSCALL_64_after_hwframe+118
, 1]: 20000000

That is 20 million calls for the initial lookup and 20 million after
allocating a new inode, all of them failing to return a value != 0
(i.e., they are returning NULL -- no match found).

Of course aborting the benchmark in the middle and starting it again (or
messing with the state in other ways) is going to alter these results.

Benchmark can be found here: https://people.freebsd.org/~mjg/fstree.tgz

[1] https://lore.kernel.org/all/20231206060629.2827226-1-david@fromorbit.com/

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Link: https://lore.kernel.org/r/20240611173824.535995-2-mjguzik@gmail.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0283cf366c2a..aac51da4f90c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3029,7 +3029,12 @@ extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
 		int (*test)(struct inode *, void *),
 		int (*set)(struct inode *, void *),
 		void *data);
-extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
+struct inode *iget5_locked(struct super_block *, unsigned long,
+			   int (*test)(struct inode *, void *),
+			   int (*set)(struct inode *, void *), void *);
+struct inode *iget5_locked_rcu(struct super_block *, unsigned long,
+			       int (*test)(struct inode *, void *),
+			       int (*set)(struct inode *, void *), void *);
 extern struct inode * iget_locked(struct super_block *, unsigned long);
 extern struct inode *find_inode_nowait(struct super_block *,
 				       unsigned long,
-- 
cgit v1.2.3


From 66055636a146c435cd226fb5a334176304652f3c Mon Sep 17 00:00:00 2001
From: Tejas Vipin <tejasvipin76@gmail.com>
Date: Wed, 12 Jun 2024 19:05:43 +0530
Subject: drm/mipi-dsi: fix handling of ctx in mipi_dsi_msleep

ctx would be better off treated as a pointer to account for most of its
usage so far, and brackets should be added to account for operator
precedence for correct evaluation.

Fixes: f79d6d28d8fe ("drm/mipi-dsi: wrap more functions for streamline handling")
Signed-off-by: Tejas Vipin <tejasvipin76@gmail.com>
Suggested-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240612133550.473279-3-tejasvipin76@gmail.com
[narmstrong: fixed fixes tag]
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240612133550.473279-3-tejasvipin76@gmail.com
---
 include/drm/drm_mipi_dsi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index bd5a0b6d0711..71d121aeef24 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -293,7 +293,7 @@ ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params,
 
 #define mipi_dsi_msleep(ctx, delay)	\
 	do {				\
-		if (!ctx.accum_err)	\
+		if (!(ctx)->accum_err)	\
 			msleep(delay);	\
 	} while (0)
 
-- 
cgit v1.2.3


From 3b9c181bcde8555ca81b2394c2dc2201cefc2dd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= <jose.souza@intel.com>
Date: Tue, 11 Jun 2024 10:47:15 -0700
Subject: devcoredump: Add dev_coredumpm_timeout()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add function to set a custom coredump timeout.

For Xe driver usage, current 5 minutes timeout may be too short for
users to search and understand what needs to be done to capture
coredump to report bugs.

We have plans to automate(distribute a udev script) it but at the end
will be up to distros and users to pack it so having a option to
increase the timeout is a safer option.

v2:
- replace dev_coredump_timeout_set() by dev_coredumpm_timeout() (Mukesh)

v3:
- make dev_coredumpm() static inline (Johannes)

v5:
- rename DEVCOREDUMP_TIMEOUT -> DEVCD_TIMEOUT to avoid redefinition
in include/net/bluetooth/coredump.h

v6:
- fix definition of dev_coredumpm_timeout() when CONFIG_DEV_COREDUMP
is disabled

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Mukesh Ojha <quic_mojha@quicinc.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Jonathan Cavitt <jonathan.cavitt@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20240611174716.72660-1-jose.souza@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/linux/devcoredump.h | 53 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h
index c8f7eb6cc191..377892604ff4 100644
--- a/include/linux/devcoredump.h
+++ b/include/linux/devcoredump.h
@@ -12,6 +12,9 @@
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 
+/* if data isn't read by userspace after 5 minutes then delete it */
+#define DEVCD_TIMEOUT	(HZ * 60 * 5)
+
 /*
  * _devcd_free_sgtable - free all the memory of the given scatterlist table
  * (i.e. both pages and scatterlist instances)
@@ -50,16 +53,17 @@ static inline void _devcd_free_sgtable(struct scatterlist *table)
 	kfree(delete_iter);
 }
 
-
 #ifdef CONFIG_DEV_COREDUMP
 void dev_coredumpv(struct device *dev, void *data, size_t datalen,
 		   gfp_t gfp);
 
-void dev_coredumpm(struct device *dev, struct module *owner,
-		   void *data, size_t datalen, gfp_t gfp,
-		   ssize_t (*read)(char *buffer, loff_t offset, size_t count,
-				   void *data, size_t datalen),
-		   void (*free)(void *data));
+void dev_coredumpm_timeout(struct device *dev, struct module *owner,
+			   void *data, size_t datalen, gfp_t gfp,
+			   ssize_t (*read)(char *buffer, loff_t offset,
+					   size_t count, void *data,
+					   size_t datalen),
+			   void (*free)(void *data),
+			   unsigned long timeout);
 
 void dev_coredumpsg(struct device *dev, struct scatterlist *table,
 		    size_t datalen, gfp_t gfp);
@@ -73,11 +77,13 @@ static inline void dev_coredumpv(struct device *dev, void *data,
 }
 
 static inline void
-dev_coredumpm(struct device *dev, struct module *owner,
-	      void *data, size_t datalen, gfp_t gfp,
-	      ssize_t (*read)(char *buffer, loff_t offset, size_t count,
-			      void *data, size_t datalen),
-	      void (*free)(void *data))
+dev_coredumpm_timeout(struct device *dev, struct module *owner,
+		      void *data, size_t datalen, gfp_t gfp,
+		      ssize_t (*read)(char *buffer, loff_t offset,
+				      size_t count, void *data,
+				      size_t datalen),
+		      void (*free)(void *data),
+		      unsigned long timeout)
 {
 	free(data);
 }
@@ -92,4 +98,29 @@ static inline void dev_coredump_put(struct device *dev)
 }
 #endif /* CONFIG_DEV_COREDUMP */
 
+/**
+ * dev_coredumpm - create device coredump with read/free methods
+ * @dev: the struct device for the crashed device
+ * @owner: the module that contains the read/free functions, use %THIS_MODULE
+ * @data: data cookie for the @read/@free functions
+ * @datalen: length of the data
+ * @gfp: allocation flags
+ * @read: function to read from the given buffer
+ * @free: function to free the given buffer
+ *
+ * Creates a new device coredump for the given device. If a previous one hasn't
+ * been read yet, the new coredump is discarded. The data lifetime is determined
+ * by the device coredump framework and when it is no longer needed the @free
+ * function will be called to free the data.
+ */
+static inline void dev_coredumpm(struct device *dev, struct module *owner,
+				 void *data, size_t datalen, gfp_t gfp,
+				 ssize_t (*read)(char *buffer, loff_t offset, size_t count,
+						 void *data, size_t datalen),
+				void (*free)(void *data))
+{
+	dev_coredumpm_timeout(dev, owner, data, datalen, gfp, read, free,
+			      DEVCD_TIMEOUT);
+}
+
 #endif /* __DEVCOREDUMP_H */
-- 
cgit v1.2.3


From e54700f7d6aa2ae0d0a0aeeebedcecd7ce1123fe Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Mon, 3 Jun 2024 20:24:30 +0530
Subject: drm/xe/bmg: Add PCI IDs

Add the initial set of device IDs for Battlemage.

Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240603145430.1260817-1-balasubramani.vivekanandan@intel.com
---
 include/drm/xe_pciids.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h
index adb37bc541e4..644872a35c35 100644
--- a/include/drm/xe_pciids.h
+++ b/include/drm/xe_pciids.h
@@ -192,4 +192,11 @@
 	MACRO__(0x64A0, ## __VA_ARGS__), \
 	MACRO__(0x64B0, ## __VA_ARGS__)
 
+#define XE_BMG_IDS(MACRO__, ...) \
+	MACRO__(0xE202, ## __VA_ARGS__), \
+	MACRO__(0xE20B, ## __VA_ARGS__), \
+	MACRO__(0xE20C, ## __VA_ARGS__), \
+	MACRO__(0xE20D, ## __VA_ARGS__), \
+	MACRO__(0xE212, ## __VA_ARGS__)
+
 #endif
-- 
cgit v1.2.3


From e038ee6189842e9662d2fc59d09dbcf48350cf99 Mon Sep 17 00:00:00 2001
From: Anuj Gupta <anuj20.g@samsung.com>
Date: Mon, 10 Jun 2024 16:41:44 +0530
Subject: block: unmap and free user mapped integrity via submitter

The user mapped intergity is copied back and unpinned by
bio_integrity_free which is a low-level routine. Do it via the submitter
rather than doing it in the low-level block layer code, to split the
submitter side from the consumer side of the bio.

Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20240610111144.14647-1-anuj20.g@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index d5379548d684..818e93612947 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -731,6 +731,7 @@ static inline bool bioset_initialized(struct bio_set *bs)
 		bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
 
 int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed);
+void bio_integrity_unmap_free_user(struct bio *bio);
 extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
 extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
 extern bool bio_integrity_prep(struct bio *);
@@ -807,6 +808,9 @@ static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
 {
 	return -EINVAL;
 }
+static inline void bio_integrity_unmap_free_user(struct bio *bio)
+{
+}
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
-- 
cgit v1.2.3


From ec209ad86324de84ef66990f0e9df0851e45e054 Mon Sep 17 00:00:00 2001
From: Daniel Xu <dxu@dxuuu.xyz>
Date: Wed, 12 Jun 2024 09:58:32 -0600
Subject: bpf: verifier: Relax caller requirements for kfunc projection type
 args

Currently, if a kfunc accepts a projection type as an argument (eg
struct __sk_buff *), the caller must exactly provide exactly the same
type with provable provenance.

However in practice, kfuncs that accept projection types _must_ cast to
the underlying type before use b/c projection type layouts are
completely made up. Thus, it is ok to relax the verifier rules around
implicit conversions.

We will use this functionality in the next commit when we align kfuncs
to user-facing types.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Link: https://lore.kernel.org/r/e2c025cb09ccfd4af1ec9e18284dc3cecff7514d.1718207789.git.dxu@dxuuu.xyz
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/btf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index f9e56fd12a9f..56d91daacdba 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -531,6 +531,7 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id);
 int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
 				struct module *owner);
 struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id);
+bool btf_is_projection_of(const char *pname, const char *tname);
 bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
 			   const struct btf_type *t, enum bpf_prog_type prog_type,
 			   int arg);
-- 
cgit v1.2.3


From cce4c40b960673f9e020835def310f1e89d3a940 Mon Sep 17 00:00:00 2001
From: Daniel Xu <dxu@dxuuu.xyz>
Date: Wed, 12 Jun 2024 09:58:33 -0600
Subject: bpf: treewide: Align kfunc signatures to prog point-of-view

Previously, kfunc declarations in bpf_kfuncs.h (and others) used "user
facing" types for kfuncs prototypes while the actual kfunc definitions
used "kernel facing" types. More specifically: bpf_dynptr vs
bpf_dynptr_kern, __sk_buff vs sk_buff, and xdp_md vs xdp_buff.

It wasn't an issue before, as the verifier allows aliased types.
However, since we are now generating kfunc prototypes in vmlinux.h (in
addition to keeping bpf_kfuncs.h around), this conflict creates
compilation errors.

Fix this conflict by using "user facing" types in kfunc definitions.
This results in more casts, but otherwise has no additional runtime
cost.

Note, similar to 5b268d1ebcdc ("bpf: Have bpf_rdonly_cast() take a const
pointer"), we also make kfuncs take const arguments where appropriate in
order to make the kfunc more permissive.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Link: https://lore.kernel.org/r/b58346a63a0e66bc9b7504da751b526b0b189a67.1718207789.git.dxu@dxuuu.xyz
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a834f4b761bc..f636b4998bf7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -3265,8 +3265,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 				struct bpf_insn *insn_buf,
 				struct bpf_prog *prog,
 				u32 *target_size);
-int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
-			       struct bpf_dynptr_kern *ptr);
+int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
+			       struct bpf_dynptr *ptr);
 #else
 static inline bool bpf_sock_common_is_valid_access(int off, int size,
 						   enum bpf_access_type type,
@@ -3288,8 +3288,8 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 {
 	return 0;
 }
-static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
-					     struct bpf_dynptr_kern *ptr)
+static inline int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
+					     struct bpf_dynptr *ptr)
 {
 	return -EOPNOTSUPP;
 }
-- 
cgit v1.2.3


From 7fc45cb68696c7213c484ec81892bc8a986fde52 Mon Sep 17 00:00:00 2001
From: Shradha Gupta <shradhagupta@linux.microsoft.com>
Date: Mon, 10 Jun 2024 03:28:39 -0700
Subject: net: mana: Allow variable size indirection table

Allow variable size indirection table allocation in MANA instead
of using a constant value MANA_INDIRECT_TABLE_SIZE.
The size is now derived from the MANA_QUERY_VPORT_CONFIG and the
indirection table is allocated dynamically.

Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
Link: https://lore.kernel.org/r/1718015319-9609-1-git-send-email-shradhagupta@linux.microsoft.com
Reviewed-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/net/mana/gdma.h | 4 +++-
 include/net/mana/mana.h | 9 +++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 27684135bb4d..c547756c4284 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -543,11 +543,13 @@ enum {
  */
 #define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
 #define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3)
+#define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5)
 
 #define GDMA_DRV_CAP_FLAGS1 \
 	(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
 	 GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
-	 GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG)
+	 GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
+	 GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT)
 
 #define GDMA_DRV_CAP_FLAGS2 0
 
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 561f6719fb4e..59823901b74f 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -30,8 +30,8 @@ enum TRI_STATE {
 };
 
 /* Number of entries for hardware indirection table must be in power of 2 */
-#define MANA_INDIRECT_TABLE_SIZE 64
-#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1)
+#define MANA_INDIRECT_TABLE_MAX_SIZE 512
+#define MANA_INDIRECT_TABLE_DEF_SIZE 64
 
 /* The Toeplitz hash key's length in bytes: should be multiple of 8 */
 #define MANA_HASH_KEY_SIZE 40
@@ -410,10 +410,11 @@ struct mana_port_context {
 	struct mana_tx_qp *tx_qp;
 
 	/* Indirection Table for RX & TX. The values are queue indexes */
-	u32 indir_table[MANA_INDIRECT_TABLE_SIZE];
+	u32 *indir_table;
+	u32 indir_table_sz;
 
 	/* Indirection table containing RxObject Handles */
-	mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE];
+	mana_handle_t *rxobj_table;
 
 	/*  Hash key used by the NIC */
 	u8 hashkey[MANA_HASH_KEY_SIZE];
-- 
cgit v1.2.3


From b975d3ee5962237c1e2f5d5aeeaaf0dc2173486c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 9 Jun 2024 00:10:39 +0200
Subject: net: add and use skb_get_hash_net

Years ago flow dissector gained ability to delegate flow dissection
to a bpf program, scoped per netns.

Unfortunately, skb_get_hash() only gets an sk_buff argument instead
of both net+skb.  This means the flow dissector needs to obtain the
netns pointer from somewhere else.

The netns is derived from skb->dev, and if that is not available, from
skb->sk.  If neither is set, we hit a (benign) WARN_ON_ONCE().

Trying both dev and sk covers most cases, but not all, as recently
reported by Christoph Paasch.

In case of nf-generated tcp reset, both sk and dev are NULL:

WARNING: .. net/core/flow_dissector.c:1104
 skb_flow_dissect_flow_keys include/linux/skbuff.h:1536 [inline]
 skb_get_hash include/linux/skbuff.h:1578 [inline]
 nft_trace_init+0x7d/0x120 net/netfilter/nf_tables_trace.c:320
 nft_do_chain+0xb26/0xb90 net/netfilter/nf_tables_core.c:268
 nft_do_chain_ipv4+0x7a/0xa0 net/netfilter/nft_chain_filter.c:23
 nf_hook_slow+0x57/0x160 net/netfilter/core.c:626
 __ip_local_out+0x21d/0x260 net/ipv4/ip_output.c:118
 ip_local_out+0x26/0x1e0 net/ipv4/ip_output.c:127
 nf_send_reset+0x58c/0x700 net/ipv4/netfilter/nf_reject_ipv4.c:308
 nft_reject_ipv4_eval+0x53/0x90 net/ipv4/netfilter/nft_reject_ipv4.c:30
 [..]

syzkaller did something like this:
table inet filter {
  chain input {
    type filter hook input priority filter; policy accept;
    meta nftrace set 1
    tcp dport 42 reject with tcp reset
   }
   chain output {
    type filter hook output priority filter; policy accept;
    # empty chain is enough
   }
}

... then sends a tcp packet to port 42.

Initial attempt to simply set skb->dev from nf_reject_ipv4 doesn't cover
all cases: skbs generated via ipv4 igmp_send_report trigger similar splat.

Moreover, Pablo Neira found that nft_hash.c uses __skb_get_hash_symmetric()
which would trigger same warn splat for such skbs.

Lets allow callers to pass the current netns explicitly.
The nf_trace infrastructure is adjusted to use the new helper.

__skb_get_hash_symmetric is handled in the next patch.

Reported-by: Christoph Paasch <cpaasch@apple.com>
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/494
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20240608221057.16070-2-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fe7d8dbef77e..6e78019f899a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1498,7 +1498,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4)
 	__skb_set_hash(skb, hash, true, is_l4);
 }
 
-void __skb_get_hash(struct sk_buff *skb);
+void __skb_get_hash_net(const struct net *net, struct sk_buff *skb);
 u32 __skb_get_hash_symmetric(const struct sk_buff *skb);
 u32 skb_get_poff(const struct sk_buff *skb);
 u32 __skb_get_poff(const struct sk_buff *skb, const void *data,
@@ -1578,10 +1578,18 @@ void skb_flow_dissect_hash(const struct sk_buff *skb,
 			   struct flow_dissector *flow_dissector,
 			   void *target_container);
 
+static inline __u32 skb_get_hash_net(const struct net *net, struct sk_buff *skb)
+{
+	if (!skb->l4_hash && !skb->sw_hash)
+		__skb_get_hash_net(net, skb);
+
+	return skb->hash;
+}
+
 static inline __u32 skb_get_hash(struct sk_buff *skb)
 {
 	if (!skb->l4_hash && !skb->sw_hash)
-		__skb_get_hash(skb);
+		__skb_get_hash_net(NULL, skb);
 
 	return skb->hash;
 }
-- 
cgit v1.2.3


From d1dab4f71d372e00e2d34a9c32bf261623e3a95c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 9 Jun 2024 00:10:40 +0200
Subject: net: add and use __skb_get_hash_symmetric_net

Similar to previous patch: apply same logic for
__skb_get_hash_symmetric and let callers pass the netns to the dissector
core.

Existing function is turned into a wrapper to avoid adjusting all
callers, nft_hash.c uses new function.

Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20240608221057.16070-3-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6e78019f899a..813406a9bd6c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1498,8 +1498,14 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4)
 	__skb_set_hash(skb, hash, true, is_l4);
 }
 
+u32 __skb_get_hash_symmetric_net(const struct net *net, const struct sk_buff *skb);
+
+static inline u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
+{
+	return __skb_get_hash_symmetric_net(NULL, skb);
+}
+
 void __skb_get_hash_net(const struct net *net, struct sk_buff *skb);
-u32 __skb_get_hash_symmetric(const struct sk_buff *skb);
 u32 skb_get_poff(const struct sk_buff *skb);
 u32 __skb_get_poff(const struct sk_buff *skb, const void *data,
 		   const struct flow_keys_basic *keys, int hlen);
-- 
cgit v1.2.3


From 3e453ca122d483eb519f934b6624215f0536301c Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Fri, 7 Jun 2024 17:13:53 +0200
Subject: net: ipv4,ipv6: Pass multipath hash computation through a helper

The following patches will add a sysctl to control multipath hash
seed. In order to centralize the hash computation, add a helper,
fib_multipath_hash_from_keys(), and have all IPv4 and IPv6 route.c
invocations of flow_hash_from_keys() go through this helper instead.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20240607151357.421181-2-petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip_fib.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 9b2f69ba5e49..b8b3c07e8f7b 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -521,6 +521,13 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig);
 int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 		       const struct sk_buff *skb, struct flow_keys *flkeys);
 #endif
+
+static inline u32 fib_multipath_hash_from_keys(const struct net *net,
+					       struct flow_keys *keys)
+{
+	return flow_hash_from_keys(keys);
+}
+
 int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
 		 struct netlink_ext_ack *extack);
 void fib_select_multipath(struct fib_result *res, int hash);
-- 
cgit v1.2.3


From 4ee2a8cace3fb9a34aea6a56426f89d26dd514f3 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Fri, 7 Jun 2024 17:13:54 +0200
Subject: net: ipv4: Add a sysctl to set multipath hash seed

When calculating hashes for the purpose of multipath forwarding, both IPv4
and IPv6 code currently fall back on flow_hash_from_keys(). That uses a
randomly-generated seed. That's a fine choice by default, but unfortunately
some deployments may need a tighter control over the seed used.

In this patch, make the seed configurable by adding a new sysctl key,
net.ipv4.fib_multipath_hash_seed to control the seed. This seed is used
specifically for multipath forwarding and not for the other concerns that
flow_hash_from_keys() is used for, such as queue selection. Expose the knob
as sysctl because other such settings, such as headers to hash, are also
handled that way. Like those, the multipath hash seed is a per-netns
variable.

Despite being placed in the net.ipv4 namespace, the multipath seed sysctl
is used for both IPv4 and IPv6, similarly to e.g. a number of TCP
variables.

The seed used by flow_hash_from_keys() is a 128-bit quantity. However it
seems that usually the seed is a much more modest value. 32 bits seem
typical (Cisco, Cumulus), some systems go even lower. For that reason, and
to decouple the user interface from implementation details, go with a
32-bit quantity, which is then quadruplicated to form the siphash key.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20240607151357.421181-3-petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/flow_dissector.h |  2 ++
 include/net/ip_fib.h         | 23 ++++++++++++++++++++++-
 include/net/netns/ipv4.h     |  8 ++++++++
 3 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 99626475c3f4..3e47e123934d 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -442,6 +442,8 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys)
 }
 
 u32 flow_hash_from_keys(struct flow_keys *keys);
+u32 flow_hash_from_keys_seed(struct flow_keys *keys,
+			     const siphash_key_t *keyval);
 void skb_flow_get_icmp_tci(const struct sk_buff *skb,
 			   struct flow_dissector_key_icmp *key_icmp,
 			   const void *data, int thoff, int hlen);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index b8b3c07e8f7b..6e7984bfb986 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -520,13 +520,34 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig);
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 		       const struct sk_buff *skb, struct flow_keys *flkeys);
-#endif
+
+static void
+fib_multipath_hash_construct_key(siphash_key_t *key, u32 mp_seed)
+{
+	u64 mp_seed_64 = mp_seed;
+
+	key->key[0] = (mp_seed_64 << 32) | mp_seed_64;
+	key->key[1] = key->key[0];
+}
 
+static inline u32 fib_multipath_hash_from_keys(const struct net *net,
+					       struct flow_keys *keys)
+{
+	siphash_aligned_key_t hash_key;
+	u32 mp_seed;
+
+	mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed;
+	fib_multipath_hash_construct_key(&hash_key, mp_seed);
+
+	return flow_hash_from_keys_seed(keys, &hash_key);
+}
+#else
 static inline u32 fib_multipath_hash_from_keys(const struct net *net,
 					       struct flow_keys *keys)
 {
 	return flow_hash_from_keys(keys);
 }
+#endif
 
 int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
 		 struct netlink_ext_ack *extack);
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index a91bb971f901..5fcd61ada622 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -40,6 +40,13 @@ struct inet_timewait_death_row {
 
 struct tcp_fastopen_context;
 
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+struct sysctl_fib_multipath_hash_seed {
+	u32 user_seed;
+	u32 mp_seed;
+};
+#endif
+
 struct netns_ipv4 {
 	/* Cacheline organization can be found documented in
 	 * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
@@ -246,6 +253,7 @@ struct netns_ipv4 {
 #endif
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
+	struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed;
 	u32 sysctl_fib_multipath_hash_fields;
 	u8 sysctl_fib_multipath_use_neigh;
 	u8 sysctl_fib_multipath_hash_policy;
-- 
cgit v1.2.3


From b48a1540b73a3c3a9ef59ce34176cc8180c6ce57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= <ast@fiberby.net>
Date: Sun, 9 Jun 2024 17:33:51 +0000
Subject: flow_offload: add encapsulation control flag helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds two new helper functions:
  flow_rule_is_supp_enc_control_flags()
  flow_rule_has_enc_control_flags()

They are intended to be used for validating encapsulation control
flags, and compliment the similar helpers without "enc_" in the name.

The only difference is that they have their own error message,
to make it obvious if an unsupported flag error is related to
FLOW_DISSECTOR_KEY_CONTROL or FLOW_DISSECTOR_KEY_ENC_CONTROL.

flow_rule_has_enc_control_flags() is for drivers supporting
FLOW_DISSECTOR_KEY_ENC_CONTROL, but not supporting any
encapsulation control flags.
(Currently all 4 drivers fits this category)

flow_rule_is_supp_enc_control_flags() is currently only used
for the above helper, but should also be used by drivers once
they implement at least one encapsulation control flag.

There is AFAICT currently no need for an "enc_" variant of
flow_rule_match_has_control_flags(), as all drivers currently
supporting FLOW_DISSECTOR_KEY_ENC_CONTROL, are already calling
flow_rule_match_enc_control() directly.

Only compile tested.

Signed-off-by: Asbjørn Sloth Tønnesen <ast@fiberby.net>
Reviewed-by: Davide Caratti <dcaratti@redhat.com>
Link: https://lore.kernel.org/r/20240609173358.193178-2-ast@fiberby.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/flow_offload.h | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'include')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index ec9f80509f60..292cd8f4b762 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -471,6 +471,28 @@ static inline bool flow_rule_is_supp_control_flags(const u32 supp_flags,
 	return false;
 }
 
+/**
+ * flow_rule_is_supp_enc_control_flags() - check for supported control flags
+ * @supp_enc_flags: encapsulation control flags supported by driver
+ * @enc_ctrl_flags: encapsulation control flags present in rule
+ * @extack: The netlink extended ACK for reporting errors.
+ *
+ * Return: true if only supported control flags are set, false otherwise.
+ */
+static inline bool flow_rule_is_supp_enc_control_flags(const u32 supp_enc_flags,
+						       const u32 enc_ctrl_flags,
+						       struct netlink_ext_ack *extack)
+{
+	if (likely((enc_ctrl_flags & ~supp_enc_flags) == 0))
+		return true;
+
+	NL_SET_ERR_MSG_FMT_MOD(extack,
+			       "Unsupported match on enc_control.flags %#x",
+			       enc_ctrl_flags);
+
+	return false;
+}
+
 /**
  * flow_rule_has_control_flags() - check for presence of any control flags
  * @ctrl_flags: control flags present in rule
@@ -484,6 +506,19 @@ static inline bool flow_rule_has_control_flags(const u32 ctrl_flags,
 	return !flow_rule_is_supp_control_flags(0, ctrl_flags, extack);
 }
 
+/**
+ * flow_rule_has_enc_control_flags() - check for presence of any control flags
+ * @enc_ctrl_flags: encapsulation control flags present in rule
+ * @extack: The netlink extended ACK for reporting errors.
+ *
+ * Return: true if control flags are set, false otherwise.
+ */
+static inline bool flow_rule_has_enc_control_flags(const u32 enc_ctrl_flags,
+						   struct netlink_ext_ack *extack)
+{
+	return !flow_rule_is_supp_enc_control_flags(0, enc_ctrl_flags, extack);
+}
+
 /**
  * flow_rule_match_has_control_flags() - match and check for any control flags
  * @rule: The flow_rule under evaluation.
-- 
cgit v1.2.3


From 80bbd1c355d661678d2a25bd36e739b6925e7a4e Mon Sep 17 00:00:00 2001
From: Luo Jie <quic_luoj@quicinc.com>
Date: Wed, 5 Jun 2024 20:45:39 +0800
Subject: dt-bindings: clock: add qca8386/qca8084 clock and reset definitions

QCA8386/QCA8084 includes the clock & reset controller that is
accessed by MDIO bus. Two work modes are supported, qca8386 works
as switch mode, qca8084 works as PHY mode.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Luo Jie <quic_luoj@quicinc.com>
Link: https://lore.kernel.org/r/20240605124541.2711467-3-quic_luoj@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,qca8k-nsscc.h | 101 +++++++++++++++++++++++++++
 include/dt-bindings/reset/qcom,qca8k-nsscc.h |  76 ++++++++++++++++++++
 2 files changed, 177 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,qca8k-nsscc.h
 create mode 100644 include/dt-bindings/reset/qcom,qca8k-nsscc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,qca8k-nsscc.h b/include/dt-bindings/clock/qcom,qca8k-nsscc.h
new file mode 100644
index 000000000000..0ac3e4c69a1a
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,qca8k-nsscc.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_QCA8K_NSS_CC_H
+#define _DT_BINDINGS_CLK_QCOM_QCA8K_NSS_CC_H
+
+#define NSS_CC_SWITCH_CORE_CLK_SRC				0
+#define NSS_CC_SWITCH_CORE_CLK					1
+#define NSS_CC_APB_BRIDGE_CLK					2
+#define NSS_CC_MAC0_TX_CLK_SRC					3
+#define NSS_CC_MAC0_TX_DIV_CLK_SRC				4
+#define NSS_CC_MAC0_TX_CLK					5
+#define NSS_CC_MAC0_TX_SRDS1_CLK				6
+#define NSS_CC_MAC0_RX_CLK_SRC					7
+#define NSS_CC_MAC0_RX_DIV_CLK_SRC				8
+#define NSS_CC_MAC0_RX_CLK					9
+#define NSS_CC_MAC0_RX_SRDS1_CLK				10
+#define NSS_CC_MAC1_TX_CLK_SRC					11
+#define NSS_CC_MAC1_TX_DIV_CLK_SRC				12
+#define NSS_CC_MAC1_SRDS1_CH0_XGMII_RX_DIV_CLK_SRC		13
+#define NSS_CC_MAC1_SRDS1_CH0_RX_CLK				14
+#define NSS_CC_MAC1_TX_CLK					15
+#define NSS_CC_MAC1_GEPHY0_TX_CLK				16
+#define NSS_CC_MAC1_SRDS1_CH0_XGMII_RX_CLK			17
+#define NSS_CC_MAC1_RX_CLK_SRC					18
+#define NSS_CC_MAC1_RX_DIV_CLK_SRC				19
+#define NSS_CC_MAC1_SRDS1_CH0_XGMII_TX_DIV_CLK_SRC		20
+#define NSS_CC_MAC1_SRDS1_CH0_TX_CLK				21
+#define NSS_CC_MAC1_RX_CLK					22
+#define NSS_CC_MAC1_GEPHY0_RX_CLK				23
+#define NSS_CC_MAC1_SRDS1_CH0_XGMII_TX_CLK			24
+#define NSS_CC_MAC2_TX_CLK_SRC					25
+#define NSS_CC_MAC2_TX_DIV_CLK_SRC				26
+#define NSS_CC_MAC2_SRDS1_CH1_XGMII_RX_DIV_CLK_SRC		27
+#define NSS_CC_MAC2_SRDS1_CH1_RX_CLK				28
+#define NSS_CC_MAC2_TX_CLK					29
+#define NSS_CC_MAC2_GEPHY1_TX_CLK				30
+#define NSS_CC_MAC2_SRDS1_CH1_XGMII_RX_CLK			31
+#define NSS_CC_MAC2_RX_CLK_SRC					32
+#define NSS_CC_MAC2_RX_DIV_CLK_SRC				33
+#define NSS_CC_MAC2_SRDS1_CH1_XGMII_TX_DIV_CLK_SRC		34
+#define NSS_CC_MAC2_SRDS1_CH1_TX_CLK				35
+#define NSS_CC_MAC2_RX_CLK					36
+#define NSS_CC_MAC2_GEPHY1_RX_CLK				37
+#define NSS_CC_MAC2_SRDS1_CH1_XGMII_TX_CLK			38
+#define NSS_CC_MAC3_TX_CLK_SRC					39
+#define NSS_CC_MAC3_TX_DIV_CLK_SRC				40
+#define NSS_CC_MAC3_SRDS1_CH2_XGMII_RX_DIV_CLK_SRC		41
+#define NSS_CC_MAC3_SRDS1_CH2_RX_CLK				42
+#define NSS_CC_MAC3_TX_CLK					43
+#define NSS_CC_MAC3_GEPHY2_TX_CLK				44
+#define NSS_CC_MAC3_SRDS1_CH2_XGMII_RX_CLK			45
+#define NSS_CC_MAC3_RX_CLK_SRC					46
+#define NSS_CC_MAC3_RX_DIV_CLK_SRC				47
+#define NSS_CC_MAC3_SRDS1_CH2_XGMII_TX_DIV_CLK_SRC		48
+#define NSS_CC_MAC3_SRDS1_CH2_TX_CLK				49
+#define NSS_CC_MAC3_RX_CLK					50
+#define NSS_CC_MAC3_GEPHY2_RX_CLK				51
+#define NSS_CC_MAC3_SRDS1_CH2_XGMII_TX_CLK			52
+#define NSS_CC_MAC4_TX_CLK_SRC					53
+#define NSS_CC_MAC4_TX_DIV_CLK_SRC				54
+#define NSS_CC_MAC4_SRDS1_CH3_XGMII_RX_DIV_CLK_SRC		55
+#define NSS_CC_MAC4_SRDS1_CH3_RX_CLK				56
+#define NSS_CC_MAC4_TX_CLK					57
+#define NSS_CC_MAC4_GEPHY3_TX_CLK				58
+#define NSS_CC_MAC4_SRDS1_CH3_XGMII_RX_CLK			59
+#define NSS_CC_MAC4_RX_CLK_SRC					60
+#define NSS_CC_MAC4_RX_DIV_CLK_SRC				61
+#define NSS_CC_MAC4_SRDS1_CH3_XGMII_TX_DIV_CLK_SRC		62
+#define NSS_CC_MAC4_SRDS1_CH3_TX_CLK				63
+#define NSS_CC_MAC4_RX_CLK					64
+#define NSS_CC_MAC4_GEPHY3_RX_CLK				65
+#define NSS_CC_MAC4_SRDS1_CH3_XGMII_TX_CLK			66
+#define NSS_CC_MAC5_TX_CLK_SRC					67
+#define NSS_CC_MAC5_TX_DIV_CLK_SRC				68
+#define NSS_CC_MAC5_TX_SRDS0_CLK				69
+#define NSS_CC_MAC5_TX_CLK					70
+#define NSS_CC_MAC5_RX_CLK_SRC					71
+#define NSS_CC_MAC5_RX_DIV_CLK_SRC				72
+#define NSS_CC_MAC5_RX_SRDS0_CLK				73
+#define NSS_CC_MAC5_RX_CLK					74
+#define NSS_CC_MAC5_TX_SRDS0_CLK_SRC				75
+#define NSS_CC_MAC5_RX_SRDS0_CLK_SRC				76
+#define NSS_CC_AHB_CLK_SRC					77
+#define NSS_CC_AHB_CLK						78
+#define NSS_CC_SEC_CTRL_AHB_CLK					79
+#define NSS_CC_TLMM_CLK						80
+#define NSS_CC_TLMM_AHB_CLK					81
+#define NSS_CC_CNOC_AHB_CLK					82
+#define NSS_CC_MDIO_AHB_CLK					83
+#define NSS_CC_MDIO_MASTER_AHB_CLK				84
+#define NSS_CC_SYS_CLK_SRC					85
+#define NSS_CC_SRDS0_SYS_CLK					86
+#define NSS_CC_SRDS1_SYS_CLK					87
+#define NSS_CC_GEPHY0_SYS_CLK					88
+#define NSS_CC_GEPHY1_SYS_CLK					89
+#define NSS_CC_GEPHY2_SYS_CLK					90
+#define NSS_CC_GEPHY3_SYS_CLK					91
+#endif
diff --git a/include/dt-bindings/reset/qcom,qca8k-nsscc.h b/include/dt-bindings/reset/qcom,qca8k-nsscc.h
new file mode 100644
index 000000000000..c71167a3bd41
--- /dev/null
+++ b/include/dt-bindings/reset/qcom,qca8k-nsscc.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_RESET_QCOM_QCA8K_NSS_CC_H
+#define _DT_BINDINGS_RESET_QCOM_QCA8K_NSS_CC_H
+
+#define NSS_CC_SWITCH_CORE_ARES				1
+#define NSS_CC_APB_BRIDGE_ARES				2
+#define NSS_CC_MAC0_TX_ARES				3
+#define NSS_CC_MAC0_TX_SRDS1_ARES			4
+#define NSS_CC_MAC0_RX_ARES				5
+#define NSS_CC_MAC0_RX_SRDS1_ARES			6
+#define NSS_CC_MAC1_SRDS1_CH0_RX_ARES			7
+#define NSS_CC_MAC1_TX_ARES				8
+#define NSS_CC_MAC1_GEPHY0_TX_ARES			9
+#define NSS_CC_MAC1_SRDS1_CH0_XGMII_RX_ARES		10
+#define NSS_CC_MAC1_SRDS1_CH0_TX_ARES			11
+#define NSS_CC_MAC1_RX_ARES				12
+#define NSS_CC_MAC1_GEPHY0_RX_ARES			13
+#define NSS_CC_MAC1_SRDS1_CH0_XGMII_TX_ARES		14
+#define NSS_CC_MAC2_SRDS1_CH1_RX_ARES			15
+#define NSS_CC_MAC2_TX_ARES				16
+#define NSS_CC_MAC2_GEPHY1_TX_ARES			17
+#define NSS_CC_MAC2_SRDS1_CH1_XGMII_RX_ARES		18
+#define NSS_CC_MAC2_SRDS1_CH1_TX_ARES			19
+#define NSS_CC_MAC2_RX_ARES				20
+#define NSS_CC_MAC2_GEPHY1_RX_ARES			21
+#define NSS_CC_MAC2_SRDS1_CH1_XGMII_TX_ARES		22
+#define NSS_CC_MAC3_SRDS1_CH2_RX_ARES			23
+#define NSS_CC_MAC3_TX_ARES				24
+#define NSS_CC_MAC3_GEPHY2_TX_ARES			25
+#define NSS_CC_MAC3_SRDS1_CH2_XGMII_RX_ARES		26
+#define NSS_CC_MAC3_SRDS1_CH2_TX_ARES			27
+#define NSS_CC_MAC3_RX_ARES				28
+#define NSS_CC_MAC3_GEPHY2_RX_ARES			29
+#define NSS_CC_MAC3_SRDS1_CH2_XGMII_TX_ARES		30
+#define NSS_CC_MAC4_SRDS1_CH3_RX_ARES			31
+#define NSS_CC_MAC4_TX_ARES				32
+#define NSS_CC_MAC4_GEPHY3_TX_ARES			33
+#define NSS_CC_MAC4_SRDS1_CH3_XGMII_RX_ARES		34
+#define NSS_CC_MAC4_SRDS1_CH3_TX_ARES			35
+#define NSS_CC_MAC4_RX_ARES				36
+#define NSS_CC_MAC4_GEPHY3_RX_ARES			37
+#define NSS_CC_MAC4_SRDS1_CH3_XGMII_TX_ARES		38
+#define NSS_CC_MAC5_TX_ARES				39
+#define NSS_CC_MAC5_TX_SRDS0_ARES			40
+#define NSS_CC_MAC5_RX_ARES				41
+#define NSS_CC_MAC5_RX_SRDS0_ARES			42
+#define NSS_CC_AHB_ARES					43
+#define NSS_CC_SEC_CTRL_AHB_ARES			44
+#define NSS_CC_TLMM_ARES				45
+#define NSS_CC_TLMM_AHB_ARES				46
+#define NSS_CC_CNOC_AHB_ARES				47
+#define NSS_CC_MDIO_AHB_ARES				48
+#define NSS_CC_MDIO_MASTER_AHB_ARES			49
+#define NSS_CC_SRDS0_SYS_ARES				50
+#define NSS_CC_SRDS1_SYS_ARES				51
+#define NSS_CC_GEPHY0_SYS_ARES				52
+#define NSS_CC_GEPHY1_SYS_ARES				53
+#define NSS_CC_GEPHY2_SYS_ARES				54
+#define NSS_CC_GEPHY3_SYS_ARES				55
+#define NSS_CC_SEC_CTRL_ARES				56
+#define NSS_CC_SEC_CTRL_SENSE_ARES			57
+#define NSS_CC_SLEEP_ARES				58
+#define NSS_CC_DEBUG_ARES				59
+#define NSS_CC_GEPHY0_ARES				60
+#define NSS_CC_GEPHY1_ARES				61
+#define NSS_CC_GEPHY2_ARES				62
+#define NSS_CC_GEPHY3_ARES				63
+#define NSS_CC_DSP_ARES					64
+#define NSS_CC_GEPHY_FULL_ARES				65
+#define NSS_CC_GLOBAL_ARES				66
+#define NSS_CC_XPCS_ARES				67
+#endif
-- 
cgit v1.2.3


From 525b42832bd333e3e7ccb0efceb41b47347beab5 Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Thu, 6 Jun 2024 13:36:00 +0200
Subject: dt-bindings: clock: Add Qcom QCM2290 GPUCC

Add device tree bindings for graphics clock controller for Qualcomm
Technology Inc's QCM2290 SoCs.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Link: https://lore.kernel.org/r/20240606-topic-rb1_gpu-v4-1-4bc0c19da4af@linaro.org
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,qcm2290-gpucc.h | 32 ++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,qcm2290-gpucc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,qcm2290-gpucc.h b/include/dt-bindings/clock/qcom,qcm2290-gpucc.h
new file mode 100644
index 000000000000..7c76dd05278f
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,qcm2290-gpucc.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2024, Linaro Limited
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_GPU_CC_QCM2290_H
+#define _DT_BINDINGS_CLK_QCOM_GPU_CC_QCM2290_H
+
+/* GPU_CC clocks */
+#define GPU_CC_AHB_CLK			0
+#define GPU_CC_CRC_AHB_CLK		1
+#define GPU_CC_CX_GFX3D_CLK		2
+#define GPU_CC_CX_GMU_CLK		3
+#define GPU_CC_CX_SNOC_DVM_CLK		4
+#define GPU_CC_CXO_AON_CLK		5
+#define GPU_CC_CXO_CLK			6
+#define GPU_CC_GMU_CLK_SRC		7
+#define GPU_CC_GX_GFX3D_CLK		8
+#define GPU_CC_GX_GFX3D_CLK_SRC		9
+#define GPU_CC_PLL0			10
+#define GPU_CC_SLEEP_CLK		11
+#define GPU_CC_HLOS1_VOTE_GPU_SMMU_CLK	12
+
+/* Resets */
+#define GPU_GX_BCR			0
+
+/* GDSCs */
+#define GPU_CX_GDSC			0
+#define GPU_GX_GDSC			1
+
+#endif
-- 
cgit v1.2.3


From b5c29fba72a6c950655d1cb0f6aa16b60dc83be7 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 28 May 2024 12:54:58 +0800
Subject: iommu: Make iommu_sva_domain_alloc() static

iommu_sva_domain_alloc() is only called in iommu-sva.c, hence make it
static.

On the other hand, iommu_sva_domain_alloc() should not return NULL anymore
after commit <80af5a452024> ("iommu: Add ops->domain_alloc_sva()"), the
removal of inline code avoids potential confusion.

Fixes: 80af5a452024 ("iommu: Add ops->domain_alloc_sva()")
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20240528045458.81458-1-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 17b3f36ad843..1cd19b903354 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -1527,8 +1527,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev,
 					struct mm_struct *mm);
 void iommu_sva_unbind_device(struct iommu_sva *handle);
 u32 iommu_sva_get_pasid(struct iommu_sva *handle);
-struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
-					    struct mm_struct *mm);
 #else
 static inline struct iommu_sva *
 iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
@@ -1553,12 +1551,6 @@ static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm)
 }
 
 static inline void mm_pasid_drop(struct mm_struct *mm) {}
-
-static inline struct iommu_domain *
-iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm)
-{
-	return NULL;
-}
 #endif /* CONFIG_IOMMU_SVA */
 
 #ifdef CONFIG_IOMMU_IOPF
-- 
cgit v1.2.3


From 0e6b6dedf16800df0ff73ffe2bb5066514db29c2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 12 Jun 2024 16:15:55 +0200
Subject: ACPI: EC: Evaluate orphan _REG under EC device

After starting to install the EC address space handler at the ACPI
namespace root, if there is an "orphan" _REG method in the EC device's
scope, it will not be evaluated any more.  This breaks EC operation
regions on some systems, like Asus gu605.

To address this, use a wrapper around an existing ACPICA function to
look for an "orphan" _REG method in the EC device scope and evaluate
it if present.

Fixes: 60fa6ae6e6d0 ("ACPI: EC: Install address space handler at the namespace root")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218945
Reported-by: VitaliiT <vitaly.torshyn@gmail.com>
Tested-by: VitaliiT <vitaly.torshyn@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpixf.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 94d0fc3bd412..80dc36f9d527 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -662,6 +662,10 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status
 			    acpi_execute_reg_methods(acpi_handle device,
 						     acpi_adr_space_type
 						     space_id))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status
+			    acpi_execute_orphan_reg_method(acpi_handle device,
+							   acpi_adr_space_type
+							   space_id))
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status
 			    acpi_remove_address_space_handler(acpi_handle
 							      device,
-- 
cgit v1.2.3


From 1a8009e108382848d149a24dd3fc67607d054a05 Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Mon, 10 Jun 2024 10:57:30 +0200
Subject: dt-bindings: interconnect: Add MediaTek EMI Interconnect bindings

Add bindings for the MediaTek External Memory Interface Interconnect,
which providers support system bandwidth requirements through Dynamic
Voltage Frequency Scaling Resource Collector (DVFSRC) hardware.

This adds bindings for MediaTek MT8183 and MT8195 SoCs.

Note that this is modeled as a subnode of DVFSRC for multiple reasons:
 - Some SoCs have more than one interconnect on the DVFSRC (and two
   different kinds of EMI interconnect, and also a SMI interconnect);

 - Some boards will want to not enable the interconnect driver because
   some of those are not battery powered (so they just keep the knobs
   at full thrust from the bootloader and never care scaling busses);

 - Some DVFSRC interconnect features may depend on firmware.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20240610085735.147134-3-angelogioacchino.delregno@collabora.com
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 include/dt-bindings/interconnect/mediatek,mt8183.h | 23 +++++++++++
 include/dt-bindings/interconnect/mediatek,mt8195.h | 44 ++++++++++++++++++++++
 2 files changed, 67 insertions(+)
 create mode 100644 include/dt-bindings/interconnect/mediatek,mt8183.h
 create mode 100644 include/dt-bindings/interconnect/mediatek,mt8195.h

(limited to 'include')

diff --git a/include/dt-bindings/interconnect/mediatek,mt8183.h b/include/dt-bindings/interconnect/mediatek,mt8183.h
new file mode 100644
index 000000000000..1088c350258d
--- /dev/null
+++ b/include/dt-bindings/interconnect/mediatek,mt8183.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2021 MediaTek Inc.
+ * Copyright (c) 2024 Collabora Ltd.
+ *                    AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+ */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_MEDIATEK_MT8183_H
+#define __DT_BINDINGS_INTERCONNECT_MEDIATEK_MT8183_H
+
+#define SLAVE_DDR_EMI		0
+#define MASTER_MCUSYS		1
+#define MASTER_MFG		2
+#define MASTER_MMSYS		3
+#define MASTER_MM_VPU		4
+#define MASTER_MM_DISP		5
+#define MASTER_MM_VDEC		6
+#define MASTER_MM_VENC		7
+#define MASTER_MM_CAM		8
+#define MASTER_MM_IMG		9
+#define MASTER_MM_MDP		10
+
+#endif
diff --git a/include/dt-bindings/interconnect/mediatek,mt8195.h b/include/dt-bindings/interconnect/mediatek,mt8195.h
new file mode 100644
index 000000000000..33e0e6cde732
--- /dev/null
+++ b/include/dt-bindings/interconnect/mediatek,mt8195.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2020 MediaTek Inc.
+ * Copyright (c) 2024 Collabora Ltd.
+ *                    AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+ */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_MEDIATEK_MT8195_H
+#define __DT_BINDINGS_INTERCONNECT_MEDIATEK_MT8195_H
+
+#define SLAVE_DDR_EMI		0
+#define MASTER_MCUSYS		1
+#define MASTER_GPUSYS		2
+#define MASTER_MMSYS		3
+#define MASTER_MM_VPU		4
+#define MASTER_MM_DISP		5
+#define MASTER_MM_VDEC		6
+#define MASTER_MM_VENC		7
+#define MASTER_MM_CAM		8
+#define MASTER_MM_IMG		9
+#define MASTER_MM_MDP		10
+#define MASTER_VPUSYS		11
+#define MASTER_VPU_0		12
+#define MASTER_VPU_1		13
+#define MASTER_MDLASYS		14
+#define MASTER_MDLA_0		15
+#define MASTER_UFS		16
+#define MASTER_PCIE_0		17
+#define MASTER_PCIE_1		18
+#define MASTER_USB		19
+#define MASTER_DBGIF		20
+#define SLAVE_HRT_DDR_EMI	21
+#define MASTER_HRT_MMSYS	22
+#define MASTER_HRT_MM_DISP	23
+#define MASTER_HRT_MM_VDEC	24
+#define MASTER_HRT_MM_VENC	25
+#define MASTER_HRT_MM_CAM	26
+#define MASTER_HRT_MM_IMG	27
+#define MASTER_HRT_MM_MDP	28
+#define MASTER_HRT_DBGIF	29
+#define MASTER_WIFI		30
+#define MASTER_BT		31
+#define MASTER_NETSYS		32
+#endif
-- 
cgit v1.2.3


From 163f10b2935362f0e8ef8d7fadd0b5aa33e9130f Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Wed, 12 Jun 2024 08:47:07 +0200
Subject: PCI: Add INTEL_HDA_PTL to pci_ids.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

More PCI ids for Intel audio.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20240612064709.51141-2-pierre-louis.bossart@linux.intel.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 942a587bb97e..0168c6a60148 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -3112,6 +3112,7 @@
 #define PCI_DEVICE_ID_INTEL_HDA_LNL_P	0xa828
 #define PCI_DEVICE_ID_INTEL_S21152BB	0xb152
 #define PCI_DEVICE_ID_INTEL_HDA_BMG	0xe2f7
+#define PCI_DEVICE_ID_INTEL_HDA_PTL	0xe428
 #define PCI_DEVICE_ID_INTEL_HDA_CML_R	0xf0c8
 #define PCI_DEVICE_ID_INTEL_HDA_RKL_S	0xf1c8
 
-- 
cgit v1.2.3


From a3cfe84cca28f205761a0450016593b0d728165e Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vadfed@meta.com>
Date: Thu, 6 Jun 2024 07:58:50 -0700
Subject: bpf: Add CHECKSUM_COMPLETE to bpf test progs

Add special flag to validate that TC BPF program properly updates
checksum information in skb.

Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20240606145851.229116-1-vadfed@meta.com
---
 include/uapi/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 25ea393cf084..35bcf52dbc65 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1425,6 +1425,8 @@ enum {
 #define BPF_F_TEST_RUN_ON_CPU	(1U << 0)
 /* If set, XDP frames will be transmitted after processing */
 #define BPF_F_TEST_XDP_LIVE_FRAMES	(1U << 1)
+/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */
+#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE	(1U << 2)
 
 /* type for BPF_ENABLE_STATS */
 enum bpf_stats_type {
-- 
cgit v1.2.3


From 2755d1f46aa25f65179964bf315d8a16b3540eab Mon Sep 17 00:00:00 2001
From: Maxime Ripard <mripard@kernel.org>
Date: Mon, 10 Jun 2024 13:12:00 +0200
Subject: drm/connector: hdmi: Fix kerneldoc warnings

It looks like the documentation for the HDMI-related fields recently
added to both the drm_connector and drm_connector_state structures
trigger some warnings because of their use of anonymous structures:

  $ scripts/kernel-doc -none include/drm/drm_connector.h
  include/drm/drm_connector.h:1138: warning: Excess struct member 'broadcast_rgb' description in 'drm_connector_state'
  include/drm/drm_connector.h:1138: warning: Excess struct member 'infoframes' description in 'drm_connector_state'
  include/drm/drm_connector.h:1138: warning: Excess struct member 'avi' description in 'drm_connector_state'
  include/drm/drm_connector.h:1138: warning: Excess struct member 'hdr_drm' description in 'drm_connector_state'
  include/drm/drm_connector.h:1138: warning: Excess struct member 'spd' description in 'drm_connector_state'
  include/drm/drm_connector.h:1138: warning: Excess struct member 'vendor' description in 'drm_connector_state'
  include/drm/drm_connector.h:1138: warning: Excess struct member 'is_limited_range' description in 'drm_connector_state'
  include/drm/drm_connector.h:1138: warning: Excess struct member 'output_bpc' description in 'drm_connector_state'
  include/drm/drm_connector.h:1138: warning: Excess struct member 'output_format' description in 'drm_connector_state'
  include/drm/drm_connector.h:1138: warning: Excess struct member 'tmds_char_rate' description in 'drm_connector_state'
  include/drm/drm_connector.h:2112: warning: Excess struct member 'vendor' description in 'drm_connector'
  include/drm/drm_connector.h:2112: warning: Excess struct member 'product' description in 'drm_connector'
  include/drm/drm_connector.h:2112: warning: Excess struct member 'supported_formats' description in 'drm_connector'
  include/drm/drm_connector.h:2112: warning: Excess struct member 'infoframes' description in 'drm_connector'
  include/drm/drm_connector.h:2112: warning: Excess struct member 'lock' description in 'drm_connector'
  include/drm/drm_connector.h:2112: warning: Excess struct member 'audio' description in 'drm_connector'

Create some intermediate structures instead of anonymous ones to silence
the warnings.

Reported-by: Jani Nikula <jani.nikula@linux.intel.com>
Suggested-by: Jani Nikula <jani.nikula@linux.intel.com>
Fixes: 54cb39e2293b ("drm/connector: hdmi: Create an HDMI sub-state")
Fixes: 948f01d5e559 ("drm/connector: hdmi: Add support for output format")
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240610111200.428224-1-mripard@kernel.org
---
 include/drm/drm_connector.h | 206 +++++++++++++++++++++++---------------------
 1 file changed, 108 insertions(+), 98 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index e04a8a0d1bbd..f750765d8fbc 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -929,6 +929,67 @@ struct drm_connector_hdmi_infoframe {
 	bool set;
 };
 
+/*
+ * struct drm_connector_hdmi_state - HDMI state container
+ */
+struct drm_connector_hdmi_state {
+	/**
+	 * @broadcast_rgb: Connector property to pass the
+	 * Broadcast RGB selection value.
+	 */
+	enum drm_hdmi_broadcast_rgb broadcast_rgb;
+
+	/**
+	 * @infoframes: HDMI Infoframes matching that state
+	 */
+	struct {
+		/**
+		 * @avi: AVI Infoframes structure matching our
+		 * state.
+		 */
+		struct drm_connector_hdmi_infoframe avi;
+
+		/**
+		 * @hdr_drm: DRM (Dynamic Range and Mastering)
+		 * Infoframes structure matching our state.
+		 */
+		struct drm_connector_hdmi_infoframe hdr_drm;
+
+		/**
+		 * @spd: SPD Infoframes structure matching our
+		 * state.
+		 */
+		struct drm_connector_hdmi_infoframe spd;
+
+		/**
+		 * @vendor: HDMI Vendor Infoframes structure
+		 * matching our state.
+		 */
+		struct drm_connector_hdmi_infoframe hdmi;
+	} infoframes;
+
+	/**
+	 * @is_limited_range: Is the output supposed to use a limited
+	 * RGB Quantization Range or not?
+	 */
+	bool is_limited_range;
+
+	/**
+	 * @output_bpc: Bits per color channel to output.
+	 */
+	unsigned int output_bpc;
+
+	/**
+	 * @output_format: Pixel format to output in.
+	 */
+	enum hdmi_colorspace output_format;
+
+	/**
+	 * @tmds_char_rate: TMDS Character Rate, in Hz.
+	 */
+	unsigned long long tmds_char_rate;
+};
+
 /**
  * struct drm_connector_state - mutable connector state
  */
@@ -1078,63 +1139,7 @@ struct drm_connector_state {
 	 * @hdmi: HDMI-related variable and properties. Filled by
 	 * @drm_atomic_helper_connector_hdmi_check().
 	 */
-	struct {
-		/**
-		 * @broadcast_rgb: Connector property to pass the
-		 * Broadcast RGB selection value.
-		 */
-		enum drm_hdmi_broadcast_rgb broadcast_rgb;
-
-		/**
-		 * @infoframes: HDMI Infoframes matching that state
-		 */
-		struct {
-			/**
-			 * @avi: AVI Infoframes structure matching our
-			 * state.
-			 */
-			struct drm_connector_hdmi_infoframe avi;
-
-			/**
-			 * @hdr_drm: DRM (Dynamic Range and Mastering)
-			 * Infoframes structure matching our state.
-			 */
-			struct drm_connector_hdmi_infoframe hdr_drm;
-
-			/**
-			 * @spd: SPD Infoframes structure matching our
-			 * state.
-			 */
-			struct drm_connector_hdmi_infoframe spd;
-
-			/**
-			 * @vendor: HDMI Vendor Infoframes structure
-			 * matching our state.
-			 */
-			struct drm_connector_hdmi_infoframe hdmi;
-		} infoframes;
-
-		/**
-		 * @is_limited_range: Is the output supposed to use a limited
-		 * RGB Quantization Range or not?
-		 */
-		bool is_limited_range;
-
-		/**
-		 * @output_bpc: Bits per color channel to output.
-		 */
-		unsigned int output_bpc;
-
-		/**
-		 * @output_format: Pixel format to output in.
-		 */
-		enum hdmi_colorspace output_format;
-
-		/**
-		 * @tmds_char_rate: TMDS Character Rate, in Hz.
-		 */
-		unsigned long long tmds_char_rate;
-	} hdmi;
+	struct drm_connector_hdmi_state hdmi;
 };
 
 /**
@@ -1656,6 +1661,51 @@ struct drm_cmdline_mode {
 	bool tv_mode_specified;
 };
 
+/*
+ * struct drm_connector_hdmi - DRM Connector HDMI-related structure
+ */
+struct drm_connector_hdmi {
+#define DRM_CONNECTOR_HDMI_VENDOR_LEN	8
+	/**
+	 * @vendor: HDMI Controller Vendor Name
+	 */
+	unsigned char vendor[DRM_CONNECTOR_HDMI_VENDOR_LEN] __nonstring;
+
+#define DRM_CONNECTOR_HDMI_PRODUCT_LEN	16
+	/**
+	 * @product: HDMI Controller Product Name
+	 */
+	unsigned char product[DRM_CONNECTOR_HDMI_PRODUCT_LEN] __nonstring;
+
+	/**
+	 * @supported_formats: Bitmask of @hdmi_colorspace
+	 * supported by the controller.
+	 */
+	unsigned long supported_formats;
+
+	/**
+	 * @funcs: HDMI connector Control Functions
+	 */
+	const struct drm_connector_hdmi_funcs *funcs;
+
+	/**
+	 * @infoframes: Current Infoframes output by the connector
+	 */
+	struct {
+		/**
+		 * @lock: Mutex protecting against concurrent access to
+		 * the infoframes, most notably between KMS and ALSA.
+		 */
+		struct mutex lock;
+
+		/**
+		 * @audio: Current Audio Infoframes structure. Protected
+		 * by @lock.
+		 */
+		struct drm_connector_hdmi_infoframe audio;
+	} infoframes;
+};
+
 /**
  * struct drm_connector - central DRM connector control structure
  *
@@ -2068,47 +2118,7 @@ struct drm_connector {
 	/**
 	 * @hdmi: HDMI-related variable and properties.
 	 */
-	struct {
-#define DRM_CONNECTOR_HDMI_VENDOR_LEN	8
-		/**
-		 * @vendor: HDMI Controller Vendor Name
-		 */
-		unsigned char vendor[DRM_CONNECTOR_HDMI_VENDOR_LEN] __nonstring;
-
-#define DRM_CONNECTOR_HDMI_PRODUCT_LEN	16
-		/**
-		 * @product: HDMI Controller Product Name
-		 */
-		unsigned char product[DRM_CONNECTOR_HDMI_PRODUCT_LEN] __nonstring;
-
-		/**
-		 * @supported_formats: Bitmask of @hdmi_colorspace
-		 * supported by the controller.
-		 */
-		unsigned long supported_formats;
-
-		/**
-		 * @funcs: HDMI connector Control Functions
-		 */
-		const struct drm_connector_hdmi_funcs *funcs;
-
-		/**
-		 * @infoframes: Current Infoframes output by the connector
-		 */
-		struct {
-			/**
-			 * @lock: Mutex protecting against concurrent access to
-			 * the infoframes, most notably between KMS and ALSA.
-			 */
-			struct mutex lock;
-
-			/**
-			 * @audio: Current Audio Infoframes structure. Protected
-			 * by @lock.
-			 */
-			struct drm_connector_hdmi_infoframe audio;
-		} infoframes;
-	} hdmi;
+	struct drm_connector_hdmi hdmi;
 };
 
 #define obj_to_connector(x) container_of(x, struct drm_connector, base)
-- 
cgit v1.2.3


From ff985c759778986f55cbc557055fbeb84ee833eb Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 11 Jun 2024 15:01:04 +0200
Subject: auxbus: make to_auxiliary_drv accept and return a constant pointer

In the quest to make struct device constant, start by making
to_auxiliary_drv() return a constant pointer so that drivers that call
this can be fixed up before the driver core changes.

As the return type previously was not constant, also fix up all callers
that were assuming that the pointer was not going to be a constant one
in order to not break the build.

Cc: Dave Ertman <david.m.ertman@intel.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Bingbu Cao <bingbu.cao@intel.com>
Cc: Tianshu Qiu <tian.shu.qiu@intel.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Michael Chan <michael.chan@broadcom.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
Cc: Tony Nguyen <anthony.l.nguyen@intel.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Tariq Toukan <tariqt@nvidia.com>
Cc: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Cc: Liam Girdwood <lgirdwood@gmail.com>
Cc: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Cc: Bard Liao <yung-chuan.liao@linux.intel.com>
Cc: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Cc: Daniel Baluta <daniel.baluta@nxp.com>
Cc: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: Takashi Iwai <tiwai@suse.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: linux-media@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: intel-wired-lan@lists.osuosl.org
Cc: linux-rdma@vger.kernel.org
Cc: sound-open-firmware@alsa-project.org
Cc: linux-sound@vger.kernel.org
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com> # drivers/media/pci/intel/ipu6
Acked-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Martin Habets <habetsm.xilinx@gmail.com>
Link: https://lore.kernel.org/r/20240611130103.3262749-7-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/auxiliary_bus.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h
index de21d9d24a95..bdff7b85f2ae 100644
--- a/include/linux/auxiliary_bus.h
+++ b/include/linux/auxiliary_bus.h
@@ -203,7 +203,7 @@ static inline struct auxiliary_device *to_auxiliary_dev(struct device *dev)
 	return container_of(dev, struct auxiliary_device, dev);
 }
 
-static inline struct auxiliary_driver *to_auxiliary_drv(struct device_driver *drv)
+static inline const struct auxiliary_driver *to_auxiliary_drv(const struct device_driver *drv)
 {
 	return container_of(drv, struct auxiliary_driver, driver);
 }
-- 
cgit v1.2.3


From 288b550463cf5dd21ad34f736b8c5ccb7ff69ceb Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 8 Jun 2024 17:55:24 +0200
Subject: mfd: pm8008: Rework to match new DT binding

Rework the pm8008 driver to match the new devicetree binding which no
longer describes internal details like interrupts and register offsets
(including which of the two consecutive I2C addresses the registers
belong to).

Instead make the interrupt controller implementation internal and pass
interrupts to the subdrivers using MFD cell resources.

Note that subdrivers may either get their resources, like register block
offsets, from the parent MFD or this can be included in the subdrivers
directly.

In the current implementation, the temperature alarm driver is generic
enough to just get its base address and alarm interrupt from the parent
driver, which already uses this information to implement the interrupt
controller.

The regulator driver, however, needs additional information like parent
supplies and regulator characteristics so in that case it is easier to
just augment its table with the regulator register base addresses.

Similarly, the current GPIO driver already holds the number of pins and
that lookup table can therefore also be extended with register offsets.

Note that subdrivers can now access the two regmaps by name, even if the
primary regmap is registered last so that it is returned by default when
no name is provided in lookups.

Finally, note that the temperature alarm and GPIO subdrivers need some
minor rework before they can be used with non-SPMI devices like the
PM8008. The temperature alarm MFD cell name specifically uses a "qpnp"
rather than "spmi" prefix to prevent binding until the driver has been
updated.

Tested-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20240608155526.12996-11-johan+linaro@kernel.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/dt-bindings/mfd/qcom-pm8008.h | 19 -------------------
 1 file changed, 19 deletions(-)
 delete mode 100644 include/dt-bindings/mfd/qcom-pm8008.h

(limited to 'include')

diff --git a/include/dt-bindings/mfd/qcom-pm8008.h b/include/dt-bindings/mfd/qcom-pm8008.h
deleted file mode 100644
index eca9448df228..000000000000
--- a/include/dt-bindings/mfd/qcom-pm8008.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (c) 2021 The Linux Foundation. All rights reserved.
- */
-
-#ifndef __DT_BINDINGS_MFD_QCOM_PM8008_H
-#define __DT_BINDINGS_MFD_QCOM_PM8008_H
-
-/* PM8008 IRQ numbers */
-#define PM8008_IRQ_MISC_UVLO	0
-#define PM8008_IRQ_MISC_OVLO	1
-#define PM8008_IRQ_MISC_OTST2	2
-#define PM8008_IRQ_MISC_OTST3	3
-#define PM8008_IRQ_MISC_LDO_OCP	4
-#define PM8008_IRQ_TEMP_ALARM	5
-#define PM8008_IRQ_GPIO1	6
-#define PM8008_IRQ_GPIO2	7
-
-#endif
-- 
cgit v1.2.3


From 92424801261d1564a0bb759da3cf3ccd69fdf5a2 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 13 Jun 2024 13:53:08 +0200
Subject: bpf: Fix reg_set_min_max corruption of fake_reg
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Juan reported that after doing some changes to buzzer [0] and implementing
a new fuzzing strategy guided by coverage, they noticed the following in
one of the probes:

  [...]
  13: (79) r6 = *(u64 *)(r0 +0)         ; R0=map_value(ks=4,vs=8) R6_w=scalar()
  14: (b7) r0 = 0                       ; R0_w=0
  15: (b4) w0 = -1                      ; R0_w=0xffffffff
  16: (74) w0 >>= 1                     ; R0_w=0x7fffffff
  17: (5c) w6 &= w0                     ; R0_w=0x7fffffff R6_w=scalar(smin=smin32=0,smax=umax=umax32=0x7fffffff,var_off=(0x0; 0x7fffffff))
  18: (44) w6 |= 2                      ; R6_w=scalar(smin=umin=smin32=umin32=2,smax=umax=umax32=0x7fffffff,var_off=(0x2; 0x7ffffffd))
  19: (56) if w6 != 0x7ffffffd goto pc+1
  REG INVARIANTS VIOLATION (true_reg2): range bounds violation u64=[0x7fffffff, 0x7ffffffd] s64=[0x7fffffff, 0x7ffffffd] u32=[0x7fffffff, 0x7ffffffd] s32=[0x7fffffff, 0x7ffffffd] var_off=(0x7fffffff, 0x0)
  REG INVARIANTS VIOLATION (false_reg1): range bounds violation u64=[0x7fffffff, 0x7ffffffd] s64=[0x7fffffff, 0x7ffffffd] u32=[0x7fffffff, 0x7ffffffd] s32=[0x7fffffff, 0x7ffffffd] var_off=(0x7fffffff, 0x0)
  REG INVARIANTS VIOLATION (false_reg2): const tnum out of sync with range bounds u64=[0x0, 0xffffffffffffffff] s64=[0x8000000000000000, 0x7fffffffffffffff] u32=[0x0, 0xffffffff] s32=[0x80000000, 0x7fffffff] var_off=(0x7fffffff, 0x0)
  19: R6_w=0x7fffffff
  20: (95) exit

  from 19 to 21: R0=0x7fffffff R6=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x7ffffffe,var_off=(0x2; 0x7ffffffd)) R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm
  21: R0=0x7fffffff R6=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x7ffffffe,var_off=(0x2; 0x7ffffffd)) R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm
  21: (14) w6 -= 2147483632             ; R6_w=scalar(smin=umin=umin32=2,smax=umax=0xffffffff,smin32=0x80000012,smax32=14,var_off=(0x2; 0xfffffffd))
  22: (76) if w6 s>= 0xe goto pc+1      ; R6_w=scalar(smin=umin=umin32=2,smax=umax=0xffffffff,smin32=0x80000012,smax32=13,var_off=(0x2; 0xfffffffd))
  23: (95) exit

  from 22 to 24: R0=0x7fffffff R6_w=14 R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm
  24: R0=0x7fffffff R6_w=14 R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm
  24: (14) w6 -= 14                     ; R6_w=0
  [...]

What can be seen here is a register invariant violation on line 19. After
the binary-or in line 18, the verifier knows that bit 2 is set but knows
nothing about the rest of the content which was loaded from a map value,
meaning, range is [2,0x7fffffff] with var_off=(0x2; 0x7ffffffd). When in
line 19 the verifier analyzes the branch, it splits the register states
in reg_set_min_max() into the registers of the true branch (true_reg1,
true_reg2) and the registers of the false branch (false_reg1, false_reg2).

Since the test is w6 != 0x7ffffffd, the src_reg is a known constant.
Internally, the verifier creates a "fake" register initialized as scalar
to the value of 0x7ffffffd, and then passes it onto reg_set_min_max(). Now,
for line 19, it is mathematically impossible to take the false branch of
this program, yet the verifier analyzes it. It is impossible because the
second bit of r6 will be set due to the prior or operation and the
constant in the condition has that bit unset (hex(fd) == binary(1111 1101).

When the verifier first analyzes the false / fall-through branch, it will
compute an intersection between the var_off of r6 and of the constant. This
is because the verifier creates a "fake" register initialized to the value
of the constant. The intersection result later refines both registers in
regs_refine_cond_op():

  [...]
  t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off));
  reg1->var_off = tnum_with_subreg(reg1->var_off, t);
  reg2->var_off = tnum_with_subreg(reg2->var_off, t);
  [...]

Since the verifier is analyzing the false branch of the conditional jump,
reg1 is equal to false_reg1 and reg2 is equal to false_reg2, i.e. the reg2
is the "fake" register that was meant to hold a constant value. The resulting
var_off of the intersection says that both registers now hold a known value
of var_off=(0x7fffffff, 0x0) or in other words: this operation manages to
make the verifier think that the "constant" value that was passed in the
jump operation now holds a different value.

Normally this would not be an issue since it should not influence the true
branch, however, false_reg2 and true_reg2 are pointers to the same "fake"
register. Meaning, the false branch can influence the results of the true
branch. In line 24, the verifier assumes R6_w=0, but the actual runtime
value in this case is 1. The fix is simply not passing in the same "fake"
register location as inputs to reg_set_min_max(), but instead making a
copy. Moving the fake_reg into the env also reduces stack consumption by
120 bytes. With this, the verifier successfully rejects invalid accesses
from the test program.

  [0] https://github.com/google/buzzer

Fixes: 67420501e868 ("bpf: generalize reg_set_min_max() to handle non-const register comparisons")
Reported-by: Juan José López Jaimez <jjlopezjaimez@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/r/20240613115310.25383-1-daniel@iogearbox.net
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 50aa87f8d77f..e4070fb02b11 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -746,6 +746,8 @@ struct bpf_verifier_env {
 	/* Same as scratched_regs but for stack slots */
 	u64 scratched_stack_slots;
 	u64 prev_log_pos, prev_insn_print_pos;
+	/* buffer used to temporary hold constants as scalar registers */
+	struct bpf_reg_state fake_reg[2];
 	/* buffer used to generate temporary string representations,
 	 * e.g., in reg_type_str() to generate reg_type string
 	 */
-- 
cgit v1.2.3


From 9a95c5bfbf02a0a7f5983280fe284a0ff0836c34 Mon Sep 17 00:00:00 2001
From: GUO Zihua <guozihua@huawei.com>
Date: Tue, 7 May 2024 01:25:41 +0000
Subject: ima: Avoid blocking in RCU read-side critical section

A panic happens in ima_match_policy:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
PGD 42f873067 P4D 0
Oops: 0000 [#1] SMP NOPTI
CPU: 5 PID: 1286325 Comm: kubeletmonit.sh
Kdump: loaded Tainted: P
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
               BIOS 0.0.0 02/06/2015
RIP: 0010:ima_match_policy+0x84/0x450
Code: 49 89 fc 41 89 cf 31 ed 89 44 24 14 eb 1c 44 39
      7b 18 74 26 41 83 ff 05 74 20 48 8b 1b 48 3b 1d
      f2 b9 f4 00 0f 84 9c 01 00 00 <44> 85 73 10 74 ea
      44 8b 6b 14 41 f6 c5 01 75 d4 41 f6 c5 02 74 0f
RSP: 0018:ff71570009e07a80 EFLAGS: 00010207
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000200
RDX: ffffffffad8dc7c0 RSI: 0000000024924925 RDI: ff3e27850dea2000
RBP: 0000000000000000 R08: 0000000000000000 R09: ffffffffabfce739
R10: ff3e27810cc42400 R11: 0000000000000000 R12: ff3e2781825ef970
R13: 00000000ff3e2785 R14: 000000000000000c R15: 0000000000000001
FS:  00007f5195b51740(0000)
GS:ff3e278b12d40000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000010 CR3: 0000000626d24002 CR4: 0000000000361ee0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 ima_get_action+0x22/0x30
 process_measurement+0xb0/0x830
 ? page_add_file_rmap+0x15/0x170
 ? alloc_set_pte+0x269/0x4c0
 ? prep_new_page+0x81/0x140
 ? simple_xattr_get+0x75/0xa0
 ? selinux_file_open+0x9d/0xf0
 ima_file_check+0x64/0x90
 path_openat+0x571/0x1720
 do_filp_open+0x9b/0x110
 ? page_counter_try_charge+0x57/0xc0
 ? files_cgroup_alloc_fd+0x38/0x60
 ? __alloc_fd+0xd4/0x250
 ? do_sys_open+0x1bd/0x250
 do_sys_open+0x1bd/0x250
 do_syscall_64+0x5d/0x1d0
 entry_SYSCALL_64_after_hwframe+0x65/0xca

Commit c7423dbdbc9e ("ima: Handle -ESTALE returned by
ima_filter_rule_match()") introduced call to ima_lsm_copy_rule within a
RCU read-side critical section which contains kmalloc with GFP_KERNEL.
This implies a possible sleep and violates limitations of RCU read-side
critical sections on non-PREEMPT systems.

Sleeping within RCU read-side critical section might cause
synchronize_rcu() returning early and break RCU protection, allowing a
UAF to happen.

The root cause of this issue could be described as follows:
|	Thread A	|	Thread B	|
|			|ima_match_policy	|
|			|  rcu_read_lock	|
|ima_lsm_update_rule	|			|
|  synchronize_rcu	|			|
|			|    kmalloc(GFP_KERNEL)|
|			|      sleep		|
==> synchronize_rcu returns early
|  kfree(entry)		|			|
|			|    entry = entry->next|
==> UAF happens and entry now becomes NULL (or could be anything).
|			|    entry->action	|
==> Accessing entry might cause panic.

To fix this issue, we are converting all kmalloc that is called within
RCU read-side critical section to use GFP_ATOMIC.

Fixes: c7423dbdbc9e ("ima: Handle -ESTALE returned by ima_filter_rule_match()")
Cc: stable@vger.kernel.org
Signed-off-by: GUO Zihua <guozihua@huawei.com>
Acked-by: John Johansen <john.johansen@canonical.com>
Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>
Reviewed-by: Casey Schaufler <casey@schaufler-ca.com>
[PM: fixed missing comment, long lines, !CONFIG_IMA_LSM_RULES case]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm_hook_defs.h | 2 +-
 include/linux/security.h      | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index f804b76cde44..44488b1ab9a9 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -413,7 +413,7 @@ LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring,
 
 #ifdef CONFIG_AUDIT
 LSM_HOOK(int, 0, audit_rule_init, u32 field, u32 op, char *rulestr,
-	 void **lsmrule)
+	 void **lsmrule, gfp_t gfp)
 LSM_HOOK(int, 0, audit_rule_known, struct audit_krule *krule)
 LSM_HOOK(int, 0, audit_rule_match, u32 secid, u32 field, u32 op, void *lsmrule)
 LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule)
diff --git a/include/linux/security.h b/include/linux/security.h
index 21cf70346b33..de3af33e6ff5 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2048,7 +2048,8 @@ static inline void security_key_post_create_or_update(struct key *keyring,
 
 #ifdef CONFIG_AUDIT
 #ifdef CONFIG_SECURITY
-int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule);
+int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule,
+			     gfp_t gfp);
 int security_audit_rule_known(struct audit_krule *krule);
 int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule);
 void security_audit_rule_free(void *lsmrule);
@@ -2056,7 +2057,7 @@ void security_audit_rule_free(void *lsmrule);
 #else
 
 static inline int security_audit_rule_init(u32 field, u32 op, char *rulestr,
-					   void **lsmrule)
+					   void **lsmrule, gfp_t gfp)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 79a947bd54348d4f63120aacc30505b3ad81fa59 Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@toblux.com>
Date: Thu, 6 Jun 2024 20:52:02 +0200
Subject: ACPI: NUMA: Consolidate header includes

The header file acpi/acpi_numa.h is included whether CONFIG_ACPI is
defined or not.

Include it only once before the #ifdef/#else/#endif preprocessor
directives and fix the following make includecheck warning:

	acpi/acpi_numa.h is included more than once

Signed-off-by: Thorsten Blum <thorsten.blum@toblux.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 28c3fb2bef0d..bb18e7bf8826 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -24,6 +24,7 @@ struct irq_domain_ops;
 #define _LINUX
 #endif
 #include <acpi/acpi.h>
+#include <acpi/acpi_numa.h>
 
 #ifdef	CONFIG_ACPI
 
@@ -35,7 +36,6 @@ struct irq_domain_ops;
 
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
-#include <acpi/acpi_numa.h>
 #include <acpi/acpi_io.h>
 #include <asm/acpi.h>
 
@@ -777,8 +777,6 @@ const char *acpi_get_subsystem_id(acpi_handle handle);
 #define acpi_dev_uid_match(adev, uid2)			(adev && false)
 #define acpi_dev_hid_uid_match(adev, hid2, uid2)	(adev && false)
 
-#include <acpi/acpi_numa.h>
-
 struct fwnode_handle;
 
 static inline bool acpi_dev_found(const char *hid)
-- 
cgit v1.2.3


From d6161b7d8b33aa8756ab2f7eed8cb371c2e7e8ed Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 12 Jun 2024 15:42:29 +0200
Subject: video/logo: Remove linux_serial_image comments

The last user of the serial_console ASCII image was removed in v2.1.115.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 include/linux/linux_logo.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/linux_logo.h b/include/linux/linux_logo.h
index d4d5b93efe84..e37699b7e839 100644
--- a/include/linux/linux_logo.h
+++ b/include/linux/linux_logo.h
@@ -10,9 +10,6 @@
  *  Copyright (C) 2001 Greg Banks <gnb@alphalink.com.au>
  *  Copyright (C) 2001 Jan-Benedict Glaw <jbglaw@lug-owl.de>
  *  Copyright (C) 2003 Geert Uytterhoeven <geert@linux-m68k.org>
- *
- *  Serial_console ascii image can be any size,
- *  but should contain %s to display the version
  */
 
 #include <linux/init.h>
-- 
cgit v1.2.3


From 633aeefafc9c2a07a76a62be6aac1d73c3e3defa Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 13 Jun 2024 14:18:26 -0700
Subject: scsi: core: Introduce the BLIST_SKIP_IO_HINTS flag

Prepare for skipping the IO Advice Hints Grouping mode page for USB storage
devices.

Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Joao Machado <jocrismachado@gmail.com>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Christian Heusel <christian@heusel.eu>
Cc: stable@vger.kernel.org
Fixes: 4f53138fffc2 ("scsi: sd: Translate data lifetime information")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20240613211828.2077477-2-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/scsi/scsi_devinfo.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h
index 6b548dc2c496..1d79a3b536ce 100644
--- a/include/scsi/scsi_devinfo.h
+++ b/include/scsi/scsi_devinfo.h
@@ -69,8 +69,10 @@
 #define BLIST_RETRY_ITF		((__force blist_flags_t)(1ULL << 32))
 /* Always retry ABORTED_COMMAND with ASC 0xc1 */
 #define BLIST_RETRY_ASC_C1	((__force blist_flags_t)(1ULL << 33))
+/* Do not query the IO Advice Hints Grouping mode page */
+#define BLIST_SKIP_IO_HINTS	((__force blist_flags_t)(1ULL << 34))
 
-#define __BLIST_LAST_USED BLIST_RETRY_ASC_C1
+#define __BLIST_LAST_USED BLIST_SKIP_IO_HINTS
 
 #define __BLIST_HIGH_UNUSED (~(__BLIST_LAST_USED | \
 			       (__force blist_flags_t) \
-- 
cgit v1.2.3


From f4a1254f2a076afb0edd473589bf40f9b4d36b41 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 14 Jun 2024 01:04:29 +0100
Subject: io_uring: fix cancellation overwriting req->flags

Only the current owner of a request is allowed to write into req->flags.
Hence, the cancellation path should never touch it. Add a new field
instead of the flag, move it into the 3rd cache line because it should
always be initialised. poll_refs can move further as polling is an
involved process anyway.

It's a minimal patch, in the future we can and should find a better
place for it and remove now unused REQ_F_CANCEL_SEQ.

Fixes: 521223d7c229f ("io_uring/cancel: don't default to setting req->work.cancel_seq")
Cc: stable@vger.kernel.org
Reported-by: Li Shi <sl1589472800@gmail.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/6827b129f8f0ad76fa9d1f0a773de938b240ffab.1718323430.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 7a6b190c7da7..b48570eaa449 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -648,7 +648,7 @@ struct io_kiocb {
 	struct io_rsrc_node		*rsrc_node;
 
 	atomic_t			refs;
-	atomic_t			poll_refs;
+	bool				cancel_seq_set;
 	struct io_task_work		io_task_work;
 	/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
 	struct hlist_node		hash_node;
@@ -657,6 +657,7 @@ struct io_kiocb {
 	/* opcode allocated if it needs to store data for async defer */
 	void				*async_data;
 	/* linked requests, IFF REQ_F_HARDLINK or REQ_F_LINK are set */
+	atomic_t			poll_refs;
 	struct io_kiocb			*link;
 	/* custom credentials, valid IFF REQ_F_CREDS is set */
 	const struct cred		*creds;
-- 
cgit v1.2.3


From 88a26c3c2405626e0083a49609c5e8cd6c453d87 Mon Sep 17 00:00:00 2001
From: Chen Wang <unicorn_wang@outlook.com>
Date: Mon, 15 Jan 2024 17:58:46 +0800
Subject: dt-bindings: clock: sophgo: add pll clocks for SG2042

Add bindings for the pll clocks for Sophgo SG2042.

Signed-off-by: Chen Wang <unicorn_wang@outlook.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Guo Ren <guoren@kernel.org>
---
 include/dt-bindings/clock/sophgo,sg2042-pll.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 include/dt-bindings/clock/sophgo,sg2042-pll.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/sophgo,sg2042-pll.h b/include/dt-bindings/clock/sophgo,sg2042-pll.h
new file mode 100644
index 000000000000..2d519b3bf51c
--- /dev/null
+++ b/include/dt-bindings/clock/sophgo,sg2042-pll.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright (C) 2023 Sophgo Technology Inc. All rights reserved.
+ */
+
+#ifndef __DT_BINDINGS_SOPHGO_SG2042_PLL_H__
+#define __DT_BINDINGS_SOPHGO_SG2042_PLL_H__
+
+#define MPLL_CLK			0
+#define FPLL_CLK			1
+#define DPLL0_CLK			2
+#define DPLL1_CLK			3
+
+#endif /* __DT_BINDINGS_SOPHGO_SG2042_PLL_H__ */
-- 
cgit v1.2.3


From 5a7144d61d73d801540f8846d8e1b9fa52a5559c Mon Sep 17 00:00:00 2001
From: Chen Wang <unicorn_wang@outlook.com>
Date: Wed, 31 Jan 2024 09:57:01 +0800
Subject: dt-bindings: clock: sophgo: add RP gate clocks for SG2042

Add bindings for the gate clocks of RP subsystem for Sophgo SG2042.

Signed-off-by: Chen Wang <unicorn_wang@outlook.com>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 include/dt-bindings/clock/sophgo,sg2042-rpgate.h | 58 ++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 include/dt-bindings/clock/sophgo,sg2042-rpgate.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/sophgo,sg2042-rpgate.h b/include/dt-bindings/clock/sophgo,sg2042-rpgate.h
new file mode 100644
index 000000000000..8b4522d5f559
--- /dev/null
+++ b/include/dt-bindings/clock/sophgo,sg2042-rpgate.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright (C) 2023 Sophgo Technology Inc. All rights reserved.
+ */
+
+#ifndef __DT_BINDINGS_SOPHGO_SG2042_RPGATE_H__
+#define __DT_BINDINGS_SOPHGO_SG2042_RPGATE_H__
+
+#define GATE_CLK_RXU0			0
+#define GATE_CLK_RXU1			1
+#define GATE_CLK_RXU2			2
+#define GATE_CLK_RXU3			3
+#define GATE_CLK_RXU4			4
+#define GATE_CLK_RXU5			5
+#define GATE_CLK_RXU6			6
+#define GATE_CLK_RXU7			7
+#define GATE_CLK_RXU8			8
+#define GATE_CLK_RXU9			9
+#define GATE_CLK_RXU10			10
+#define GATE_CLK_RXU11			11
+#define GATE_CLK_RXU12			12
+#define GATE_CLK_RXU13			13
+#define GATE_CLK_RXU14			14
+#define GATE_CLK_RXU15			15
+#define GATE_CLK_RXU16			16
+#define GATE_CLK_RXU17			17
+#define GATE_CLK_RXU18			18
+#define GATE_CLK_RXU19			19
+#define GATE_CLK_RXU20			20
+#define GATE_CLK_RXU21			21
+#define GATE_CLK_RXU22			22
+#define GATE_CLK_RXU23			23
+#define GATE_CLK_RXU24			24
+#define GATE_CLK_RXU25			25
+#define GATE_CLK_RXU26			26
+#define GATE_CLK_RXU27			27
+#define GATE_CLK_RXU28			28
+#define GATE_CLK_RXU29			29
+#define GATE_CLK_RXU30			30
+#define GATE_CLK_RXU31			31
+#define GATE_CLK_MP0			32
+#define GATE_CLK_MP1			33
+#define GATE_CLK_MP2			34
+#define GATE_CLK_MP3			35
+#define GATE_CLK_MP4			36
+#define GATE_CLK_MP5			37
+#define GATE_CLK_MP6			38
+#define GATE_CLK_MP7			39
+#define GATE_CLK_MP8			40
+#define GATE_CLK_MP9			41
+#define GATE_CLK_MP10			42
+#define GATE_CLK_MP11			43
+#define GATE_CLK_MP12			44
+#define GATE_CLK_MP13			45
+#define GATE_CLK_MP14			46
+#define GATE_CLK_MP15			47
+
+#endif /* __DT_BINDINGS_SOPHGO_SG2042_RPGATE_H__ */
-- 
cgit v1.2.3


From 5911423798b2eba65d6ea0aff6505280b3eb55e6 Mon Sep 17 00:00:00 2001
From: Chen Wang <unicorn_wang@outlook.com>
Date: Fri, 24 Nov 2023 14:02:43 +0800
Subject: dt-bindings: clock: sophgo: add clkgen for SG2042

Add bindings for the clock generator of divider/mux and gates working
for other subsystem than RP subsystem for Sophgo SG2042.

Signed-off-by: Chen Wang <unicorn_wang@outlook.com>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 include/dt-bindings/clock/sophgo,sg2042-clkgen.h | 111 +++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 include/dt-bindings/clock/sophgo,sg2042-clkgen.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/sophgo,sg2042-clkgen.h b/include/dt-bindings/clock/sophgo,sg2042-clkgen.h
new file mode 100644
index 000000000000..84f7857317a2
--- /dev/null
+++ b/include/dt-bindings/clock/sophgo,sg2042-clkgen.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright (C) 2023 Sophgo Technology Inc. All rights reserved.
+ */
+
+#ifndef __DT_BINDINGS_SOPHGO_SG2042_CLKGEN_H__
+#define __DT_BINDINGS_SOPHGO_SG2042_CLKGEN_H__
+
+#define DIV_CLK_MPLL_RP_CPU_NORMAL_0	0
+#define DIV_CLK_MPLL_AXI_DDR_0		1
+#define DIV_CLK_FPLL_DDR01_1		2
+#define DIV_CLK_FPLL_DDR23_1		3
+#define DIV_CLK_FPLL_RP_CPU_NORMAL_1	4
+#define DIV_CLK_FPLL_50M_A53		5
+#define DIV_CLK_FPLL_TOP_RP_CMN_DIV2	6
+#define DIV_CLK_FPLL_UART_500M		7
+#define DIV_CLK_FPLL_AHB_LPC		8
+#define DIV_CLK_FPLL_EFUSE		9
+#define DIV_CLK_FPLL_TX_ETH0		10
+#define DIV_CLK_FPLL_PTP_REF_I_ETH0	11
+#define DIV_CLK_FPLL_REF_ETH0		12
+#define DIV_CLK_FPLL_EMMC		13
+#define DIV_CLK_FPLL_SD			14
+#define DIV_CLK_FPLL_TOP_AXI0		15
+#define DIV_CLK_FPLL_TOP_AXI_HSPERI	16
+#define DIV_CLK_FPLL_AXI_DDR_1		17
+#define DIV_CLK_FPLL_DIV_TIMER1		18
+#define DIV_CLK_FPLL_DIV_TIMER2		19
+#define DIV_CLK_FPLL_DIV_TIMER3		20
+#define DIV_CLK_FPLL_DIV_TIMER4		21
+#define DIV_CLK_FPLL_DIV_TIMER5		22
+#define DIV_CLK_FPLL_DIV_TIMER6		23
+#define DIV_CLK_FPLL_DIV_TIMER7		24
+#define DIV_CLK_FPLL_DIV_TIMER8		25
+#define DIV_CLK_FPLL_100K_EMMC		26
+#define DIV_CLK_FPLL_100K_SD		27
+#define DIV_CLK_FPLL_GPIO_DB		28
+#define DIV_CLK_DPLL0_DDR01_0		29
+#define DIV_CLK_DPLL1_DDR23_0		30
+
+#define GATE_CLK_RP_CPU_NORMAL_DIV0	31
+#define GATE_CLK_AXI_DDR_DIV0		32
+
+#define GATE_CLK_RP_CPU_NORMAL_DIV1	33
+#define GATE_CLK_A53_50M		34
+#define GATE_CLK_TOP_RP_CMN_DIV2	35
+#define GATE_CLK_HSDMA			36
+#define GATE_CLK_EMMC_100M		37
+#define GATE_CLK_SD_100M		38
+#define GATE_CLK_TX_ETH0		39
+#define GATE_CLK_PTP_REF_I_ETH0		40
+#define GATE_CLK_REF_ETH0		41
+#define GATE_CLK_UART_500M		42
+#define GATE_CLK_EFUSE			43
+
+#define GATE_CLK_AHB_LPC		44
+#define GATE_CLK_AHB_ROM		45
+#define GATE_CLK_AHB_SF			46
+
+#define GATE_CLK_APB_UART		47
+#define GATE_CLK_APB_TIMER		48
+#define GATE_CLK_APB_EFUSE		49
+#define GATE_CLK_APB_GPIO		50
+#define GATE_CLK_APB_GPIO_INTR		51
+#define GATE_CLK_APB_SPI		52
+#define GATE_CLK_APB_I2C		53
+#define GATE_CLK_APB_WDT		54
+#define GATE_CLK_APB_PWM		55
+#define GATE_CLK_APB_RTC		56
+
+#define GATE_CLK_AXI_PCIE0		57
+#define GATE_CLK_AXI_PCIE1		58
+#define GATE_CLK_SYSDMA_AXI		59
+#define GATE_CLK_AXI_DBG_I2C		60
+#define GATE_CLK_AXI_SRAM		61
+#define GATE_CLK_AXI_ETH0		62
+#define GATE_CLK_AXI_EMMC		63
+#define GATE_CLK_AXI_SD			64
+#define GATE_CLK_TOP_AXI0		65
+#define GATE_CLK_TOP_AXI_HSPERI		66
+
+#define GATE_CLK_TIMER1			67
+#define GATE_CLK_TIMER2			68
+#define GATE_CLK_TIMER3			69
+#define GATE_CLK_TIMER4			70
+#define GATE_CLK_TIMER5			71
+#define GATE_CLK_TIMER6			72
+#define GATE_CLK_TIMER7			73
+#define GATE_CLK_TIMER8			74
+#define GATE_CLK_100K_EMMC		75
+#define GATE_CLK_100K_SD		76
+#define GATE_CLK_GPIO_DB		77
+
+#define GATE_CLK_AXI_DDR_DIV1		78
+#define GATE_CLK_DDR01_DIV1		79
+#define GATE_CLK_DDR23_DIV1		80
+
+#define GATE_CLK_DDR01_DIV0		81
+#define GATE_CLK_DDR23_DIV0		82
+
+#define GATE_CLK_DDR01			83
+#define GATE_CLK_DDR23			84
+#define GATE_CLK_RP_CPU_NORMAL		85
+#define GATE_CLK_AXI_DDR		86
+
+#define MUX_CLK_DDR01			87
+#define MUX_CLK_DDR23			88
+#define MUX_CLK_RP_CPU_NORMAL		89
+#define MUX_CLK_AXI_DDR			90
+
+#endif /* __DT_BINDINGS_SOPHGO_SG2042_CLKGEN_H__ */
-- 
cgit v1.2.3


From 6b0d3355e5a58fd73529fa6f930a877caca3f75d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sun, 26 May 2024 20:17:16 +0200
Subject: leds: class: Add flag to avoid automatic renaming of LED devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a mechanism for drivers to opt-out of the automatic device renaming
on conflicts.
Those drivers will provide their own conflict resolution.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20240526-cros_ec-kbd-led-framework-v3-2-ee577415a521@weissschuh.net
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/leds.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 6300313c46b7..36663ac6c58a 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -107,6 +107,7 @@ struct led_classdev {
 #define LED_BRIGHT_HW_CHANGED	BIT(21)
 #define LED_RETAIN_AT_SHUTDOWN	BIT(22)
 #define LED_INIT_DEFAULT_TRIGGER BIT(23)
+#define LED_REJECT_NAME_CONFLICT BIT(24)
 
 	/* set_brightness_work / blink_timer flags, atomic, private. */
 	unsigned long		work_flags;
-- 
cgit v1.2.3


From 146a06a0d225cae240065233fd168fb0b95a10ff Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Sat, 8 Jun 2024 11:01:13 +0200
Subject: HID: rename struct hid_bpf_ops into hid_ops

Those operations are the ones from HID, not HID-BPF, and I'd like to
reuse hid_bpf_ops as the user facing struct_ops API.

Link: https://lore.kernel.org/r/20240608-hid_bpf_struct_ops-v3-1-6ac6ade58329@kernel.org
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid_bpf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index eec2592dec12..a66103618e6e 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -97,7 +97,7 @@ enum hid_bpf_prog_type {
 
 struct hid_report_enum;
 
-struct hid_bpf_ops {
+struct hid_ops {
 	struct hid_report *(*hid_get_report)(struct hid_report_enum *report_enum, const u8 *data);
 	int (*hid_hw_raw_request)(struct hid_device *hdev,
 				  unsigned char reportnum, __u8 *buf,
@@ -110,7 +110,7 @@ struct hid_bpf_ops {
 	const struct bus_type *bus_type;
 };
 
-extern struct hid_bpf_ops *hid_bpf_ops;
+extern struct hid_ops *hid_ops;
 
 struct hid_bpf_prog_list {
 	u16 prog_idx[HID_BPF_MAX_PROGS_PER_DEV];
-- 
cgit v1.2.3


From ebc0d8093e8c97de459615438edefad1a4ac352c Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Sat, 8 Jun 2024 11:01:15 +0200
Subject: HID: bpf: implement HID-BPF through bpf_struct_ops

We do this implementation in several steps to not have the CI failing:
- first (this patch), we add struct_ops while keeping the existing infra
  available
- then we change the selftests, the examples and the existing in-tree
  HID-BPF programs
- then we remove the existing trace points making old HID-BPF obsolete

There are a few advantages of struct_ops over tracing:
- compatibility with sleepable programs (for hid_hw_raw_request() in
  a later patch)
- a lot simpler in the kernel: it's a simple rcu protected list
- we can add more parameters to the function called without much trouble
- the "attach" is now generic through BPF-core: the caller just needs to
  set hid_id and flags before calling __load().
- all the BPF tough part is not handled in BPF-core through generic
  processing
- hid_bpf_ctx is now only writable where it needs be

Acked-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/r/20240608-hid_bpf_struct_ops-v3-3-6ac6ade58329@kernel.org
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid_bpf.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 60 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index a66103618e6e..c4f4ce10b7dd 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -65,11 +65,12 @@ struct hid_bpf_ctx {
  * @HID_BPF_FLAG_INSERT_HEAD: insert the given program before any other program
  *                            currently attached to the device. This doesn't
  *                            guarantee that this program will always be first
- * @HID_BPF_FLAG_MAX: sentinel value, not to be used by the callers
  */
 enum hid_bpf_attach_flags {
 	HID_BPF_FLAG_NONE = 0,
 	HID_BPF_FLAG_INSERT_HEAD = _BITUL(0),
+
+	/* private: internal use only */
 	HID_BPF_FLAG_MAX,
 };
 
@@ -112,6 +113,60 @@ struct hid_ops {
 
 extern struct hid_ops *hid_ops;
 
+/**
+ * struct hid_bpf_ops - A BPF struct_ops of callbacks allowing to attach HID-BPF
+ *			programs to a HID device
+ * @hid_id: the HID uniq ID to attach to. This is writeable before ``load()``, and
+ *	    cannot be changed after
+ * @flags: flags used while attaching the struct_ops to the device. Currently only
+ *	   available value is %0 or ``BPF_F_BEFORE``.
+ *	   Writeable only before ``load()``
+ */
+struct hid_bpf_ops {
+	/* hid_id needs to stay first so we can easily change it
+	 * from userspace.
+	 */
+	int			hid_id;
+	u32			flags;
+
+	/* private: do not show up in the docs */
+	struct list_head	list;
+
+	/* public: rest should show up in the docs */
+
+	/**
+	 * @hid_device_event: called whenever an event is coming in from the device
+	 *
+	 * It has the following arguments:
+	 *
+	 * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx
+	 *
+	 * Return: %0 on success and keep processing; a positive
+	 * value to change the incoming size buffer; a negative
+	 * error code to interrupt the processing of this event
+	 *
+	 * Context: Interrupt context.
+	 */
+	int (*hid_device_event)(struct hid_bpf_ctx *ctx, enum hid_report_type report_type);
+
+	/**
+	 * @hid_rdesc_fixup: called when the probe function parses the report descriptor
+	 * of the HID device
+	 *
+	 * It has the following arguments:
+	 *
+	 * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx
+	 *
+	 * Return: %0 on success and keep processing; a positive
+	 * value to change the incoming size buffer; a negative
+	 * error code to interrupt the processing of this device
+	 */
+	int (*hid_rdesc_fixup)(struct hid_bpf_ctx *ctx);
+
+	/* private: do not show up in the docs */
+	struct hid_device *hdev;
+};
+
 struct hid_bpf_prog_list {
 	u16 prog_idx[HID_BPF_MAX_PROGS_PER_DEV];
 	u8 prog_cnt;
@@ -129,6 +184,10 @@ struct hid_bpf {
 	bool destroyed;			/* prevents the assignment of any progs */
 
 	spinlock_t progs_lock;		/* protects RCU update of progs */
+
+	struct hid_bpf_ops *rdesc_ops;
+	struct list_head prog_list;
+	struct mutex prog_list_lock;	/* protects prog_list update */
 };
 
 /* specific HID-BPF link when a program is attached to a device */
-- 
cgit v1.2.3


From 4a86220e046da009bef0948e9f51d1d26d68f93c Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Sat, 8 Jun 2024 11:01:20 +0200
Subject: HID: bpf: remove tracing HID-BPF capability

We can now rely on struct_ops as we cleared the users in-tree.

Link: https://lore.kernel.org/r/20240608-hid_bpf_struct_ops-v3-8-6ac6ade58329@kernel.org
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid_bpf.h | 54 +------------------------------------------------
 1 file changed, 1 insertion(+), 53 deletions(-)

(limited to 'include')

diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index c4f4ce10b7dd..447b94aa99ab 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -4,7 +4,7 @@
 #define __HID_BPF_H
 
 #include <linux/bpf.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <uapi/linux/hid.h>
 
 struct hid_device;
@@ -24,11 +24,7 @@ struct hid_device;
  *
  * All of these fields are currently read-only.
  *
- * @index: program index in the jump table. No special meaning (a smaller index
- *         doesn't mean the program will be executed before another program with
- *         a bigger index).
  * @hid: the ``struct hid_device`` representing the device itself
- * @report_type: used for ``hid_bpf_device_event()``
  * @allocated_size: Allocated size of data.
  *
  *                  This is how much memory is available and can be requested
@@ -47,54 +43,21 @@ struct hid_device;
  * @retval: Return value of the previous program.
  */
 struct hid_bpf_ctx {
-	__u32 index;
 	const struct hid_device *hid;
 	__u32 allocated_size;
-	enum hid_report_type report_type;
 	union {
 		__s32 retval;
 		__s32 size;
 	};
 };
 
-/**
- * enum hid_bpf_attach_flags - flags used when attaching a HIF-BPF program
- *
- * @HID_BPF_FLAG_NONE: no specific flag is used, the kernel choses where to
- *                     insert the program
- * @HID_BPF_FLAG_INSERT_HEAD: insert the given program before any other program
- *                            currently attached to the device. This doesn't
- *                            guarantee that this program will always be first
- */
-enum hid_bpf_attach_flags {
-	HID_BPF_FLAG_NONE = 0,
-	HID_BPF_FLAG_INSERT_HEAD = _BITUL(0),
-
-	/* private: internal use only */
-	HID_BPF_FLAG_MAX,
-};
-
-/* Following functions are tracepoints that BPF programs can attach to */
-int hid_bpf_device_event(struct hid_bpf_ctx *ctx);
-int hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx);
-
 /*
  * Below is HID internal
  */
 
-/* internal function to call eBPF programs, not to be used by anybody */
-int __hid_bpf_tail_call(struct hid_bpf_ctx *ctx);
-
 #define HID_BPF_MAX_PROGS_PER_DEV 64
 #define HID_BPF_FLAG_MASK (((HID_BPF_FLAG_MAX - 1) << 1) - 1)
 
-/* types of HID programs to attach to */
-enum hid_bpf_prog_type {
-	HID_BPF_PROG_TYPE_UNDEF = -1,
-	HID_BPF_PROG_TYPE_DEVICE_EVENT,			/* an event is emitted from the device */
-	HID_BPF_PROG_TYPE_RDESC_FIXUP,
-	HID_BPF_PROG_TYPE_MAX,
-};
 
 struct hid_report_enum;
 
@@ -167,11 +130,6 @@ struct hid_bpf_ops {
 	struct hid_device *hdev;
 };
 
-struct hid_bpf_prog_list {
-	u16 prog_idx[HID_BPF_MAX_PROGS_PER_DEV];
-	u8 prog_cnt;
-};
-
 /* stored in each device */
 struct hid_bpf {
 	u8 *device_data;		/* allocated when a bpf program of type
@@ -179,23 +137,13 @@ struct hid_bpf {
 					 * to this HID device
 					 */
 	u32 allocated_data;
-
-	struct hid_bpf_prog_list __rcu *progs[HID_BPF_PROG_TYPE_MAX];	/* attached BPF progs */
 	bool destroyed;			/* prevents the assignment of any progs */
 
-	spinlock_t progs_lock;		/* protects RCU update of progs */
-
 	struct hid_bpf_ops *rdesc_ops;
 	struct list_head prog_list;
 	struct mutex prog_list_lock;	/* protects prog_list update */
 };
 
-/* specific HID-BPF link when a program is attached to a device */
-struct hid_bpf_link {
-	struct bpf_link link;
-	int hid_table_index;
-};
-
 #ifdef CONFIG_HID_BPF
 u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data,
 				  u32 *size, int interrupt);
-- 
cgit v1.2.3


From c5958697a5fa29d3ba9332205a88725afe9ed912 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Sat, 8 Jun 2024 11:01:22 +0200
Subject: Documentation: HID: amend HID-BPF for struct_ops

Now that we are using struct_ops, the docs need to be changed.

Link: https://lore.kernel.org/r/20240608-hid_bpf_struct_ops-v3-10-6ac6ade58329@kernel.org
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid_bpf.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index 447b94aa99ab..1b4cc1b2c31d 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -20,11 +20,9 @@ struct hid_device;
  * struct hid_bpf_ctx - User accessible data for all HID programs
  *
  * ``data`` is not directly accessible from the context. We need to issue
- * a call to ``hid_bpf_get_data()`` in order to get a pointer to that field.
+ * a call to hid_bpf_get_data() in order to get a pointer to that field.
  *
- * All of these fields are currently read-only.
- *
- * @hid: the ``struct hid_device`` representing the device itself
+ * @hid: the &struct hid_device representing the device itself
  * @allocated_size: Allocated size of data.
  *
  *                  This is how much memory is available and can be requested
@@ -41,6 +39,8 @@ struct hid_device;
  *        ``size`` must always be less or equal than ``allocated_size`` (it is enforced
  *        once all BPF programs have been run).
  * @retval: Return value of the previous program.
+ *
+ * ``hid`` and ``allocated_size`` are read-only, ``size`` and ``retval`` are read-write.
  */
 struct hid_bpf_ctx {
 	const struct hid_device *hid;
-- 
cgit v1.2.3


From 33c0fb85b571b0f1bbdbf466e770eebeb29e6f41 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Sat, 8 Jun 2024 11:01:28 +0200
Subject: HID: bpf: make part of struct hid_device writable

It is useful to change the name, the phys and/or the uniq of a
struct hid_device during .rdesc_fixup().

For example, hid-uclogic.ko changes the uniq to store the firmware version
to differentiate between 2 devices sharing the same PID. In the same
way, changing the device name is useful when the device export 3 nodes,
all with the same name.

Link: https://lore.kernel.org/r/20240608-hid_bpf_struct_ops-v3-16-6ac6ade58329@kernel.org
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid_bpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index 1b4cc1b2c31d..65d7e0acc8c2 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -43,7 +43,7 @@ struct hid_device;
  * ``hid`` and ``allocated_size`` are read-only, ``size`` and ``retval`` are read-write.
  */
 struct hid_bpf_ctx {
-	const struct hid_device *hid;
+	struct hid_device *hid;
 	__u32 allocated_size;
 	union {
 		__s32 retval;
-- 
cgit v1.2.3


From 5e5f2f92cccc29f356422d3cbc104f7f42430f22 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Fri, 14 Jun 2024 02:43:39 +0300
Subject: platform: arm64: add Lenovo Yoga C630 WOS EC driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lenovo Yoga C630 WOS is a laptop using Snapdragon 850 SoC. Like many
laptops it uses an embedded controller (EC) to perform various platform
operations, including, but not limited, to Type-C port control or power
supply handlng.

Add the driver for the EC, that creates devices for UCSI and power
supply devices.

Reviewed-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20240614-yoga-ec-driver-v7-2-9f0b9b40ae76@linaro.org
[ij: added #include <linux/cleanup.h>]
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/platform_data/lenovo-yoga-c630.h | 44 ++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 include/linux/platform_data/lenovo-yoga-c630.h

(limited to 'include')

diff --git a/include/linux/platform_data/lenovo-yoga-c630.h b/include/linux/platform_data/lenovo-yoga-c630.h
new file mode 100644
index 000000000000..5d1f9fb33cfc
--- /dev/null
+++ b/include/linux/platform_data/lenovo-yoga-c630.h
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022-2024, Linaro Ltd
+ * Authors:
+ *    Bjorn Andersson
+ *    Dmitry Baryshkov
+ */
+
+#ifndef _LENOVO_YOGA_C630_DATA_H
+#define _LENOVO_YOGA_C630_DATA_H
+
+struct yoga_c630_ec;
+struct notifier_block;
+
+#define YOGA_C630_MOD_NAME	"lenovo_yoga_c630"
+
+#define YOGA_C630_DEV_UCSI	"ucsi"
+#define YOGA_C630_DEV_PSY	"psy"
+
+int yoga_c630_ec_read8(struct yoga_c630_ec *ec, u8 addr);
+int yoga_c630_ec_read16(struct yoga_c630_ec *ec, u8 addr);
+
+int yoga_c630_ec_register_notify(struct yoga_c630_ec *ec, struct notifier_block *nb);
+void yoga_c630_ec_unregister_notify(struct yoga_c630_ec *ec, struct notifier_block *nb);
+
+#define YOGA_C630_UCSI_WRITE_SIZE	8
+#define YOGA_C630_UCSI_CCI_SIZE		4
+#define YOGA_C630_UCSI_DATA_SIZE	16
+#define YOGA_C630_UCSI_READ_SIZE	(YOGA_C630_UCSI_CCI_SIZE + YOGA_C630_UCSI_DATA_SIZE)
+
+u16 yoga_c630_ec_ucsi_get_version(struct yoga_c630_ec *ec);
+int yoga_c630_ec_ucsi_write(struct yoga_c630_ec *ec,
+			    const u8 req[YOGA_C630_UCSI_WRITE_SIZE]);
+int yoga_c630_ec_ucsi_read(struct yoga_c630_ec *ec,
+			   u8 resp[YOGA_C630_UCSI_READ_SIZE]);
+
+#define LENOVO_EC_EVENT_USB		0x20
+#define LENOVO_EC_EVENT_UCSI		0x21
+#define LENOVO_EC_EVENT_HPD		0x22
+#define LENOVO_EC_EVENT_BAT_STATUS	0x24
+#define LENOVO_EC_EVENT_BAT_INFO	0x25
+#define LENOVO_EC_EVENT_BAT_ADPT_STATUS	0x37
+
+#endif
-- 
cgit v1.2.3


From 1652b0bafeaa8281ca9a805d81e13d7647bd2f44 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 31 May 2024 09:48:08 +0200
Subject: block: remove unused queue limits API

Remove all APIs that are unused now that sd and sr have been converted
to the atomic queue limits API.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Nitesh Shetty <nj.shetty@samsung.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240531074837.1648501-14-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 24 ------------------------
 1 file changed, 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 24c36929920b..ad995e7a7698 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -332,8 +332,6 @@ struct queue_limits {
 typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
 			       void *data);
 
-void disk_set_zoned(struct gendisk *disk);
-
 #define BLK_ALL_ZONES  ((unsigned int)-1)
 int blkdev_report_zones(struct block_device *bdev, sector_t sector,
 		unsigned int nr_zones, report_zones_cb cb, void *data);
@@ -638,18 +636,6 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
 	return sector >> ilog2(disk->queue->limits.chunk_sectors);
 }
 
-static inline void disk_set_max_open_zones(struct gendisk *disk,
-		unsigned int max_open_zones)
-{
-	disk->queue->limits.max_open_zones = max_open_zones;
-}
-
-static inline void disk_set_max_active_zones(struct gendisk *disk,
-		unsigned int max_active_zones)
-{
-	disk->queue->limits.max_active_zones = max_active_zones;
-}
-
 static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
 {
 	return bdev->bd_disk->queue->limits.max_open_zones;
@@ -929,24 +915,14 @@ static inline void queue_limits_cancel_update(struct request_queue *q)
 /*
  * Access functions for manipulating queue properties
  */
-extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
 void blk_queue_max_secure_erase_sectors(struct request_queue *q,
 		unsigned int max_sectors);
 extern void blk_queue_max_discard_sectors(struct request_queue *q,
 		unsigned int max_discard_sectors);
 extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
 		unsigned int max_write_same_sectors);
-extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
-extern void blk_queue_max_zone_append_sectors(struct request_queue *q,
-		unsigned int max_zone_append_sectors);
-extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
-void blk_queue_zone_write_granularity(struct request_queue *q,
-				      unsigned int size);
-extern void blk_queue_alignment_offset(struct request_queue *q,
-				       unsigned int alignment);
 void disk_update_readahead(struct gendisk *disk);
 extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
-extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
 extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
 extern void blk_set_stacking_limits(struct queue_limits *lim);
-- 
cgit v1.2.3


From 73e3715ed14844067c5c598e72777641004a7f60 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 31 May 2024 09:48:09 +0200
Subject: block: add special APIs for run-time disabling of discard and friends

A few drivers optimistically try to support discard, write zeroes and
secure erase and disable the features from the I/O completion handler
if the hardware can't support them.  This disable can't be done using
the atomic queue limits API because the I/O completion handlers can't
take sleeping locks or freeze the queue.  Keep the existing clearing
of the relevant field to zero, but replace the old blk_queue_max_*
APIs with new disable APIs that force the value to 0.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Nitesh Shetty <nj.shetty@samsung.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240531074837.1648501-15-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ad995e7a7698..ac8e0cb2353a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -912,15 +912,31 @@ static inline void queue_limits_cancel_update(struct request_queue *q)
 	mutex_unlock(&q->limits_lock);
 }
 
+/*
+ * These helpers are for drivers that have sloppy feature negotiation and might
+ * have to disable DISCARD, WRITE_ZEROES or SECURE_DISCARD from the I/O
+ * completion handler when the device returned an indicator that the respective
+ * feature is not actually supported.  They are racy and the driver needs to
+ * cope with that.  Try to avoid this scheme if you can.
+ */
+static inline void blk_queue_disable_discard(struct request_queue *q)
+{
+	q->limits.max_discard_sectors = 0;
+}
+
+static inline void blk_queue_disable_secure_erase(struct request_queue *q)
+{
+	q->limits.max_secure_erase_sectors = 0;
+}
+
+static inline void blk_queue_disable_write_zeroes(struct request_queue *q)
+{
+	q->limits.max_write_zeroes_sectors = 0;
+}
+
 /*
  * Access functions for manipulating queue properties
  */
-void blk_queue_max_secure_erase_sectors(struct request_queue *q,
-		unsigned int max_sectors);
-extern void blk_queue_max_discard_sectors(struct request_queue *q,
-		unsigned int max_discard_sectors);
-extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
-		unsigned int max_write_same_sectors);
 void disk_update_readahead(struct gendisk *disk);
 extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
-- 
cgit v1.2.3


From e9f5f44ad3725335d9c559c3c22cd3726152a7b1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 13 Jun 2024 10:48:15 +0200
Subject: block: remove the blk_integrity_profile structure

Block layer integrity configuration is a bit complex right now, as it
indirects through operation vectors for a simple two-dimensional
configuration:

 a) the checksum type of none, ip checksum, crc, crc64
 b) the presence or absence of a reference tag

Remove the integrity profile, and instead add a separate csum_type flag
which replaces the existing ip-checksum field and a new flag that
indicates the presence of the reference tag.

This removes up to two layers of indirect calls, remove the need to
offload the no-op verification of non-PI metadata to a workqueue and
generally simplifies the code. The downside is that block/t10-pi.c now
has to be built into the kernel when CONFIG_BLK_DEV_INTEGRITY is
supported.  Given that both nvme and SCSI require t10-pi.ko, it is loaded
for all usual configurations that enabled CONFIG_BLK_DEV_INTEGRITY
already, though.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240613084839.1044015-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-integrity.h | 35 ++++++++++-------------------------
 include/linux/blkdev.h        |  9 ++++++++-
 include/linux/t10-pi.h        |  8 --------
 3 files changed, 18 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index 7428cb43952d..56ce1ae35580 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -10,7 +10,7 @@ enum blk_integrity_flags {
 	BLK_INTEGRITY_VERIFY		= 1 << 0,
 	BLK_INTEGRITY_GENERATE		= 1 << 1,
 	BLK_INTEGRITY_DEVICE_CAPABLE	= 1 << 2,
-	BLK_INTEGRITY_IP_CHECKSUM	= 1 << 3,
+	BLK_INTEGRITY_REF_TAG		= 1 << 3,
 };
 
 struct blk_integrity_iter {
@@ -19,22 +19,10 @@ struct blk_integrity_iter {
 	sector_t		seed;
 	unsigned int		data_size;
 	unsigned short		interval;
-	unsigned char		tuple_size;
-	unsigned char		pi_offset;
 	const char		*disk_name;
 };
 
-typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
-typedef void (integrity_prepare_fn) (struct request *);
-typedef void (integrity_complete_fn) (struct request *, unsigned int);
-
-struct blk_integrity_profile {
-	integrity_processing_fn		*generate_fn;
-	integrity_processing_fn		*verify_fn;
-	integrity_prepare_fn		*prepare_fn;
-	integrity_complete_fn		*complete_fn;
-	const char			*name;
-};
+const char *blk_integrity_profile_name(struct blk_integrity *bi);
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 void blk_integrity_register(struct gendisk *, struct blk_integrity *);
@@ -44,14 +32,17 @@ int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
 				   struct scatterlist *);
 int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
 
-static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
+static inline bool
+blk_integrity_queue_supports_integrity(struct request_queue *q)
 {
-	struct blk_integrity *bi = &disk->queue->integrity;
+	return q->integrity.tuple_size;
+}
 
-	if (!bi->profile)
+static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
+{
+	if (!blk_integrity_queue_supports_integrity(disk->queue))
 		return NULL;
-
-	return bi;
+	return &disk->queue->integrity;
 }
 
 static inline struct blk_integrity *
@@ -60,12 +51,6 @@ bdev_get_integrity(struct block_device *bdev)
 	return blk_get_integrity(bdev->bd_disk);
 }
 
-static inline bool
-blk_integrity_queue_supports_integrity(struct request_queue *q)
-{
-	return q->integrity.profile;
-}
-
 static inline unsigned short
 queue_max_integrity_segments(const struct request_queue *q)
 {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ac8e0cb2353a..bdd33388e1ce 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -105,9 +105,16 @@ enum {
 struct disk_events;
 struct badblocks;
 
+enum blk_integrity_checksum {
+	BLK_INTEGRITY_CSUM_NONE		= 0,
+	BLK_INTEGRITY_CSUM_IP		= 1,
+	BLK_INTEGRITY_CSUM_CRC		= 2,
+	BLK_INTEGRITY_CSUM_CRC64	= 3,
+} __packed ;
+
 struct blk_integrity {
-	const struct blk_integrity_profile	*profile;
 	unsigned char				flags;
+	enum blk_integrity_checksum		csum_type;
 	unsigned char				tuple_size;
 	unsigned char				pi_offset;
 	unsigned char				interval_exp;
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 248f4ac95642..d2bafb76badf 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -48,11 +48,6 @@ static inline u32 t10_pi_ref_tag(struct request *rq)
 	return blk_rq_pos(rq) >> (shift - SECTOR_SHIFT) & 0xffffffff;
 }
 
-extern const struct blk_integrity_profile t10_pi_type1_crc;
-extern const struct blk_integrity_profile t10_pi_type1_ip;
-extern const struct blk_integrity_profile t10_pi_type3_crc;
-extern const struct blk_integrity_profile t10_pi_type3_ip;
-
 struct crc64_pi_tuple {
 	__be64 guard_tag;
 	__be16 app_tag;
@@ -79,7 +74,4 @@ static inline u64 ext_pi_ref_tag(struct request *rq)
 	return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT));
 }
 
-extern const struct blk_integrity_profile ext_pi_type1_crc64;
-extern const struct blk_integrity_profile ext_pi_type3_crc64;
-
 #endif
-- 
cgit v1.2.3


From 3c3e85ddffae93eba1a257eb6939bf5dc1e93b9e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 13 Jun 2024 10:48:20 +0200
Subject: block: bypass the STABLE_WRITES flag for protection information

Currently registering a checksum-enabled (aka PI) integrity profile sets
the QUEUE_FLAG_STABLE_WRITE flag, and unregistering it clears the flag.
This can incorrectly clear the flag when the driver requires stable
writes even without PI, e.g. in case of iSCSI or NVMe/TCP with data
digest enabled.

Fix this by looking at the csum_type directly in bdev_stable_writes and
not setting the queue flag.  Also remove the blk_queue_stable_writes
helper as the only user in nvme wants to only look at the actual
QUEUE_FLAG_STABLE_WRITE flag as it inherits the integrity configuration
by other means.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240613084839.1044015-11-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bdd33388e1ce..f9089750919c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -571,8 +571,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_noxmerges(q)	\
 	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)	test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
-#define blk_queue_stable_writes(q) \
-	test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags)
 #define blk_queue_io_stat(q)	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
 #define blk_queue_add_random(q)	test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
 #define blk_queue_zone_resetall(q)	\
@@ -1300,8 +1298,14 @@ static inline bool bdev_synchronous(struct block_device *bdev)
 
 static inline bool bdev_stable_writes(struct block_device *bdev)
 {
-	return test_bit(QUEUE_FLAG_STABLE_WRITES,
-			&bdev_get_queue(bdev)->queue_flags);
+	struct request_queue *q = bdev_get_queue(bdev);
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+	/* BLK_INTEGRITY_CSUM_NONE is not available in blkdev.h */
+	if (q->integrity.csum_type != 0)
+		return true;
+#endif
+	return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags);
 }
 
 static inline bool bdev_write_cache(struct block_device *bdev)
-- 
cgit v1.2.3


From 9f4aa46f2a7401025d8561495cf8740f773310fc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 13 Jun 2024 10:48:21 +0200
Subject: block: invert the BLK_INTEGRITY_{GENERATE,VERIFY} flags

Invert the flags so that user set values will be able to persist
revalidating the integrity information once we switch the integrity
information to queue_limits.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240613084839.1044015-12-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-integrity.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index 56ce1ae35580..bafa01d4e7f9 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -7,8 +7,8 @@
 struct request;
 
 enum blk_integrity_flags {
-	BLK_INTEGRITY_VERIFY		= 1 << 0,
-	BLK_INTEGRITY_GENERATE		= 1 << 1,
+	BLK_INTEGRITY_NOVERIFY		= 1 << 0,
+	BLK_INTEGRITY_NOGENERATE	= 1 << 1,
 	BLK_INTEGRITY_DEVICE_CAPABLE	= 1 << 2,
 	BLK_INTEGRITY_REF_TAG		= 1 << 3,
 };
-- 
cgit v1.2.3


From c6e56cf6b2e79a463af21286ba951714ed20828c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 13 Jun 2024 10:48:22 +0200
Subject: block: move integrity information into queue_limits

Move the integrity information into the queue limits so that it can be
set atomically with other queue limits, and that the sysfs changes to
the read_verify and write_generate flags are properly synchronized.
This also allows to provide a more useful helper to stack the integrity
fields, although it still is separate from the main stacking function
as not all stackable devices want to inherit the integrity settings.
Even with that it greatly simplifies the code in md and dm.

Note that the integrity field is moved as-is into the queue limits.
While there are good arguments for removing the separate blk_integrity
structure, this would cause a lot of churn and might better be done at a
later time if desired.  However the integrity field in the queue_limits
structure is now unconditional so that various ifdefs can be avoided or
replaced with IS_ENABLED().  Given that tiny size of it that seems like
a worthwhile trade off.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240613084839.1044015-13-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-integrity.h | 27 +++++++++++----------------
 include/linux/blkdev.h        | 12 ++++--------
 include/linux/t10-pi.h        | 12 ++----------
 3 files changed, 17 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index bafa01d4e7f9..d201140d77a3 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -11,6 +11,7 @@ enum blk_integrity_flags {
 	BLK_INTEGRITY_NOGENERATE	= 1 << 1,
 	BLK_INTEGRITY_DEVICE_CAPABLE	= 1 << 2,
 	BLK_INTEGRITY_REF_TAG		= 1 << 3,
+	BLK_INTEGRITY_STACKED		= 1 << 4,
 };
 
 struct blk_integrity_iter {
@@ -23,11 +24,15 @@ struct blk_integrity_iter {
 };
 
 const char *blk_integrity_profile_name(struct blk_integrity *bi);
+bool queue_limits_stack_integrity(struct queue_limits *t,
+		struct queue_limits *b);
+static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t,
+		struct block_device *bdev)
+{
+	return queue_limits_stack_integrity(t, &bdev->bd_disk->queue->limits);
+}
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
-void blk_integrity_register(struct gendisk *, struct blk_integrity *);
-void blk_integrity_unregister(struct gendisk *);
-int blk_integrity_compare(struct gendisk *, struct gendisk *);
 int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
 				   struct scatterlist *);
 int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
@@ -35,14 +40,14 @@ int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
 static inline bool
 blk_integrity_queue_supports_integrity(struct request_queue *q)
 {
-	return q->integrity.tuple_size;
+	return q->limits.integrity.tuple_size;
 }
 
 static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
 {
 	if (!blk_integrity_queue_supports_integrity(disk->queue))
 		return NULL;
-	return &disk->queue->integrity;
+	return &disk->queue->limits.integrity;
 }
 
 static inline struct blk_integrity *
@@ -119,17 +124,6 @@ blk_integrity_queue_supports_integrity(struct request_queue *q)
 {
 	return false;
 }
-static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b)
-{
-	return 0;
-}
-static inline void blk_integrity_register(struct gendisk *d,
-					 struct blk_integrity *b)
-{
-}
-static inline void blk_integrity_unregister(struct gendisk *d)
-{
-}
 static inline unsigned short
 queue_max_integrity_segments(const struct request_queue *q)
 {
@@ -157,4 +151,5 @@ static inline struct bio_vec *rq_integrity_vec(struct request *rq)
 	return NULL;
 }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
+
 #endif /* _LINUX_BLK_INTEGRITY_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f9089750919c..0c247a716885 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -334,6 +334,8 @@ struct queue_limits {
 	 * due to possible offsets.
 	 */
 	unsigned int		dma_alignment;
+
+	struct blk_integrity	integrity;
 };
 
 typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
@@ -419,10 +421,6 @@ struct request_queue {
 
 	struct queue_limits	limits;
 
-#ifdef  CONFIG_BLK_DEV_INTEGRITY
-	struct blk_integrity integrity;
-#endif	/* CONFIG_BLK_DEV_INTEGRITY */
-
 #ifdef CONFIG_PM
 	struct device		*dev;
 	enum rpm_status		rpm_status;
@@ -1300,11 +1298,9 @@ static inline bool bdev_stable_writes(struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
 
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-	/* BLK_INTEGRITY_CSUM_NONE is not available in blkdev.h */
-	if (q->integrity.csum_type != 0)
+	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+	    q->limits.integrity.csum_type != BLK_INTEGRITY_CSUM_NONE)
 		return true;
-#endif
 	return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags);
 }
 
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index d2bafb76badf..1773610010eb 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -39,12 +39,8 @@ struct t10_pi_tuple {
 
 static inline u32 t10_pi_ref_tag(struct request *rq)
 {
-	unsigned int shift = ilog2(queue_logical_block_size(rq->q));
+	unsigned int shift = rq->q->limits.integrity.interval_exp;
 
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-	if (rq->q->integrity.interval_exp)
-		shift = rq->q->integrity.interval_exp;
-#endif
 	return blk_rq_pos(rq) >> (shift - SECTOR_SHIFT) & 0xffffffff;
 }
 
@@ -65,12 +61,8 @@ static inline u64 lower_48_bits(u64 n)
 
 static inline u64 ext_pi_ref_tag(struct request *rq)
 {
-	unsigned int shift = ilog2(queue_logical_block_size(rq->q));
+	unsigned int shift = rq->q->limits.integrity.interval_exp;
 
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-	if (rq->q->integrity.interval_exp)
-		shift = rq->q->integrity.interval_exp;
-#endif
 	return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT));
 }
 
-- 
cgit v1.2.3


From 0a5d3258d7c97295a89d22e54733b54aacb62562 Mon Sep 17 00:00:00 2001
From: Tony Ambardar <tony.ambardar@gmail.com>
Date: Mon, 3 Jun 2024 22:23:15 -0700
Subject: compiler_types.h: Define __retain for __attribute__((__retain__))

Some code includes the __used macro to prevent functions and data from
being optimized out. This macro implements __attribute__((__used__)),
which operates at the compiler and IR-level, and so still allows a linker
to remove objects intended to be kept.

Compilers supporting __attribute__((__retain__)) can address this gap by
setting the flag SHF_GNU_RETAIN on the section of a function/variable,
indicating to the linker the object should be retained. This attribute is
available since gcc 11, clang 13, and binutils 2.36.

Provide a __retain macro implementing __attribute__((__retain__)), whose
first user will be the '__bpf_kfunc' tag.

[ Additional remark from discussion:

  Why is CONFIG_LTO_CLANG added here? The __used macro permits garbage
  collection at section level, so CLANG_LTO_CLANG without
  CONFIG_LD_DEAD_CODE_DATA_ELIMINATION should not change final section
  dynamics?

  The conditional guard was included to ensure consistent behaviour
  between __retain and other features forcing split sections. In
  particular, the same guard is used in vmlinux.lds.h to merge split
  sections where needed. For example, using __retain in LLVM builds
  without CONFIG_LTO was failing CI tests on kernel-patches/bpf because
  the kernel didn't boot properly. And in further testing, the kernel
  had no issues loading BPF kfunc modules with such split sections, so
  the module (partial) linking scripts were left alone. ]

Signed-off-by: Tony Ambardar <tony.ambardar@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Yonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/bpf/ZlmGoT9KiYLZd91S@krava/T/
Link: https://lore.kernel.org/bpf/b31bca5a5e6765a0f32cc8c19b1d9cdbfaa822b5.1717477560.git.Tony.Ambardar@gmail.com
---
 include/linux/compiler_types.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 93600de3800b..f14c275950b5 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -143,6 +143,29 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
 # define __preserve_most
 #endif
 
+/*
+ * Annotating a function/variable with __retain tells the compiler to place
+ * the object in its own section and set the flag SHF_GNU_RETAIN. This flag
+ * instructs the linker to retain the object during garbage-cleanup or LTO
+ * phases.
+ *
+ * Note that the __used macro is also used to prevent functions or data
+ * being optimized out, but operates at the compiler/IR-level and may still
+ * allow unintended removal of objects during linking.
+ *
+ * Optional: only supported since gcc >= 11, clang >= 13
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-retain-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#retain
+ */
+#if __has_attribute(__retain__) && \
+	(defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || \
+	 defined(CONFIG_LTO_CLANG))
+# define __retain			__attribute__((__retain__))
+#else
+# define __retain
+#endif
+
 /* Compiler specific macros. */
 #ifdef __clang__
 #include <linux/compiler-clang.h>
-- 
cgit v1.2.3


From 7bdcedd5c8fb88e7176b93812b139eca5fe0aa46 Mon Sep 17 00:00:00 2001
From: Tony Ambardar <tony.ambardar@gmail.com>
Date: Mon, 3 Jun 2024 22:23:16 -0700
Subject: bpf: Harden __bpf_kfunc tag against linker kfunc removal

BPF kfuncs are often not directly referenced and may be inadvertently
removed by optimization steps during kernel builds, thus the __bpf_kfunc
tag mitigates against this removal by including the __used macro. However,
this macro alone does not prevent removal during linking, and may still
yield build warnings (e.g. on mips64el):

  [...]
    LD      vmlinux
    BTFIDS  vmlinux
  WARN: resolve_btfids: unresolved symbol bpf_verify_pkcs7_signature
  WARN: resolve_btfids: unresolved symbol bpf_lookup_user_key
  WARN: resolve_btfids: unresolved symbol bpf_lookup_system_key
  WARN: resolve_btfids: unresolved symbol bpf_key_put
  WARN: resolve_btfids: unresolved symbol bpf_iter_task_next
  WARN: resolve_btfids: unresolved symbol bpf_iter_css_task_new
  WARN: resolve_btfids: unresolved symbol bpf_get_file_xattr
  WARN: resolve_btfids: unresolved symbol bpf_ct_insert_entry
  WARN: resolve_btfids: unresolved symbol bpf_cgroup_release
  WARN: resolve_btfids: unresolved symbol bpf_cgroup_from_id
  WARN: resolve_btfids: unresolved symbol bpf_cgroup_acquire
  WARN: resolve_btfids: unresolved symbol bpf_arena_free_pages
    NM      System.map
    SORTTAB vmlinux
    OBJCOPY vmlinux.32
  [...]

Update the __bpf_kfunc tag to better guard against linker optimization by
including the new __retain compiler macro, which fixes the warnings above.

Verify the __retain macro with readelf by checking object flags for 'R':

  $ readelf -Wa kernel/trace/bpf_trace.o
  Section Headers:
    [Nr]  Name              Type     Address  Off  Size ES Flg Lk Inf Al
  [...]
    [178] .text.bpf_key_put PROGBITS 00000000 6420 0050 00 AXR  0   0  8
  [...]
  Key to Flags:
  [...]
    R (retain), D (mbind), p (processor specific)

Fixes: 57e7c169cd6a ("bpf: Add __bpf_kfunc tag for marking kernel functions as kfuncs")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Tony Ambardar <tony.ambardar@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Cc: Yonghong Song <yonghong.song@linux.dev>
Closes: https://lore.kernel.org/r/202401211357.OCX9yllM-lkp@intel.com/
Link: https://lore.kernel.org/bpf/ZlmGoT9KiYLZd91S@krava/T/
Link: https://lore.kernel.org/bpf/e9c64e9b5c073dabd457ff45128aabcab7630098.1717477560.git.Tony.Ambardar@gmail.com
---
 include/linux/btf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index f9e56fd12a9f..7c3e40c3295e 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -82,7 +82,7 @@
  * as to avoid issues such as the compiler inlining or eliding either a static
  * kfunc, or a global kfunc in an LTO build.
  */
-#define __bpf_kfunc __used noinline
+#define __bpf_kfunc __used __retain noinline
 
 #define __bpf_kfunc_start_defs()					       \
 	__diag_push();							       \
-- 
cgit v1.2.3


From 98d7ca374ba4b39e7535613d40e159f09ca14da2 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 12 Jun 2024 18:38:13 -0700
Subject: bpf: Track delta between "linked" registers.

Compilers can generate the code
  r1 = r2
  r1 += 0x1
  if r2 < 1000 goto ...
  use knowledge of r2 range in subsequent r1 operations

So remember constant delta between r2 and r1 and update r1 after 'if' condition.

Unfortunately LLVM still uses this pattern for loops with 'can_loop' construct:
for (i = 0; i < 1000 && can_loop; i++)

The "undo" pass was introduced in LLVM
https://reviews.llvm.org/D121937
to prevent this optimization, but it cannot cover all cases.
Instead of fighting middle end optimizer in BPF backend teach the verifier
about this pattern.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/bpf/20240613013815.953-3-alexei.starovoitov@gmail.com
---
 include/linux/bpf_verifier.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 50aa87f8d77f..2b54e25d2364 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -73,7 +73,10 @@ enum bpf_iter_state {
 struct bpf_reg_state {
 	/* Ordering of fields matters.  See states_equal() */
 	enum bpf_reg_type type;
-	/* Fixed part of pointer offset, pointer types only */
+	/*
+	 * Fixed part of pointer offset, pointer types only.
+	 * Or constant delta between "linked" scalars with the same ID.
+	 */
 	s32 off;
 	union {
 		/* valid when type == PTR_TO_PACKET */
@@ -167,6 +170,13 @@ struct bpf_reg_state {
 	 * Similarly to dynptrs, we use ID to track "belonging" of a reference
 	 * to a specific instance of bpf_iter.
 	 */
+	/*
+	 * Upper bit of ID is used to remember relationship between "linked"
+	 * registers. Example:
+	 * r1 = r2;    both will have r1->id == r2->id == N
+	 * r1 += 10;   r1->id == N | BPF_ADD_CONST and r1->off == 10
+	 */
+#define BPF_ADD_CONST (1U << 31)
 	u32 id;
 	/* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
 	 * from a pointer-cast helper, bpf_sk_fullsock() and
-- 
cgit v1.2.3


From 894376385a2d80a96816449e4991587a9a5ef0dd Mon Sep 17 00:00:00 2001
From: Sebastian Ene <sebastianene@google.com>
Date: Thu, 13 Jun 2024 13:20:33 +0000
Subject: KVM: arm64: Add support for FFA_PARTITION_INFO_GET

Handle the FFA_PARTITION_INFO_GET host call inside the pKVM hypervisor
and copy the response message back to the host buffers.

Signed-off-by: Sebastian Ene <sebastianene@google.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20240613132035.1070360-3-sebastianene@google.com
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 include/linux/arm_ffa.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
index c82d56768101..c6d18f50f671 100644
--- a/include/linux/arm_ffa.h
+++ b/include/linux/arm_ffa.h
@@ -212,6 +212,9 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; }
 
 extern const struct bus_type ffa_bus_type;
 
+/* The FF-A 1.0 partition structure lacks the uuid[4] */
+#define FFA_1_0_PARTITON_INFO_SZ	(8)
+
 /* FFA transport related */
 struct ffa_partition_info {
 	u16 id;
-- 
cgit v1.2.3


From 51104c19d8570eb23208e08eac0e9ae09ced1c15 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Wed, 12 Jun 2024 12:59:18 -0700
Subject: kunit: test: Add vm_mmap() allocation resource manager

For tests that need to allocate using vm_mmap() (e.g. usercopy and
execve), provide the interface to have the allocation tracked by KUnit
itself. This requires bringing up a placeholder userspace mm.

This combines my earlier attempt at this with Mark Rutland's version[1].

Normally alloc_mm() and arch_pick_mmap_layout() aren't exported for
modules, so export these only for KUnit testing.

Link: https://lore.kernel.org/lkml/20230321122514.1743889-2-mark.rutland@arm.com/ [1]
Co-developed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Kees Cook <kees@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/test.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/kunit/test.h b/include/kunit/test.h
index e32b4cb7afa2..ec61cad6b71d 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -480,6 +480,23 @@ static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp
 	return kunit_kmalloc_array(test, n, size, gfp | __GFP_ZERO);
 }
 
+/**
+ * kunit_vm_mmap() - Allocate KUnit-tracked vm_mmap() area
+ * @test: The test context object.
+ * @file: struct file pointer to map from, if any
+ * @addr: desired address, if any
+ * @len: how many bytes to allocate
+ * @prot: mmap PROT_* bits
+ * @flag: mmap flags
+ * @offset: offset into @file to start mapping from.
+ *
+ * See vm_mmap() for more information.
+ */
+unsigned long kunit_vm_mmap(struct kunit *test, struct file *file,
+			    unsigned long addr, unsigned long len,
+			    unsigned long prot, unsigned long flag,
+			    unsigned long offset);
+
 void kunit_cleanup(struct kunit *test);
 
 void __printf(2, 3) kunit_log_append(struct string_stream *log, const char *fmt, ...);
-- 
cgit v1.2.3


From e575d3a6dd22123888defb622b1742aa2d45b942 Mon Sep 17 00:00:00 2001
From: Cosmin Ratiu <cratiu@nvidia.com>
Date: Fri, 14 Jun 2024 00:00:31 +0300
Subject: net/mlx5: Correct TASR typo into TSAR

TSAR is the correct spelling (Transmit Scheduling ARbiter).

Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/r/20240613210036.1125203-2-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mlx5/mlx5_ifc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 17acd0f3ca8e..466dcda40bb5 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -3914,7 +3914,7 @@ enum {
 };
 
 enum {
-	ELEMENT_TYPE_CAP_MASK_TASR		= 1 << 0,
+	ELEMENT_TYPE_CAP_MASK_TSAR		= 1 << 0,
 	ELEMENT_TYPE_CAP_MASK_VPORT		= 1 << 1,
 	ELEMENT_TYPE_CAP_MASK_VPORT_TC		= 1 << 2,
 	ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC	= 1 << 3,
-- 
cgit v1.2.3


From 296eaab825060d0f25e89b2f85ab9fc26128cea8 Mon Sep 17 00:00:00 2001
From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Date: Fri, 14 Jun 2024 00:00:36 +0300
Subject: net/mlx5e: Support SWP-mode offload L4 csum calculation

Calculate the pseudo-header checksum for both IPSec transport mode and
IPSec tunnel mode for mlx5 devices that do not implement a pure hardware
checksum offload for L4 checksum calculation. Introduce a capability bit
that identifies such mlx5 devices.

Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/r/20240613210036.1125203-7-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mlx5/mlx5_ifc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 466dcda40bb5..66b921c81c0f 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1093,7 +1093,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
 	u8         tunnel_stateless_ip_over_ip_tx[0x1];
 	u8         reserved_at_2e[0x2];
 	u8         max_vxlan_udp_ports[0x8];
-	u8         reserved_at_38[0x6];
+	u8         swp_csum_l4_partial[0x1];
+	u8         reserved_at_39[0x5];
 	u8         max_geneve_opt_len[0x1];
 	u8         tunnel_stateless_geneve_rx[0x1];
 
-- 
cgit v1.2.3


From 6c3282a6b296385bee2c383442c39f507b0d51dd Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Thu, 13 Jun 2024 11:36:06 +0100
Subject: net: stmmac: add select_pcs() platform method

Allow platform drivers to provide their logic to select an appropriate
PCS.

Tested-by: Romain Gantois <romain.gantois@bootlin.com>
Reviewed-by: Romain Gantois <romain.gantois@bootlin.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://lore.kernel.org/r/E1sHhoM-00Fesu-8E@rmk-PC.armlinux.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/stmmac.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 8f0f156d50d3..9c54f82901a1 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -13,7 +13,7 @@
 #define __STMMAC_PLATFORM_DATA
 
 #include <linux/platform_device.h>
-#include <linux/phy.h>
+#include <linux/phylink.h>
 
 #define MTL_MAX_RX_QUEUES	8
 #define MTL_MAX_TX_QUEUES	8
@@ -271,6 +271,8 @@ struct plat_stmmacenet_data {
 	void (*dump_debug_regs)(void *priv);
 	int (*pcs_init)(struct stmmac_priv *priv);
 	void (*pcs_exit)(struct stmmac_priv *priv);
+	struct phylink_pcs *(*select_pcs)(struct stmmac_priv *priv,
+					  phy_interface_t interface);
 	void *bsp_priv;
 	struct clk *stmmac_clk;
 	struct clk *pclk;
-- 
cgit v1.2.3


From e7da16abf030b39c3598574d5457f324a6f0e4a7 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 Jun 2024 22:14:33 +0900
Subject: firewire: core: record card index in tracepoinrts events derived from
 async_outbound_complete_template

The asynchronous transaction is initiated on one of 1394 OHCI controller,
however the existing tracepoints events has the lack of data about it.

This commit adds card_index member into event structure to store the index
of host controller in use, and prints it.

Link: https://lore.kernel.org/r/20240613131440.431766-2-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index d695a560673f..ca6ea9bd1eba 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -71,10 +71,11 @@ DECLARE_EVENT_CLASS(async_outbound_initiate_template,
 
 // The value of status is one of ack codes and rcodes specific to Linux FireWire subsystem.
 DECLARE_EVENT_CLASS(async_outbound_complete_template,
-	TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp),
-	TP_ARGS(transaction, generation, scode, status, timestamp),
+	TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp),
+	TP_ARGS(transaction, card_index, generation, scode, status, timestamp),
 	TP_STRUCT__entry(
 		__field(u64, transaction)
+		__field(u8, card_index)
 		__field(u8, generation)
 		__field(u8, scode)
 		__field(u8, status)
@@ -82,14 +83,16 @@ DECLARE_EVENT_CLASS(async_outbound_complete_template,
 	),
 	TP_fast_assign(
 		__entry->transaction = transaction;
+		__entry->card_index = card_index;
 		__entry->generation = generation;
 		__entry->scode = scode;
 		__entry->status = status;
 		__entry->timestamp = timestamp;
 	),
 	TP_printk(
-		"transaction=0x%llx generation=%u scode=%u status=%u timestamp=0x%04x",
+		"transaction=0x%llx card_index=%u generation=%u scode=%u status=%u timestamp=0x%04x",
 		__entry->transaction,
+		__entry->card_index,
 		__entry->generation,
 		__entry->scode,
 		__entry->status,
@@ -144,8 +147,8 @@ DEFINE_EVENT(async_outbound_initiate_template, async_request_outbound_initiate,
 );
 
 DEFINE_EVENT(async_outbound_complete_template, async_request_outbound_complete,
-	TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp),
-	TP_ARGS(transaction, generation, scode, status, timestamp)
+	TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp),
+	TP_ARGS(transaction, card_index, generation, scode, status, timestamp)
 );
 
 DEFINE_EVENT(async_inbound_template, async_response_inbound,
@@ -194,8 +197,8 @@ DEFINE_EVENT_PRINT(async_outbound_initiate_template, async_response_outbound_ini
 );
 
 DEFINE_EVENT(async_outbound_complete_template, async_response_outbound_complete,
-	TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp),
-	TP_ARGS(transaction, generation, scode, status, timestamp)
+	TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp),
+	TP_ARGS(transaction, card_index, generation, scode, status, timestamp)
 );
 
 #undef ASYNC_HEADER_GET_DESTINATION
-- 
cgit v1.2.3


From 64e02b64fb1d832a9a5254055a829db64750421a Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 Jun 2024 22:14:34 +0900
Subject: firewire: core: record card index in tracepoinrts events derived from
 async_outbound_initiate_template

The asynchronous transaction is initiated on one of 1394 OHCI controller,
however the existing tracepoints events has the lack of data about it.

This commit adds card_index member into event structure to store the index
of host controller in use, and prints it.

Link: https://lore.kernel.org/r/20240613131440.431766-3-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index ca6ea9bd1eba..a3d9916cbad1 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -36,10 +36,11 @@
 #define QUADLET_SIZE	4
 
 DECLARE_EVENT_CLASS(async_outbound_initiate_template,
-	TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count),
-	TP_ARGS(transaction, generation, scode, header, data, data_count),
+	TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count),
+	TP_ARGS(transaction, card_index, generation, scode, header, data, data_count),
 	TP_STRUCT__entry(
 		__field(u64, transaction)
+		__field(u8, card_index)
 		__field(u8, generation)
 		__field(u8, scode)
 		__array(u32, header, ASYNC_HEADER_QUADLET_COUNT)
@@ -47,6 +48,7 @@ DECLARE_EVENT_CLASS(async_outbound_initiate_template,
 	),
 	TP_fast_assign(
 		__entry->transaction = transaction;
+		__entry->card_index = card_index;
 		__entry->generation = generation;
 		__entry->scode = scode;
 		memcpy(__entry->header, header, QUADLET_SIZE * ASYNC_HEADER_QUADLET_COUNT);
@@ -54,8 +56,9 @@ DECLARE_EVENT_CLASS(async_outbound_initiate_template,
 	),
 	// This format is for the request subaction.
 	TP_printk(
-		"transaction=0x%llx generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s",
+		"transaction=0x%llx card_index=%u generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s",
 		__entry->transaction,
+		__entry->card_index,
 		__entry->generation,
 		__entry->scode,
 		ASYNC_HEADER_GET_DESTINATION(__entry->header),
@@ -142,8 +145,8 @@ DECLARE_EVENT_CLASS(async_inbound_template,
 );
 
 DEFINE_EVENT(async_outbound_initiate_template, async_request_outbound_initiate,
-	TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count),
-	TP_ARGS(transaction, generation, scode, header, data, data_count)
+	TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count),
+	TP_ARGS(transaction, card_index, generation, scode, header, data, data_count)
 );
 
 DEFINE_EVENT(async_outbound_complete_template, async_request_outbound_complete,
@@ -178,11 +181,12 @@ DEFINE_EVENT_PRINT(async_inbound_template, async_request_inbound,
 );
 
 DEFINE_EVENT_PRINT(async_outbound_initiate_template, async_response_outbound_initiate,
-	TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count),
-	TP_ARGS(transaction, generation, scode, header, data, data_count),
+	TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count),
+	TP_ARGS(transaction, card_index, generation, scode, header, data, data_count),
 	TP_printk(
-		"transaction=0x%llx generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s",
+		"transaction=0x%llx card_index=%u generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s",
 		__entry->transaction,
+		__entry->card_index,
 		__entry->generation,
 		__entry->scode,
 		ASYNC_HEADER_GET_DESTINATION(__entry->header),
-- 
cgit v1.2.3


From 65ec7ebefe7de01281cce1f552ebd4dd00386665 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 Jun 2024 22:14:35 +0900
Subject: firewire: core: record card index in tracepoinrts events derived from
 async_inbound_template

The asynchronous transaction is initiated on one of 1394 OHCI controller,
however the existing tracepoints events has the lack of data about it.

This commit adds card_index member into event structure to store the index
of host controller in use, and prints it.

Link: https://lore.kernel.org/r/20240613131440.431766-4-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index a3d9916cbad1..b72f613cfa02 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -105,10 +105,11 @@ DECLARE_EVENT_CLASS(async_outbound_complete_template,
 
 // The value of status is one of ack codes and rcodes specific to Linux FireWire subsystem.
 DECLARE_EVENT_CLASS(async_inbound_template,
-	TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count),
-	TP_ARGS(transaction, generation, scode, status, timestamp, header, data, data_count),
+	TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count),
+	TP_ARGS(transaction, card_index, generation, scode, status, timestamp, header, data, data_count),
 	TP_STRUCT__entry(
 		__field(u64, transaction)
+		__field(u8, card_index)
 		__field(u8, generation)
 		__field(u8, scode)
 		__field(u8, status)
@@ -118,6 +119,7 @@ DECLARE_EVENT_CLASS(async_inbound_template,
 	),
 	TP_fast_assign(
 		__entry->transaction = transaction;
+		__entry->card_index = card_index;
 		__entry->generation = generation;
 		__entry->scode = scode;
 		__entry->status = status;
@@ -127,8 +129,9 @@ DECLARE_EVENT_CLASS(async_inbound_template,
 	),
 	// This format is for the response subaction.
 	TP_printk(
-		"transaction=0x%llx generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s",
+		"transaction=0x%llx card_index=%u generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s",
 		__entry->transaction,
+		__entry->card_index,
 		__entry->generation,
 		__entry->scode,
 		__entry->status,
@@ -155,16 +158,17 @@ DEFINE_EVENT(async_outbound_complete_template, async_request_outbound_complete,
 );
 
 DEFINE_EVENT(async_inbound_template, async_response_inbound,
-	TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count),
-	TP_ARGS(transaction, generation, scode, status, timestamp, header, data, data_count)
+	TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count),
+	TP_ARGS(transaction, card_index, generation, scode, status, timestamp, header, data, data_count)
 );
 
 DEFINE_EVENT_PRINT(async_inbound_template, async_request_inbound,
-	TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count),
-	TP_ARGS(transaction, generation, scode, status, timestamp, header, data, data_count),
+	TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count),
+	TP_ARGS(transaction, card_index, generation, scode, status, timestamp, header, data, data_count),
 	TP_printk(
-		"transaction=0x%llx generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s",
+		"transaction=0x%llx card_index=%u generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s",
 		__entry->transaction,
+		__entry->card_index,
 		__entry->generation,
 		__entry->scode,
 		__entry->status,
-- 
cgit v1.2.3


From 3cb44a72a39835b368ab78d739819330089aa2bf Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 Jun 2024 22:14:36 +0900
Subject: firewire: core: record card index in async_phy_outbound_initiate
 tracepoints event

The asynchronous transaction is initiated on one of 1394 OHCI
controller, however the existing tracepoints events has the lack of data
about it.

This commit adds card_index member into event structure to store the index
of host controller in use, and prints it.

Link: https://lore.kernel.org/r/20240613131440.431766-5-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index b72f613cfa02..a59dc26b2a53 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -217,23 +217,26 @@ DEFINE_EVENT(async_outbound_complete_template, async_response_outbound_complete,
 #undef ASYNC_HEADER_GET_RCODE
 
 TRACE_EVENT(async_phy_outbound_initiate,
-	TP_PROTO(u64 packet, unsigned int generation, u32 first_quadlet, u32 second_quadlet),
-	TP_ARGS(packet, generation, first_quadlet, second_quadlet),
+	TP_PROTO(u64 packet, unsigned int card_index, unsigned int generation, u32 first_quadlet, u32 second_quadlet),
+	TP_ARGS(packet, card_index, generation, first_quadlet, second_quadlet),
 	TP_STRUCT__entry(
 		__field(u64, packet)
+		__field(u8, card_index)
 		__field(u8, generation)
 		__field(u32, first_quadlet)
 		__field(u32, second_quadlet)
 	),
 	TP_fast_assign(
 		__entry->packet = packet;
+		__entry->card_index = card_index;
 		__entry->generation = generation;
 		__entry->first_quadlet = first_quadlet;
 		__entry->second_quadlet = second_quadlet
 	),
 	TP_printk(
-		"packet=0x%llx generation=%u first_quadlet=0x%08x second_quadlet=0x%08x",
+		"packet=0x%llx card_index=%u generation=%u first_quadlet=0x%08x second_quadlet=0x%08x",
 		__entry->packet,
+		__entry->card_index,
 		__entry->generation,
 		__entry->first_quadlet,
 		__entry->second_quadlet
-- 
cgit v1.2.3


From 810f2aa83563020d834425018303f2aaecef1e6e Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 Jun 2024 22:14:37 +0900
Subject: firewire: core: record card index in async_phy_outbound_complete
 tracepoints event

The asynchronous transmission of phy packet is initiated on one of 1394
OHCI controller, however the existing tracepoints events has the lack of
data about it.

This commit adds card_index member into event structure to store the index
of host controller in use, and prints it.

Link: https://lore.kernel.org/r/20240613131440.431766-6-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index a59dc26b2a53..61c7a2461fbc 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -244,23 +244,26 @@ TRACE_EVENT(async_phy_outbound_initiate,
 );
 
 TRACE_EVENT(async_phy_outbound_complete,
-	TP_PROTO(u64 packet, unsigned int generation, unsigned int status, unsigned int timestamp),
-	TP_ARGS(packet, generation, status, timestamp),
+	TP_PROTO(u64 packet, unsigned int card_index, unsigned int generation, unsigned int status, unsigned int timestamp),
+	TP_ARGS(packet, card_index, generation, status, timestamp),
 	TP_STRUCT__entry(
 		__field(u64, packet)
+		__field(u8, card_index)
 		__field(u8, generation)
 		__field(u8, status)
 		__field(u16, timestamp)
 	),
 	TP_fast_assign(
 		__entry->packet = packet;
+		__entry->card_index = card_index;
 		__entry->generation = generation;
 		__entry->status = status;
 		__entry->timestamp = timestamp;
 	),
 	TP_printk(
-		"packet=0x%llx generation=%u status=%u timestamp=0x%04x",
+		"packet=0x%llx card_index=%u generation=%u status=%u timestamp=0x%04x",
 		__entry->packet,
+		__entry->card_index,
 		__entry->generation,
 		__entry->status,
 		__entry->timestamp
-- 
cgit v1.2.3


From abbb4bd96d7f871b10bd7058f7284ffaf4e8257f Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 Jun 2024 22:14:38 +0900
Subject: firewire: core: record card index in async_phy_inbound tracepoints
 event

The asynchronous transmission of phy packet is initiated on one of 1394
OHCI controller, however the existing tracepoints events has the lack of
data about it.

This commit adds card_index member into event structure to store the index
of host controller in use, and prints it.

Link: https://lore.kernel.org/r/20240613131440.431766-7-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index 61c7a2461fbc..e5524fc71880 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -271,10 +271,11 @@ TRACE_EVENT(async_phy_outbound_complete,
 );
 
 TRACE_EVENT(async_phy_inbound,
-	TP_PROTO(u64 packet, unsigned int generation, unsigned int status, unsigned int timestamp, u32 first_quadlet, u32 second_quadlet),
-	TP_ARGS(packet, generation, status, timestamp, first_quadlet, second_quadlet),
+	TP_PROTO(u64 packet, unsigned int card_index, unsigned int generation, unsigned int status, unsigned int timestamp, u32 first_quadlet, u32 second_quadlet),
+	TP_ARGS(packet, card_index, generation, status, timestamp, first_quadlet, second_quadlet),
 	TP_STRUCT__entry(
 		__field(u64, packet)
+		__field(u8, card_index)
 		__field(u8, generation)
 		__field(u8, status)
 		__field(u16, timestamp)
@@ -290,8 +291,9 @@ TRACE_EVENT(async_phy_inbound,
 		__entry->second_quadlet = second_quadlet
 	),
 	TP_printk(
-		"packet=0x%llx generation=%u status=%u timestamp=0x%04x first_quadlet=0x%08x second_quadlet=0x%08x",
+		"packet=0x%llx card_index=%u generation=%u status=%u timestamp=0x%04x first_quadlet=0x%08x second_quadlet=0x%08x",
 		__entry->packet,
+		__entry->card_index,
 		__entry->generation,
 		__entry->status,
 		__entry->timestamp,
-- 
cgit v1.2.3


From 7507dbc46b780e9fa2ec3d4db7cd9ce07e722927 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 Jun 2024 22:14:39 +0900
Subject: firewire: core: record card index in tracepoinrts events derived from
 bus_reset_arrange_template

The asynchronous transmission of phy packet is initiated on one of 1394
OHCI controller, however the existing tracepoints events has the lack of
data about it.

This commit adds card_index member into event structure to store the index
of host controller in use, and prints it.

Link: https://lore.kernel.org/r/20240613131440.431766-8-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index e5524fc71880..e6485051f546 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -303,36 +303,39 @@ TRACE_EVENT(async_phy_inbound,
 );
 
 DECLARE_EVENT_CLASS(bus_reset_arrange_template,
-	TP_PROTO(unsigned int generation, bool short_reset),
-	TP_ARGS(generation, short_reset),
+	TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset),
+	TP_ARGS(card_index, generation, short_reset),
 	TP_STRUCT__entry(
+		__field(u8, card_index)
 		__field(u8, generation)
 		__field(bool, short_reset)
 	),
 	TP_fast_assign(
+		__entry->card_index = card_index;
 		__entry->generation = generation;
 		__entry->short_reset = short_reset;
 	),
 	TP_printk(
-		"generation=%u short_reset=%s",
+		"card_index=%u generation=%u short_reset=%s",
+		__entry->card_index,
 		__entry->generation,
 		__entry->short_reset ? "true" : "false"
 	)
 );
 
 DEFINE_EVENT(bus_reset_arrange_template, bus_reset_initiate,
-	TP_PROTO(unsigned int generation, bool short_reset),
-	TP_ARGS(generation, short_reset)
+	TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset),
+	TP_ARGS(card_index, generation, short_reset)
 );
 
 DEFINE_EVENT(bus_reset_arrange_template, bus_reset_schedule,
-	TP_PROTO(unsigned int generation, bool short_reset),
-	TP_ARGS(generation, short_reset)
+	TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset),
+	TP_ARGS(card_index, generation, short_reset)
 );
 
 DEFINE_EVENT(bus_reset_arrange_template, bus_reset_postpone,
-	TP_PROTO(unsigned int generation, bool short_reset),
-	TP_ARGS(generation, short_reset)
+	TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset),
+	TP_ARGS(card_index, generation, short_reset)
 );
 
 TRACE_EVENT(bus_reset_handle,
-- 
cgit v1.2.3


From 893098b2af3ea12bab2f505aa825662b379df67d Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 Jun 2024 22:14:40 +0900
Subject: firewire: core: record card index in bus_reset_handle tracepoints
 event

The bus reset event occurs in the bus managed by one of 1394 OHCI
controller in Linux system, however the existing tracepoints events has
the lack of data about it to distinguish the issued hardware from the
others.

This commit adds card_index member into event structure to store the index
of host controller in use, and prints it.

Link: https://lore.kernel.org/r/20240613131440.431766-9-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index e6485051f546..5ccc0d91b220 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -339,22 +339,25 @@ DEFINE_EVENT(bus_reset_arrange_template, bus_reset_postpone,
 );
 
 TRACE_EVENT(bus_reset_handle,
-	TP_PROTO(unsigned int generation, unsigned int node_id, bool bm_abdicate, u32 *self_ids, unsigned int self_id_count),
-	TP_ARGS(generation, node_id, bm_abdicate, self_ids, self_id_count),
+	TP_PROTO(unsigned int card_index, unsigned int generation, unsigned int node_id, bool bm_abdicate, u32 *self_ids, unsigned int self_id_count),
+	TP_ARGS(card_index, generation, node_id, bm_abdicate, self_ids, self_id_count),
 	TP_STRUCT__entry(
+		__field(u8, card_index)
 		__field(u8, generation)
 		__field(u8, node_id)
 		__field(bool, bm_abdicate)
 		__dynamic_array(u32, self_ids, self_id_count)
 	),
 	TP_fast_assign(
+		__entry->card_index = card_index;
 		__entry->generation = generation;
 		__entry->node_id = node_id;
 		__entry->bm_abdicate = bm_abdicate;
 		memcpy(__get_dynamic_array(self_ids), self_ids, __get_dynamic_array_len(self_ids));
 	),
 	TP_printk(
-		"generation=%u node_id=0x%04x bm_abdicate=%s self_ids=%s",
+		"card_index=%u generation=%u node_id=0x%04x bm_abdicate=%s self_ids=%s",
+		__entry->card_index,
 		__entry->generation,
 		__entry->node_id,
 		__entry->bm_abdicate ? "true" : "false",
-- 
cgit v1.2.3


From 384a746bb55960aa5ffb3a67de08f11fc2f51042 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 5 Jun 2024 11:17:10 +0200
Subject: Revert "mm: init_mlocked_on_free_v3"

There was insufficient review and no agreement that this is the right
approach.

There are serious flaws with the implementation that make processes using
mlock() not even work with simple fork() [1] and we get reliable crashes
when rebooting.

Further, simply because we might be unmapping a single PTE of a large
mlocked folio, we shouldn't zero out the whole folio.

... especially because the code can also *corrupt* urelated memory because
	kernel_init_pages(page, folio_nr_pages(folio));

Could end up writing outside of the actual folio if we work with a tail
page.

Let's revert it.  Once there is agreement that this is the right approach,
the issues were fixed and there was reasonable review and proper testing,
we can consider it again.

[1] https://lkml.kernel.org/r/4da9da2f-73e4-45fd-b62f-a8a513314057@redhat.com

Link: https://lkml.kernel.org/r/20240605091710.38961-1-david@redhat.com
Fixes: ba42b524a040 ("mm: init_mlocked_on_free_v3")
Signed-off-by: David Hildenbrand <david@redhat.com>
Reported-by: David Wang <00107082@163.com>
Closes: https://lore.kernel.org/lkml/20240528151340.4282-1-00107082@163.com/
Reported-by: Lance Yang <ioworker0@gmail.com>
Closes: https://lkml.kernel.org/r/20240601140917.43562-1-ioworker0@gmail.com
Acked-by: Lance Yang <ioworker0@gmail.com>
Cc: York Jasper Niebuhr <yjnworkstation@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9849dfda44d4..9a5652c5fadd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3776,14 +3776,7 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
 static inline bool want_init_on_free(void)
 {
 	return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
-				&init_on_free);
-}
-
-DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free);
-static inline bool want_init_mlocked_on_free(void)
-{
-	return static_branch_maybe(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON,
-				&init_mlocked_on_free);
+				   &init_on_free);
 }
 
 extern bool _debug_pagealloc_enabled_early;
-- 
cgit v1.2.3


From a273559e9eb68cb58c57803d76a1622b8324a878 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Sat, 1 Jun 2024 16:38:40 -0700
Subject: lib/alloc_tag: fix RCU imbalance in pgalloc_tag_get()

put_page_tag_ref() should be called only when get_page_tag_ref() returns a
valid reference because only in that case get_page_tag_ref() enters RCU
read section while put_page_tag_ref() will call rcu_read_unlock() even if
the provided reference is NULL.  Fix pgalloc_tag_get() which does not
follow this rule causing RCU imbalance.  Add a warning in
put_page_tag_ref() to catch any future mistakes.

Link: https://lkml.kernel.org/r/20240601233840.617458-1-surenb@google.com
Fixes: cc92eba1c88b ("mm: fix non-compound multi-order memory accounting in __free_pages")
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202405271029.6d2f9c4c-lkp@intel.com
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Kees Cook <keescook@chromium.org>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgalloc_tag.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index 86ba5d33e43b..9cacadbd61f8 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -37,6 +37,9 @@ static inline union codetag_ref *get_page_tag_ref(struct page *page)
 
 static inline void put_page_tag_ref(union codetag_ref *ref)
 {
+	if (WARN_ON(!ref))
+		return;
+
 	page_ext_put(page_ext_from_codetag_ref(ref));
 }
 
@@ -102,9 +105,11 @@ static inline struct alloc_tag *pgalloc_tag_get(struct page *page)
 		union codetag_ref *ref = get_page_tag_ref(page);
 
 		alloc_tag_sub_check(ref);
-		if (ref && ref->ct)
-			tag = ct_to_alloc_tag(ref->ct);
-		put_page_tag_ref(ref);
+		if (ref) {
+			if (ref->ct)
+				tag = ct_to_alloc_tag(ref->ct);
+			put_page_tag_ref(ref);
+		}
 	}
 
 	return tag;
-- 
cgit v1.2.3


From 6a50c9b512f7734bc356f4bd47885a6f7c98491a Mon Sep 17 00:00:00 2001
From: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Date: Fri, 7 Jun 2024 17:40:48 +0800
Subject: mm: huge_memory: fix misused mapping_large_folio_support() for anon
 folios

When I did a large folios split test, a WARNING "[ 5059.122759][ T166]
Cannot split file folio to non-0 order" was triggered.  But the test cases
are only for anonmous folios.  while mapping_large_folio_support() is only
reasonable for page cache folios.

In split_huge_page_to_list_to_order(), the folio passed to
mapping_large_folio_support() maybe anonmous folio.  The folio_test_anon()
check is missing.  So the split of the anonmous THP is failed.  This is
also the same for shmem_mapping().  We'd better add a check for both.  But
the shmem_mapping() in __split_huge_page() is not involved, as for
anonmous folios, the end parameter is set to -1, so (head[i].index >= end)
is always false.  shmem_mapping() is not called.

Also add a VM_WARN_ON_ONCE() in mapping_large_folio_support() for anon
mapping, So we can detect the wrong use more easily.

THP folios maybe exist in the pagecache even the file system doesn't
support large folio, it is because when CONFIG_TRANSPARENT_HUGEPAGE is
enabled, khugepaged will try to collapse read-only file-backed pages to
THP.  But the mapping does not actually support multi order large folios
properly.

Using /sys/kernel/debug/split_huge_pages to verify this, with this patch,
large anon THP is successfully split and the warning is ceased.

Link: https://lkml.kernel.org/r/202406071740485174hcFl7jRxncsHDtI-Pz-o@zte.com.cn
Fixes: c010d47f107f ("mm: thp: split huge page to any lower order pages")
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: xu xin <xu.xin16@zte.com.cn>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagemap.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ee633712bba0..59f1df0cde5a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -381,6 +381,10 @@ static inline void mapping_set_large_folios(struct address_space *mapping)
  */
 static inline bool mapping_large_folio_support(struct address_space *mapping)
 {
+	/* AS_LARGE_FOLIO_SUPPORT is only reasonable for pagecache folios */
+	VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON,
+			"Anonymous mapping always supports large folio");
+
 	return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
 		test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
 }
-- 
cgit v1.2.3


From 01c8f9806bde438ca1c8cbbc439f0a14a6694f6c Mon Sep 17 00:00:00 2001
From: Aleksandr Nogikh <nogikh@google.com>
Date: Tue, 11 Jun 2024 15:32:29 +0200
Subject: kcov: don't lose track of remote references during softirqs

In kcov_remote_start()/kcov_remote_stop(), we swap the previous KCOV
metadata of the current task into a per-CPU variable.  However, the
kcov_mode_enabled(mode) check is not sufficient in the case of remote KCOV
coverage: current->kcov_mode always remains KCOV_MODE_DISABLED for remote
KCOV objects.

If the original task that has invoked the KCOV_REMOTE_ENABLE ioctl happens
to get interrupted and kcov_remote_start() is called, it ultimately leads
to kcov_remote_stop() NOT restoring the original KCOV reference.  So when
the task exits, all registered remote KCOV handles remain active forever.

The most uncomfortable effect (at least for syzkaller) is that the bug
prevents the reuse of the same /sys/kernel/debug/kcov descriptor.  If
we obtain it in the parent process and then e.g.  drop some
capabilities and continuously fork to execute individual programs, at
some point current->kcov of the forked process is lost,
kcov_task_exit() takes no action, and all KCOV_REMOTE_ENABLE ioctls
calls from subsequent forks fail.

And, yes, the efficiency is also affected if we keep on losing remote
kcov objects.
a) kcov_remote_map keeps on growing forever.
b) (If I'm not mistaken), we're also not freeing the memory referenced
by kcov->area.

Fix it by introducing a special kcov_mode that is assigned to the task
that owns a KCOV remote object.  It makes kcov_mode_enabled() return true
and yet does not trigger coverage collection in __sanitizer_cov_trace_pc()
and write_comp_data().

[nogikh@google.com: replace WRITE_ONCE() with an ordinary assignment]
  Link: https://lkml.kernel.org/r/20240614171221.2837584-1-nogikh@google.com
Link: https://lkml.kernel.org/r/20240611133229.527822-1-nogikh@google.com
Fixes: 5ff3b30ab57d ("kcov: collect coverage from interrupts")
Signed-off-by: Aleksandr Nogikh <nogikh@google.com>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Tested-by: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Marco Elver <elver@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kcov.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/kcov.h b/include/linux/kcov.h
index b851ba415e03..3b479a3d235a 100644
--- a/include/linux/kcov.h
+++ b/include/linux/kcov.h
@@ -21,6 +21,8 @@ enum kcov_mode {
 	KCOV_MODE_TRACE_PC = 2,
 	/* Collecting comparison operands mode. */
 	KCOV_MODE_TRACE_CMP = 3,
+	/* The process owns a KCOV remote reference. */
+	KCOV_MODE_REMOTE = 4,
 };
 
 #define KCOV_IN_CTXSW	(1 << 30)
-- 
cgit v1.2.3


From 4604b3888f614a353c7afa17bdc8e7393bb1f9a1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 5 Jun 2024 21:51:33 +0300
Subject: hwrng: core - Remove list.h from the hw_random.h

The 'struct list' type is defined in types.h, no need to include list.h
for that.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/hw_random.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 136e9842120e..b424555753b1 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -13,9 +13,8 @@
 #define LINUX_HWRANDOM_H_
 
 #include <linux/completion.h>
-#include <linux/types.h>
-#include <linux/list.h>
 #include <linux/kref.h>
+#include <linux/types.h>
 
 /**
  * struct hwrng - Hardware Random Number Generator driver
-- 
cgit v1.2.3


From 0eb3bed57a06a20c612012127f4405d48fa4a50f Mon Sep 17 00:00:00 2001
From: Stefan Berger <stefanb@linux.ibm.com>
Date: Fri, 7 Jun 2024 18:34:17 -0400
Subject: crypto: ecc - Add comment to ecc_digits_from_bytes about input byte
 array

Add comment to ecc_digits_from_bytes kdoc that the first byte is expected
to hold the most significant bits of the large integer that is converted
into an array of digits.

Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/ecc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/crypto/internal/ecc.h b/include/crypto/internal/ecc.h
index f7e75e1e71f3..0717a53ae732 100644
--- a/include/crypto/internal/ecc.h
+++ b/include/crypto/internal/ecc.h
@@ -63,6 +63,9 @@ static inline void ecc_swap_digits(const void *in, u64 *out, unsigned int ndigit
  * @nbytes    Size of input byte array
  * @out       Output digits array
  * @ndigits:  Number of digits to create from byte array
+ *
+ * The first byte in the input byte array is expected to hold the most
+ * significant bits of the large integer.
  */
 void ecc_digits_from_bytes(const u8 *in, unsigned int nbytes,
 			   u64 *out, unsigned int ndigits);
-- 
cgit v1.2.3


From 8043832e2a123fd9372007a29192f2f3ba328cd6 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (IBM)" <rppt@kernel.org>
Date: Fri, 14 Jun 2024 11:05:43 +0300
Subject: memblock: use numa_valid_node() helper to check for invalid node ID

Introduce numa_valid_node(nid) that verifies that nid is a valid node ID
and use that instead of comparing nid parameter with either NUMA_NO_NODE
or MAX_NUMNODES.

This makes the checks for valid node IDs consistent and more robust and
allows to get rid of multiple WARNings.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org>
---
 include/linux/numa.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/numa.h b/include/linux/numa.h
index 1d43371fafd2..eb19503604fe 100644
--- a/include/linux/numa.h
+++ b/include/linux/numa.h
@@ -15,6 +15,11 @@
 #define	NUMA_NO_NODE	(-1)
 #define	NUMA_NO_MEMBLK	(-1)
 
+static inline bool numa_valid_node(int nid)
+{
+	return nid >= 0 && nid < MAX_NUMNODES;
+}
+
 /* optionally keep NUMA memory info available post init */
 #ifdef CONFIG_NUMA_KEEP_MEMINFO
 #define __initdata_or_meminfo
-- 
cgit v1.2.3


From d98995b4bf981519dde4af0a081c393d62474039 Mon Sep 17 00:00:00 2001
From: Jianbo Liu <jianbol@nvidia.com>
Date: Mon, 3 Jun 2024 13:26:37 +0300
Subject: net/mlx5: Reimplement write combining test

The test of write combining was added before in mlx5_ib driver. It
opens UD QP and posts NOP WQEs, and uses BlueFlame doorbell. When
BlueFlame is used, WQEs get written directly to a PCI BAR of the
device (in addition to memory) so that the device handles them without
having to access memory.

In this test, the WQEs written in memory are different from the ones
written to the BlueFlame which request CQE update. By checking the
completion reports posted on CQ, we can know if BlueFlame succeeds or
not. The write combining must be supported if BlueFlame succeeds as
its register is written using write combining.

This patch reimplements the test in the same way, but using a pair of
SQ and CQ only. It is moved to mlx5_core as a general feature used by
both mlx5_core and mlx5_ib.

Besides, save write combine test result of the PCI function, so that
its thousands of child functions such as SF can query without paying
the time and resource penalty by itself. The test function is called
only after failing to get the cached result. With this enhancement,
all thousands of SFs of the PF attached to same driver no longer need
to perform WC check explicitly, which is already done in the system.
This saves several commands per SF, thereby speeds up SF creation and
also saves completion EQ creation.

Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/r/4ff5a8cc4c5b5b0d98397baa45a5019bcdbf096e.1717409369.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/linux/mlx5/driver.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 779cfdf2e9d6..0d31f77396fc 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -766,6 +766,12 @@ struct mlx5_hca_cap {
 	u32 max[MLX5_UN_SZ_DW(hca_cap_union)];
 };
 
+enum mlx5_wc_state {
+	MLX5_WC_STATE_UNINITIALIZED,
+	MLX5_WC_STATE_UNSUPPORTED,
+	MLX5_WC_STATE_SUPPORTED,
+};
+
 struct mlx5_core_dev {
 	struct device *device;
 	enum mlx5_coredev_type coredev_type;
@@ -824,6 +830,9 @@ struct mlx5_core_dev {
 #endif
 	u64 num_ipsec_offloads;
 	struct mlx5_sd          *sd;
+	enum mlx5_wc_state wc_state;
+	/* sync write combining state */
+	struct mutex wc_state_lock;
 };
 
 struct mlx5_db {
@@ -1375,4 +1384,6 @@ static inline bool mlx5_is_macsec_roce_supported(struct mlx5_core_dev *mdev)
 enum {
 	MLX5_OCTWORD = 16,
 };
+
+bool mlx5_wc_support_get(struct mlx5_core_dev *mdev);
 #endif /* MLX5_DRIVER_H */
-- 
cgit v1.2.3


From b339e0a39dc37726712b9f0485d78fe4306d1667 Mon Sep 17 00:00:00 2001
From: Patrisious Haddad <phaddad@nvidia.com>
Date: Thu, 13 Jun 2024 21:00:04 +0300
Subject: RDMA/mlx5: Add Qcounters
 req_transport_retries_exceeded/req_rnr_retries_exceeded

The req_transport_retries_exceeded counter shows the number of times
requester detected transport retries exceed error.

The req_rnr_retries_exceeded counter show the number of times the
requester detected RNR NAKs retries exceed error.

Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Link: https://lore.kernel.org/r/250466af94f4989d638fab168e246035530e912f.1718301543.git.leon@kernel.org
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/linux/mlx5/mlx5_ifc.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 5df52e15f7d6..09d9d87d62c6 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5629,7 +5629,11 @@ struct mlx5_ifc_query_q_counter_out_bits {
 
 	u8         local_ack_timeout_err[0x20];
 
-	u8         reserved_at_320[0xa0];
+	u8         reserved_at_320[0x60];
+
+	u8         req_rnr_retries_exceeded[0x20];
+
+	u8         reserved_at_3a0[0x20];
 
 	u8         resp_local_length_error[0x20];
 
-- 
cgit v1.2.3


From 81cc927d9c5eefd4a1b08e16b0ab2263f36d03f7 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@suse.de>
Date: Thu, 23 May 2024 17:45:17 -0400
Subject: io_uring: Drop per-ctx dummy_ubuf

Commit 19a63c402170 ("io_uring/rsrc: keep one global dummy_ubuf")
replaced it with a global static object but this stayed behind.

Fixes: 19a63c402170 ("io_uring/rsrc: keep one global dummy_ubuf")
Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
Link: https://lore.kernel.org/r/20240523214517.31803-1-krisman@suse.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index b48570eaa449..93c9044ec3fe 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -373,7 +373,6 @@ struct io_ring_ctx {
 	struct io_restriction		restrictions;
 
 	/* slow path rsrc auxilary data, used by update/register */
-	struct io_mapped_ubuf		*dummy_ubuf;
 	struct io_rsrc_data		*file_data;
 	struct io_rsrc_data		*buf_data;
 
-- 
cgit v1.2.3


From 200f3abd14db55f9aadcb74f4e7a678f1c469ba1 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 3 Jun 2024 11:51:19 -0600
Subject: io_uring/eventfd: move eventfd handling to separate file

This is pretty nicely abstracted already, but let's move it to a separate
file rather than have it in the main io_uring file. With that, we can
also move the io_ev_fd struct and enum out of global scope.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 93c9044ec3fe..850e30be9322 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -211,14 +211,6 @@ struct io_submit_state {
 	struct blk_plug		plug;
 };
 
-struct io_ev_fd {
-	struct eventfd_ctx	*cq_ev_fd;
-	unsigned int		eventfd_async: 1;
-	struct rcu_head		rcu;
-	atomic_t		refs;
-	atomic_t		ops;
-};
-
 struct io_alloc_cache {
 	void			**entries;
 	unsigned int		nr_cached;
-- 
cgit v1.2.3


From 3474d1b93f897ab33ce160e759afd47d5f412de4 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@r7625.kernel.dk>
Date: Thu, 13 Jun 2024 19:28:27 +0000
Subject: io_uring/io-wq: make io_wq_work flags atomic

The work flags can be set/accessed from different tasks, both the
originator of the request, and the io-wq workers. While modifications
aren't concurrent, it still makes KMSAN unhappy. There's no real
downside to just making the flag reading/manipulation use proper
atomics here.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 850e30be9322..1052a68fd68d 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -50,7 +50,7 @@ struct io_wq_work_list {
 
 struct io_wq_work {
 	struct io_wq_work_node list;
-	unsigned flags;
+	atomic_t flags;
 	/* place it here instead of io_kiocb as it fills padding and saves 4B */
 	int cancel_seq;
 };
-- 
cgit v1.2.3


From c3042a5403ef2be622023fcc3b11fc1aa08ba7fa Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Fri, 14 Jun 2024 09:03:44 +0000
Subject: block: Drop locking annotation for limits_lock

Currently compiling block/blk-settings.c with C=1 gives the following
warning:
block/blk-settings.c:262:9: warning: context imbalance in 'queue_limits_commit_update' - wrong count at exit

request_queue.limits_lock is a mutex. Sparse locking annotation for
mutexes are currently not supported - see [0] - so drop that locking
annotation.

[0] https://lore.kernel.org/lkml/cover.1579893447.git.jbi.octave@gmail.com/T/#mbb8bda6c0a7ca7ce19f46df976a8e3b489745488

Fixes: d690cb8ae14bd ("block: add an API to atomically update queue limits")
Signed-off-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20240614090345.655716-3-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0c247a716885..83af6ac5aa48 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -893,7 +893,6 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset,
  */
 static inline struct queue_limits
 queue_limits_start_update(struct request_queue *q)
-	__acquires(q->limits_lock)
 {
 	mutex_lock(&q->limits_lock);
 	return q->limits;
-- 
cgit v1.2.3


From 1ccfd1a4c809d99891142c1bb9851daa42e40f0d Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 6 Jun 2024 08:51:54 +0900
Subject: firewire: core: arrangement header inclusion for tracepoints events

It is a bit inconvenient to put the relative path to local header from
tree-wide header.

This commit delegates the selection to include headers into users.

Link: https://lore.kernel.org/r/20240605235155.116468-11-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index 5ccc0d91b220..1a18533778f1 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -11,7 +11,7 @@
 
 #include <linux/firewire-constants.h>
 
-#include "../../../drivers/firewire/packet-header-definitions.h"
+// Some macros are defined in 'drivers/firewire/packet-header-definitions.h'.
 
 // The content of TP_printk field is preprocessed, then put to the module binary.
 #define ASYNC_HEADER_GET_DESTINATION(header)	\
-- 
cgit v1.2.3


From 677ceae190732ee844f940d1e4860af4022d2be3 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 6 Jun 2024 08:51:55 +0900
Subject: firewire: core: add tracepoints event for self_id_sequence

It is helpful to trace the content of self ID sequence when the core
function building bus topology.

This commit adds a tracepoints event fot the purpose. It seems not to
achieve printing variable length of array in print time without any
storage, thus the structure of event includes a superfluous array to store
the state of port. Additionally, there is no helper function to print
symbol array, thus the state of port is printed as raw value.

Link: https://lore.kernel.org/r/20240605235155.116468-12-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 59 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index 1a18533778f1..1d0d63b0f8e7 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -366,6 +366,65 @@ TRACE_EVENT(bus_reset_handle,
 	)
 );
 
+// Some macros are defined in 'drivers/firewire/phy-packet-definitions.h'.
+
+// The content of TP_printk field is preprocessed, then put to the module binary.
+
+#define PHY_PACKET_SELF_ID_GET_PHY_ID(quads)		\
+	((((const u32 *)quads)[0] & SELF_ID_PHY_ID_MASK) >> SELF_ID_PHY_ID_SHIFT)
+
+#define PHY_PACKET_SELF_ID_GET_LINK_ACTIVE(quads)	\
+	((((const u32 *)quads)[0] & SELF_ID_ZERO_LINK_ACTIVE_MASK) >> SELF_ID_ZERO_LINK_ACTIVE_SHIFT)
+
+#define PHY_PACKET_SELF_ID_GET_GAP_COUNT(quads)		\
+	((((const u32 *)quads)[0] & SELF_ID_ZERO_GAP_COUNT_MASK) >> SELF_ID_ZERO_GAP_COUNT_SHIFT)
+
+#define PHY_PACKET_SELF_ID_GET_SCODE(quads)		\
+	((((const u32 *)quads)[0] & SELF_ID_ZERO_SCODE_MASK) >> SELF_ID_ZERO_SCODE_SHIFT)
+
+#define PHY_PACKET_SELF_ID_GET_CONTENDER(quads)		\
+	((((const u32 *)quads)[0] & SELF_ID_ZERO_CONTENDER_MASK) >> SELF_ID_ZERO_CONTENDER_SHIFT)
+
+#define PHY_PACKET_SELF_ID_GET_POWER_CLASS(quads)	\
+	((((const u32 *)quads)[0] & SELF_ID_ZERO_POWER_CLASS_MASK) >> SELF_ID_ZERO_POWER_CLASS_SHIFT)
+
+#define PHY_PACKET_SELF_ID_GET_INITIATED_RESET(quads)	\
+	((((const u32 *)quads)[0] & SELF_ID_ZERO_INITIATED_RESET_MASK) >> SELF_ID_ZERO_INITIATED_RESET_SHIFT)
+
+void copy_port_status(u8 *port_status, unsigned int port_capacity, const u32 *self_id_sequence,
+		      unsigned int quadlet_count);
+
+TRACE_EVENT(self_id_sequence,
+	TP_PROTO(const u32 *self_id_sequence, unsigned int quadlet_count, unsigned int generation),
+	TP_ARGS(self_id_sequence, quadlet_count, generation),
+	TP_STRUCT__entry(
+		__field(u8, generation)
+		__dynamic_array(u8, port_status, self_id_sequence_get_port_capacity(quadlet_count))
+		__dynamic_array(u32, self_id_sequence, quadlet_count)
+	),
+	TP_fast_assign(
+		__entry->generation = generation;
+		copy_port_status(__get_dynamic_array(port_status), __get_dynamic_array_len(port_status),
+				 self_id_sequence, quadlet_count);
+		memcpy(__get_dynamic_array(self_id_sequence), self_id_sequence,
+					   __get_dynamic_array_len(self_id_sequence));
+	),
+	TP_printk(
+		"generation=%u phy_id=0x%02x link_active=%s gap_count=%u scode=%u contender=%s power_class=%u initiated_reset=%s port_status=%s self_id_sequence=%s",
+		__entry->generation,
+		PHY_PACKET_SELF_ID_GET_PHY_ID(__get_dynamic_array(self_id_sequence)),
+		PHY_PACKET_SELF_ID_GET_LINK_ACTIVE(__get_dynamic_array(self_id_sequence)) ? "true" : "false",
+		PHY_PACKET_SELF_ID_GET_GAP_COUNT(__get_dynamic_array(self_id_sequence)),
+		PHY_PACKET_SELF_ID_GET_SCODE(__get_dynamic_array(self_id_sequence)),
+		PHY_PACKET_SELF_ID_GET_CONTENDER(__get_dynamic_array(self_id_sequence)) ? "true" : "false",
+		PHY_PACKET_SELF_ID_GET_POWER_CLASS(__get_dynamic_array(self_id_sequence)),
+		PHY_PACKET_SELF_ID_GET_INITIATED_RESET(__get_dynamic_array(self_id_sequence)) ? "true" : "false",
+		__print_array(__get_dynamic_array(port_status), __get_dynamic_array_len(port_status), 1),
+		__print_array(__get_dynamic_array(self_id_sequence),
+			      __get_dynamic_array_len(self_id_sequence) / QUADLET_SIZE, QUADLET_SIZE)
+	)
+);
+
 #undef QUADLET_SIZE
 
 #endif // _FIREWIRE_TRACE_EVENT_H
-- 
cgit v1.2.3


From 2fd22faf0e9f120c39e6f47e5622b0039bb10817 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Fri, 14 Jun 2024 09:42:51 +0900
Subject: firewire: core: record card index in tracepoints event for self ID
 sequence

This patch is for for-next branch.

The selfIDComplete event occurs in the bus managed by one of 1394 OHCI
controller in Linux system, while the existing tracepoints events has
the lack of data about it to distinguish the issued hardware from the
others.

This commit adds card_index member into event structure to store the index
of host controller in use, and prints it.

Link: https://lore.kernel.org/r/20240614004251.460649-1-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index 1d0d63b0f8e7..d237a30d197b 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -395,14 +395,16 @@ void copy_port_status(u8 *port_status, unsigned int port_capacity, const u32 *se
 		      unsigned int quadlet_count);
 
 TRACE_EVENT(self_id_sequence,
-	TP_PROTO(const u32 *self_id_sequence, unsigned int quadlet_count, unsigned int generation),
-	TP_ARGS(self_id_sequence, quadlet_count, generation),
+	TP_PROTO(unsigned int card_index, const u32 *self_id_sequence, unsigned int quadlet_count, unsigned int generation),
+	TP_ARGS(card_index, self_id_sequence, quadlet_count, generation),
 	TP_STRUCT__entry(
+		__field(u8, card_index)
 		__field(u8, generation)
 		__dynamic_array(u8, port_status, self_id_sequence_get_port_capacity(quadlet_count))
 		__dynamic_array(u32, self_id_sequence, quadlet_count)
 	),
 	TP_fast_assign(
+		__entry->card_index = card_index;
 		__entry->generation = generation;
 		copy_port_status(__get_dynamic_array(port_status), __get_dynamic_array_len(port_status),
 				 self_id_sequence, quadlet_count);
@@ -410,7 +412,8 @@ TRACE_EVENT(self_id_sequence,
 					   __get_dynamic_array_len(self_id_sequence));
 	),
 	TP_printk(
-		"generation=%u phy_id=0x%02x link_active=%s gap_count=%u scode=%u contender=%s power_class=%u initiated_reset=%s port_status=%s self_id_sequence=%s",
+		"card_index=%u generation=%u phy_id=0x%02x link_active=%s gap_count=%u scode=%u contender=%s power_class=%u initiated_reset=%s port_status=%s self_id_sequence=%s",
+		__entry->card_index,
 		__entry->generation,
 		PHY_PACKET_SELF_ID_GET_PHY_ID(__get_dynamic_array(self_id_sequence)),
 		PHY_PACKET_SELF_ID_GET_LINK_ACTIVE(__get_dynamic_array(self_id_sequence)) ? "true" : "false",
-- 
cgit v1.2.3


From 1c75adb22d49ca9389333ca5e6939052a7203111 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Thu, 13 Jun 2024 22:59:09 +0200
Subject: ASoC: SOF: mediatek: Constify struct mtk_adsp_ipc_ops

'struct mtk_adsp_ipc_ops' is not modified in these drivers.

Constifying this structure moves some data to a read-only section, so
increase overall security.

In order to do it, "struct mtk_adsp_ipc" also needs to be adjusted to this
new const qualifier.

On a x86_64, with allmodconfig:
Before:
======
   text	   data	    bss	    dec	    hex	filename
  15533	   2383	      0	  17916	   45fc	sound/soc/sof/mediatek/mt8195/mt8195.o

After:
=====
   text	   data	    bss	    dec	    hex	filename
  15557	   2367	      0	  17924	   4604	sound/soc/sof/mediatek/mt8195/mt8195.o

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://msgid.link/r/a45d6b2b5ec040ea0fc78fca662c2dca3f13a49f.1718312321.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/firmware/mediatek/mtk-adsp-ipc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/firmware/mediatek/mtk-adsp-ipc.h b/include/linux/firmware/mediatek/mtk-adsp-ipc.h
index 5b1d16fa3f56..6e86799a7dc4 100644
--- a/include/linux/firmware/mediatek/mtk-adsp-ipc.h
+++ b/include/linux/firmware/mediatek/mtk-adsp-ipc.h
@@ -40,7 +40,7 @@ struct mtk_adsp_chan {
 struct mtk_adsp_ipc {
 	struct mtk_adsp_chan chans[MTK_ADSP_MBOX_NUM];
 	struct device *dev;
-	struct mtk_adsp_ipc_ops *ops;
+	const struct mtk_adsp_ipc_ops *ops;
 	void *private_data;
 };
 
-- 
cgit v1.2.3


From f22b4b55edb507a2b30981e133b66b642be4d13f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 13 Jun 2024 14:33:16 -0700
Subject: net: make for_each_netdev_dump() a little more bug-proof

I find the behavior of xa_for_each_start() slightly counter-intuitive.
It doesn't end the iteration by making the index point after the last
element. IOW calling xa_for_each_start() again after it "finished"
will run the body of the loop for the last valid element, instead
of doing nothing.

This works fine for netlink dumps if they terminate correctly
(i.e. coalesce or carefully handle NLM_DONE), but as we keep getting
reminded legacy dumps are unlikely to go away.

Fixing this generically at the xa_for_each_start() level seems hard -
there is no index reserved for "end of iteration".
ifindexes are 31b wide, tho, and iterator is ulong so for
for_each_netdev_dump() it's safe to go to the next element.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f148a01dd1d1..85111502cf8f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3021,7 +3021,8 @@ int call_netdevice_notifiers_info(unsigned long val,
 #define net_device_entry(lh)	list_entry(lh, struct net_device, dev_list)
 
 #define for_each_netdev_dump(net, d, ifindex)				\
-	xa_for_each_start(&(net)->dev_by_index, (ifindex), (d), (ifindex))
+	for (; (d = xa_find(&(net)->dev_by_index, &ifindex,		\
+			    ULONG_MAX, XA_PRESENT)); ifindex++)
 
 static inline struct net_device *next_net_device(struct net_device *dev)
 {
-- 
cgit v1.2.3


From d25a92ccae6bed02327b63d138e12e7806830f78 Mon Sep 17 00:00:00 2001
From: "D. Wythe" <alibuda@linux.alibaba.com>
Date: Fri, 14 Jun 2024 02:00:30 +0800
Subject: net/smc: Introduce IPPROTO_SMC

This patch allows to create smc socket via AF_INET,
similar to the following code,

/* create v4 smc sock */
v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);

/* create v6 smc sock */
v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);

There are several reasons why we believe it is appropriate here:

1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
address. There is no AF_SMC address at all.

2. Create smc socket in the AF_INET(6) path, which allows us to reuse
the infrastructure of AF_INET(6) path, such as common ebpf hooks.
Otherwise, smc have to implement it again in AF_SMC path.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
Reviewed-by: Dust Li <dust.li@linux.alibaba.com>
Tested-by: Niklas Schnelle <schnelle@linux.ibm.com>
Tested-by: Wenjia Zhang <wenjia@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/in.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index e682ab628dfa..d358add1611c 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -81,6 +81,8 @@ enum {
 #define IPPROTO_ETHERNET	IPPROTO_ETHERNET
   IPPROTO_RAW = 255,		/* Raw IP packets			*/
 #define IPPROTO_RAW		IPPROTO_RAW
+  IPPROTO_SMC = 256,		/* Shared Memory Communications		*/
+#define IPPROTO_SMC		IPPROTO_SMC
   IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
 #define IPPROTO_MPTCP		IPPROTO_MPTCP
   IPPROTO_MAX
-- 
cgit v1.2.3


From a43b9ec091b1b1924ea18883a715e5aadba2543e Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Wed, 29 May 2024 10:19:20 -0700
Subject: peci, hwmon: Switch to new Intel CPU model defines

Update peci subsystem to use the same vendor-family-model
combined definition that core x86 code uses.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Iwona Winiarska <iwona.winiarska@intel.com>
Link: https://lore.kernel.org/r/20240529171920.62571-1-tony.luck@intel.com
Signed-off-by: Iwona Winiarska <iwona.winiarska@intel.com>
---
 include/linux/peci-cpu.h | 24 ++++++++++++++++++++++++
 include/linux/peci.h     |  6 ++----
 2 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/peci-cpu.h b/include/linux/peci-cpu.h
index ff8ae9c26c80..601cdd086bf6 100644
--- a/include/linux/peci-cpu.h
+++ b/include/linux/peci-cpu.h
@@ -6,6 +6,30 @@
 
 #include <linux/types.h>
 
+/* Copied from x86 <asm/processor.h> */
+#define X86_VENDOR_INTEL       0
+
+/* Copied from x86 <asm/cpu_device_id.h> */
+#define VFM_MODEL_BIT	0
+#define VFM_FAMILY_BIT	8
+#define VFM_VENDOR_BIT	16
+#define VFM_RSVD_BIT	24
+
+#define	VFM_MODEL_MASK	GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT)
+#define	VFM_FAMILY_MASK	GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT)
+#define	VFM_VENDOR_MASK	GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT)
+
+#define VFM_MODEL(vfm)	(((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT)
+#define VFM_FAMILY(vfm)	(((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT)
+#define VFM_VENDOR(vfm)	(((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT)
+
+#define	VFM_MAKE(_vendor, _family, _model) (	\
+	((_model) << VFM_MODEL_BIT) |		\
+	((_family) << VFM_FAMILY_BIT) |		\
+	((_vendor) << VFM_VENDOR_BIT)		\
+)
+/* End of copied code */
+
 #include "../../arch/x86/include/asm/intel-family.h"
 
 #define PECI_PCS_PKG_ID			0  /* Package Identifier Read */
diff --git a/include/linux/peci.h b/include/linux/peci.h
index 90e241458ef6..3e0bc37591d6 100644
--- a/include/linux/peci.h
+++ b/include/linux/peci.h
@@ -59,8 +59,7 @@ static inline struct peci_controller *to_peci_controller(void *d)
  * struct peci_device - PECI device
  * @dev: device object to register PECI device to the device model
  * @info: PECI device characteristics
- * @info.family: device family
- * @info.model: device model
+ * @info.x86_vfm: device vendor-family-model
  * @info.peci_revision: PECI revision supported by the PECI device
  * @info.socket_id: the socket ID represented by the PECI device
  * @addr: address used on the PECI bus connected to the parent controller
@@ -73,8 +72,7 @@ static inline struct peci_controller *to_peci_controller(void *d)
 struct peci_device {
 	struct device dev;
 	struct {
-		u16 family;
-		u8 model;
+		u32 x86_vfm;
 		u8 peci_revision;
 		u8 socket_id;
 	} info;
-- 
cgit v1.2.3


From f45a6051d582f613f4abc383ae238661f7813302 Mon Sep 17 00:00:00 2001
From: Costa Shulyupin <costa.shul@redhat.com>
Date: Mon, 25 Mar 2024 18:38:10 +0200
Subject: cpu/hotplug: Fix typo in comment

Signed-off-by: Costa Shulyupin <costa.shul@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240325163810.669459-1-costa.shul@redhat.com
---
 include/linux/cpuhotplug.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 7a5785f405b6..7f6c820c12eb 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -27,7 +27,7 @@
  * startup callbacks sequentially from CPUHP_OFFLINE + 1 to CPUHP_ONLINE
  * during a CPU online operation. During a CPU offline operation the
  * installed teardown callbacks are invoked in the reverse order from
- * CPU_ONLINE - 1 down to CPUHP_OFFLINE.
+ * CPUHP_ONLINE - 1 down to CPUHP_OFFLINE.
  *
  * The state space has three sections: PREPARE, STARTING and ONLINE.
  *
-- 
cgit v1.2.3


From 299d623f5c9ab48e53255cf6b510627f1ef26dfe Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Fri, 14 Jun 2024 19:32:03 +0200
Subject: irqdomain: Introduce irq_domain_instantiate()

The existing irq_domain_add_*() functions used to instantiate an IRQ
domain are wrappers built on top of __irq_domain_add() and describe the
domain properties using a bunch of parameters.

Adding more parameters and wrappers to hide new parameters in the
existing code lead to more and more code without any relevant value and
without any flexibility.

Introduce irq_domain_instantiate() where the interrupt domain properties
are given using a irq_domain_info structure instead of the bunch of
parameters to allow flexibility and easy evolution.

irq_domain_instantiate() performs the same operation as the one done by
__irq_domain_add(). For compatibility reason with existing code, keep
__irq_domain_add() but convert it to irq_domain_instantiate().

[ tglx: Fixed up struct initializer coding style ]

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240614173232.1184015-3-herve.codina@bootlin.com
---
 include/linux/irqdomain.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 21ecf582a0fe..ab8939c8724d 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -257,6 +257,27 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa)
 }
 
 void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
+/**
+ * struct irq_domain_info - Domain information structure
+ * @fwnode:		firmware node for the interrupt controller
+ * @size:		Size of linear map; 0 for radix mapping only
+ * @hwirq_max:		Maximum number of interrupts supported by controller
+ * @direct_max:		Maximum value of direct maps;
+ *			Use ~0 for no limit; 0 for no direct mapping
+ * @ops:		Domain operation callbacks
+ * @host_data:		Controller private data pointer
+ */
+struct irq_domain_info {
+	struct fwnode_handle			*fwnode;
+	unsigned int				size;
+	irq_hw_number_t				hwirq_max;
+	int					direct_max;
+	const struct irq_domain_ops		*ops;
+	void					*host_data;
+};
+
+struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info);
+
 struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
 				    irq_hw_number_t hwirq_max, int direct_max,
 				    const struct irq_domain_ops *ops,
-- 
cgit v1.2.3


From 922ac2cf9fe444c4aff165b9f7e158a9b651647d Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Fri, 14 Jun 2024 19:32:05 +0200
Subject: irqdomain: Constify parameter in is_fwnode_irqchip()

The fwnode parameter has no reason to be a pointer to an un-const struct
fwnode_handle. Indeed, struct fwnode_handle is not supposed to be modified
by the function.

Be consistent with other function performing the same kind of operation
such as is_of_node(), is_acpi_device_node() or is_software_node(): constify
the fwnode parameter.

Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240614173232.1184015-5-herve.codina@bootlin.com
---
 include/linux/irqdomain.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index ab8939c8724d..a3b43e357009 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -314,7 +314,7 @@ static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node)
 
 extern const struct fwnode_operations irqchip_fwnode_ops;
 
-static inline bool is_fwnode_irqchip(struct fwnode_handle *fwnode)
+static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode)
 {
 	return fwnode && fwnode->ops == &irqchip_fwnode_ops;
 }
-- 
cgit v1.2.3


From 757398541c30a5e898169763b43f08dab71ea3bd Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Fri, 14 Jun 2024 19:32:08 +0200
Subject: irqdomain: Handle additional domain flags in irq_domain_instantiate()

In order to use irq_domain_instantiate() from several places such as
irq_domain_create_hierarchy(), irq_domain_instantiate() needs to handle
additional domain flags.

Add the required infrastructure.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240614173232.1184015-8-herve.codina@bootlin.com
---
 include/linux/irqdomain.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index a3b43e357009..4683b66eded9 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -260,6 +260,7 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
 /**
  * struct irq_domain_info - Domain information structure
  * @fwnode:		firmware node for the interrupt controller
+ * @domain_flags:	Additional flags to add to the domain flags
  * @size:		Size of linear map; 0 for radix mapping only
  * @hwirq_max:		Maximum number of interrupts supported by controller
  * @direct_max:		Maximum value of direct maps;
@@ -269,6 +270,7 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
  */
 struct irq_domain_info {
 	struct fwnode_handle			*fwnode;
+	unsigned int				domain_flags;
 	unsigned int				size;
 	irq_hw_number_t				hwirq_max;
 	int					direct_max;
-- 
cgit v1.2.3


From 419e3778ff295c00aa158d9f2854a70b47ba1136 Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Fri, 14 Jun 2024 19:32:09 +0200
Subject: irqdomain: Handle domain hierarchy parent in irq_domain_instantiate()

To use irq_domain_instantiate() from irq_domain_create_hierarchy(),
irq_domain_instantiate() needs to handle the domain hierarchy parent.

Add the required functionality.

Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240614173232.1184015-9-herve.codina@bootlin.com
---
 include/linux/irqdomain.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 4683b66eded9..e52fd5e5494c 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -276,6 +276,12 @@ struct irq_domain_info {
 	int					direct_max;
 	const struct irq_domain_ops		*ops;
 	void					*host_data;
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+	/**
+	 * @parent: Pointer to the parent irq domain used in a hierarchy domain
+	 */
+	struct irq_domain			*parent;
+#endif
 };
 
 struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info);
-- 
cgit v1.2.3


From 0b21add71bd9cfa2bd6677a0300e15fd4c4b84ed Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Fri, 14 Jun 2024 19:32:12 +0200
Subject: irqdomain: Handle domain bus token in irq_domain_create()

irq_domain_update_bus_token() is the only way to set the domain bus
token. This is sub-optimal as irq_domain_update_bus_token() can be called
only once the domain is created and needs to revert some operations, change
the domain name and redo the operations.

In order to avoid this revert/change/redo sequence, take the domain bus
into account token during the domain creation.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240614173232.1184015-12-herve.codina@bootlin.com
---
 include/linux/irqdomain.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index e52fd5e5494c..52bed23e5c61 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -265,6 +265,7 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
  * @hwirq_max:		Maximum number of interrupts supported by controller
  * @direct_max:		Maximum value of direct maps;
  *			Use ~0 for no limit; 0 for no direct mapping
+ * @bus_token:		Domain bus token
  * @ops:		Domain operation callbacks
  * @host_data:		Controller private data pointer
  */
@@ -274,6 +275,7 @@ struct irq_domain_info {
 	unsigned int				size;
 	irq_hw_number_t				hwirq_max;
 	int					direct_max;
+	enum irq_domain_bus_token		bus_token;
 	const struct irq_domain_ops		*ops;
 	void					*host_data;
 #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
-- 
cgit v1.2.3


From 44b68de9b8e3dfde12308e8567548799d7ded0de Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Fri, 14 Jun 2024 19:32:13 +0200
Subject: irqdomain: Introduce init() and exit() hooks

The current API does not allow additional initialization before the
domain is published. This can lead to a race condition between consumers
and supplier as a domain can be available for consumers before being
fully ready.

Introduce the init() hook to allow additional initialization before
plublishing the domain. Also introduce the exit() hook to revert
operations done in init() on domain removal.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240614173232.1184015-13-herve.codina@bootlin.com
---
 include/linux/irqdomain.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 52bed23e5c61..2c927edc4d43 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -141,6 +141,7 @@ struct irq_domain_chip_generic;
  *		purposes related to the irq domain.
  * @parent:	Pointer to parent irq_domain to support hierarchy irq_domains
  * @msi_parent_ops: Pointer to MSI parent domain methods for per device domain init
+ * @exit:	Function called when the domain is destroyed
  *
  * Revmap data, used internally by the irq domain code:
  * @revmap_size:	Size of the linear map table @revmap[]
@@ -169,6 +170,7 @@ struct irq_domain {
 #ifdef CONFIG_GENERIC_MSI_IRQ
 	const struct msi_parent_ops	*msi_parent_ops;
 #endif
+	void				(*exit)(struct irq_domain *d);
 
 	/* reverse map data. The linear map gets appended to the irq_domain */
 	irq_hw_number_t			hwirq_max;
@@ -268,6 +270,10 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
  * @bus_token:		Domain bus token
  * @ops:		Domain operation callbacks
  * @host_data:		Controller private data pointer
+ * @init:		Function called when the domain is created.
+ *			Allow to do some additional domain initialisation.
+ * @exit:		Function called when the domain is destroyed.
+ *			Allow to do some additional cleanup operation.
  */
 struct irq_domain_info {
 	struct fwnode_handle			*fwnode;
@@ -284,6 +290,8 @@ struct irq_domain_info {
 	 */
 	struct irq_domain			*parent;
 #endif
+	int					(*init)(struct irq_domain *d);
+	void					(*exit)(struct irq_domain *d);
 };
 
 struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info);
-- 
cgit v1.2.3


From e25f553a92973eaf59ff3a00fe7f61ab01b2877f Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Fri, 14 Jun 2024 19:32:14 +0200
Subject: genirq/generic_chip: Introduce
 irq_domain_{alloc,remove}_generic_chips()

The existing __irq_alloc_domain_generic_chips() uses a bunch of parameters
to describe the generic chips that need to be allocated.

Adding more parameters and wrappers to hide new parameters in the existing
code leads to more and more code without any relevant values and without
any flexibility.

Introduce irq_domain_alloc_generic_chips() where the generic chips
description is done using the irq_domain_chip_generic_info structure
instead of the bunch of parameters to allow flexibility and easy evolution.

Also introduce irq_domain_remove_generic_chips() to revert the operations
done by irq_domain_alloc_generic_chips().

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240614173232.1184015-14-herve.codina@bootlin.com
---
 include/linux/irq.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index a217e1029c1d..58264b236cbf 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -1117,6 +1117,27 @@ struct irq_domain_chip_generic {
 	struct irq_chip_generic	*gc[];
 };
 
+/**
+ * struct irq_domain_chip_generic_info - Generic chip information structure
+ * @name:		Name of the generic interrupt chip
+ * @handler:		Interrupt handler used by the generic interrupt chip
+ * @irqs_per_chip:	Number of interrupts each chip handles (max 32)
+ * @num_ct:		Number of irq_chip_type instances associated with each
+ *			chip
+ * @irq_flags_to_clear:	IRQ_* bits to clear in the mapping function
+ * @irq_flags_to_set:	IRQ_* bits to set in the mapping function
+ * @gc_flags:		Generic chip specific setup flags
+ */
+struct irq_domain_chip_generic_info {
+	const char		*name;
+	irq_flow_handler_t	handler;
+	unsigned int		irqs_per_chip;
+	unsigned int		num_ct;
+	unsigned int		irq_flags_to_clear;
+	unsigned int		irq_flags_to_set;
+	enum irq_gc_flags	gc_flags;
+};
+
 /* Generic chip callback functions */
 void irq_gc_noop(struct irq_data *d);
 void irq_gc_mask_disable_reg(struct irq_data *d);
@@ -1153,6 +1174,10 @@ int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc,
 
 struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq);
 
+int irq_domain_alloc_generic_chips(struct irq_domain *d,
+				   const struct irq_domain_chip_generic_info *info);
+void irq_domain_remove_generic_chips(struct irq_domain *d);
+
 int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
 				     int num_ct, const char *name,
 				     irq_flow_handler_t handler,
-- 
cgit v1.2.3


From fea922ee9f8ffd3c2a8e8dfbc68de42905a3982a Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Fri, 14 Jun 2024 19:32:15 +0200
Subject: genirq/generic_chip: Introduce init() and exit() hooks

Most of generic chip drivers need to perform some more additional
initializations on the generic chips allocated before they can be fully
ready.

These additional initializations need to be performed before the IRQ
domain is published to avoid a race condition between IRQ consumers and
suppliers.

Introduce the init() hook to perform these initializations at the right
place just after the generic chip creation. Also introduce the exit() hook
to allow reverting operations done by the init() hook just before the
generic chip is destroyed.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240614173232.1184015-15-herve.codina@bootlin.com
---
 include/linux/irq.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 58264b236cbf..712bcee56efc 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -1106,6 +1106,7 @@ enum irq_gc_flags {
  * @irq_flags_to_set:	IRQ* flags to set on irq setup
  * @irq_flags_to_clear:	IRQ* flags to clear on irq setup
  * @gc_flags:		Generic chip specific setup flags
+ * @exit:		Function called on each chip when they are destroyed.
  * @gc:			Array of pointers to generic interrupt chips
  */
 struct irq_domain_chip_generic {
@@ -1114,6 +1115,7 @@ struct irq_domain_chip_generic {
 	unsigned int		irq_flags_to_clear;
 	unsigned int		irq_flags_to_set;
 	enum irq_gc_flags	gc_flags;
+	void			(*exit)(struct irq_chip_generic *gc);
 	struct irq_chip_generic	*gc[];
 };
 
@@ -1127,6 +1129,10 @@ struct irq_domain_chip_generic {
  * @irq_flags_to_clear:	IRQ_* bits to clear in the mapping function
  * @irq_flags_to_set:	IRQ_* bits to set in the mapping function
  * @gc_flags:		Generic chip specific setup flags
+ * @init:		Function called on each chip when they are created.
+ *			Allow to do some additional chip initialisation.
+ * @exit:		Function called on each chip when they are destroyed.
+ *			Allow to do some chip cleanup operation.
  */
 struct irq_domain_chip_generic_info {
 	const char		*name;
@@ -1136,6 +1142,8 @@ struct irq_domain_chip_generic_info {
 	unsigned int		irq_flags_to_clear;
 	unsigned int		irq_flags_to_set;
 	enum irq_gc_flags	gc_flags;
+	int			(*init)(struct irq_chip_generic *gc);
+	void			(*exit)(struct irq_chip_generic *gc);
 };
 
 /* Generic chip callback functions */
-- 
cgit v1.2.3


From e6f67ce32e8e6dcbadf42dc435fbc9002cabf1f9 Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Fri, 14 Jun 2024 19:32:16 +0200
Subject: irqdomain: Add support for generic irq chips creation before
 publishing a domain

The current API functions create an irq_domain and also publish this
newly created to domain. Once an irq_domain is published, consumers can
request IRQ in order to use them.

Some interrupt controller drivers have to perform some more operations
with the created irq_domain in order to have it ready to be used.
For instance:
   - Allocate generic irq chips with irq_alloc_domain_generic_chips()
   - Retrieve the generic irq chips with irq_get_domain_generic_chip()
   - Initialize retrieved chips: set register base address and offsets,
     set several hooks such as irq_mask, irq_unmask, ...

With the newly introduced irq_domain_alloc_generic_chips(), an interrupt
controller driver can use the irq_domain_chip_generic_info structure and
set the init() hook to perform its generic chips initialization.

In order to avoid a window where the domain is published but not yet
ready to be used, handle the generic chip creation (i.e the
irq_domain_alloc_generic_chips() call) before the domain is published.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240614173232.1184015-16-herve.codina@bootlin.com
---
 include/linux/irqdomain.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 2c927edc4d43..5540b22a755c 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -210,6 +210,9 @@ enum {
 	/* Irq domain is a MSI device domain */
 	IRQ_DOMAIN_FLAG_MSI_DEVICE	= (1 << 9),
 
+	/* Irq domain must destroy generic chips when removed */
+	IRQ_DOMAIN_FLAG_DESTROY_GC	= (1 << 10),
+
 	/*
 	 * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
 	 * for implementation specific purposes and ignored by the
@@ -259,6 +262,9 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa)
 }
 
 void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
+
+struct irq_domain_chip_generic_info;
+
 /**
  * struct irq_domain_info - Domain information structure
  * @fwnode:		firmware node for the interrupt controller
@@ -270,6 +276,8 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
  * @bus_token:		Domain bus token
  * @ops:		Domain operation callbacks
  * @host_data:		Controller private data pointer
+ * @dgc_info:		Geneneric chip information structure pointer used to
+ *			create generic chips for the domain if not NULL.
  * @init:		Function called when the domain is created.
  *			Allow to do some additional domain initialisation.
  * @exit:		Function called when the domain is destroyed.
@@ -290,6 +298,7 @@ struct irq_domain_info {
 	 */
 	struct irq_domain			*parent;
 #endif
+	struct irq_domain_chip_generic_info	*dgc_info;
 	int					(*init)(struct irq_domain *d);
 	void					(*exit)(struct irq_domain *d);
 };
-- 
cgit v1.2.3


From 0c5b29a6dc7b463b6072da8cef43800008728ff3 Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Fri, 14 Jun 2024 19:32:17 +0200
Subject: irqdomain: Add a resource managed version of irq_domain_instantiate()

Add a devres version of irq_domain_instantiate().

Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240614173232.1184015-17-herve.codina@bootlin.com
---
 include/linux/irqdomain.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 5540b22a755c..8820317582c4 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -304,6 +304,8 @@ struct irq_domain_info {
 };
 
 struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info);
+struct irq_domain *devm_irq_domain_instantiate(struct device *dev,
+					       const struct irq_domain_info *info);
 
 struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
 				    irq_hw_number_t hwirq_max, int direct_max,
-- 
cgit v1.2.3


From 7c53626cd11820a11f9cb2c54c02d47fc062a265 Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Fri, 14 Jun 2024 19:32:18 +0200
Subject: irqdomain: Convert __irq_domain_add() wrappers to
 irq_domain_instantiate()

__irq_domain_add() wrappers use directly __irq_domain_add(). With the
introduction of irq_domain_instantiate(), __irq_domain_add() becomes
obsolete.

In order to fully remove __irq_domain_add(), convert wrappers to
irq_domain_instantiate()

[ tglx: Fixup struct initializers ]

Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240614173232.1184015-18-herve.codina@bootlin.com
---
 include/linux/irqdomain.h | 58 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 53 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 8820317582c4..33a968fbdda2 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -400,7 +400,17 @@ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_no
 					 const struct irq_domain_ops *ops,
 					 void *host_data)
 {
-	return __irq_domain_add(of_node_to_fwnode(of_node), size, size, 0, ops, host_data);
+	struct irq_domain_info info = {
+		.fwnode		= of_node_to_fwnode(of_node),
+		.size		= size,
+		.hwirq_max	= size,
+		.ops		= ops,
+		.host_data	= host_data,
+	};
+	struct irq_domain *d;
+
+	d = irq_domain_instantiate(&info);
+	return IS_ERR(d) ? NULL : d;
 }
 
 #ifdef CONFIG_IRQ_DOMAIN_NOMAP
@@ -409,7 +419,17 @@ static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_nod
 					 const struct irq_domain_ops *ops,
 					 void *host_data)
 {
-	return __irq_domain_add(of_node_to_fwnode(of_node), 0, max_irq, max_irq, ops, host_data);
+	struct irq_domain_info info = {
+		.fwnode		= of_node_to_fwnode(of_node),
+		.hwirq_max	= max_irq,
+		.direct_max	= max_irq,
+		.ops		= ops,
+		.host_data	= host_data,
+	};
+	struct irq_domain *d;
+
+	d = irq_domain_instantiate(&info);
+	return IS_ERR(d) ? NULL : d;
 }
 
 extern unsigned int irq_create_direct_mapping(struct irq_domain *host);
@@ -419,7 +439,16 @@ static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node
 					 const struct irq_domain_ops *ops,
 					 void *host_data)
 {
-	return __irq_domain_add(of_node_to_fwnode(of_node), 0, ~0, 0, ops, host_data);
+	struct irq_domain_info info = {
+		.fwnode		= of_node_to_fwnode(of_node),
+		.hwirq_max	= ~0U,
+		.ops		= ops,
+		.host_data	= host_data,
+	};
+	struct irq_domain *d;
+
+	d = irq_domain_instantiate(&info);
+	return IS_ERR(d) ? NULL : d;
 }
 
 static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode,
@@ -427,14 +456,33 @@ static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *
 					 const struct irq_domain_ops *ops,
 					 void *host_data)
 {
-	return __irq_domain_add(fwnode, size, size, 0, ops, host_data);
+	struct irq_domain_info info = {
+		.fwnode		= fwnode,
+		.size		= size,
+		.hwirq_max	= size,
+		.ops		= ops,
+		.host_data	= host_data,
+	};
+	struct irq_domain *d;
+
+	d = irq_domain_instantiate(&info);
+	return IS_ERR(d) ? NULL : d;
 }
 
 static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fwnode,
 					 const struct irq_domain_ops *ops,
 					 void *host_data)
 {
-	return __irq_domain_add(fwnode, 0, ~0, 0, ops, host_data);
+	struct irq_domain_info info = {
+		.fwnode		= fwnode,
+		.hwirq_max	= ~0,
+		.ops		= ops,
+		.host_data	= host_data,
+	};
+	struct irq_domain *d;
+
+	d = irq_domain_instantiate(&info);
+	return IS_ERR(d) ? NULL : d;
 }
 
 extern void irq_domain_remove(struct irq_domain *host);
-- 
cgit v1.2.3


From 0b4b172b760efabf8a77ea17644d333fbb444d39 Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Fri, 14 Jun 2024 19:32:21 +0200
Subject: irqdomain: Remove __irq_domain_add()

__irq_domain_add() has been replaced by irq_domain_instanciate() and so,
it is no more used.

Simply remove it.

Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240614173232.1184015-21-herve.codina@bootlin.com
---
 include/linux/irqdomain.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 33a968fbdda2..02cd486ac354 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -184,7 +184,7 @@ enum {
 	/* Irq domain is hierarchical */
 	IRQ_DOMAIN_FLAG_HIERARCHY	= (1 << 0),
 
-	/* Irq domain name was allocated in __irq_domain_add() */
+	/* Irq domain name was allocated internally */
 	IRQ_DOMAIN_NAME_ALLOCATED	= (1 << 1),
 
 	/* Irq domain is an IPI domain with virq per cpu */
@@ -307,10 +307,6 @@ struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info);
 struct irq_domain *devm_irq_domain_instantiate(struct device *dev,
 					       const struct irq_domain_info *info);
 
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
-				    irq_hw_number_t hwirq_max, int direct_max,
-				    const struct irq_domain_ops *ops,
-				    void *host_data);
 struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode,
 					    unsigned int size,
 					    unsigned int first_irq,
-- 
cgit v1.2.3


From 1037e4c53e851682ff8d1ab656567a4d5a333c93 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 14 Jun 2024 12:58:48 +0300
Subject: cpu/hotplug: Add support for declaring CPU offlining not supported

The ACPI MADT mailbox wakeup method doesn't allow to offline a CPU after
it has been woken up.

Currently, offlining is prevented based on the confidential computing attribute
which is set for Intel TDX. But TDX is not the only possible user of the wake up
method. The MADT wakeup can be implemented outside of a confidential computing
environment. Offline support is a property of the wakeup method, not the CoCo
implementation.

Introduce cpu_hotplug_disable_offlining() that can be called to indicate that
CPU offlining should be disabled.

This function is going to replace CC_ATTR_HOTPLUG_DISABLED for ACPI MADT wakeup
method.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Tao Liu <ltao@redhat.com>
Link: https://lore.kernel.org/r/20240614095904.1345461-4-kirill.shutemov@linux.intel.com
---
 include/linux/cpuhplock.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/cpuhplock.h b/include/linux/cpuhplock.h
index 431560bbd045..f7aa20f62b87 100644
--- a/include/linux/cpuhplock.h
+++ b/include/linux/cpuhplock.h
@@ -21,6 +21,7 @@ void cpus_read_lock(void);
 void cpus_read_unlock(void);
 int  cpus_read_trylock(void);
 void lockdep_assert_cpus_held(void);
+void cpu_hotplug_disable_offlining(void);
 void cpu_hotplug_disable(void);
 void cpu_hotplug_enable(void);
 void clear_tasks_mm_cpumask(int cpu);
@@ -36,6 +37,7 @@ static inline void cpus_read_lock(void) { }
 static inline void cpus_read_unlock(void) { }
 static inline int  cpus_read_trylock(void) { return true; }
 static inline void lockdep_assert_cpus_held(void) { }
+static inline void cpu_hotplug_disable_offlining(void) { }
 static inline void cpu_hotplug_disable(void) { }
 static inline void cpu_hotplug_enable(void) { }
 static inline int remove_cpu(unsigned int cpu) { return -EPERM; }
-- 
cgit v1.2.3


From 66e48e491d1e1a0f243ebfcb9639b23de1a5db5e Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 14 Jun 2024 12:58:49 +0300
Subject: cpu/hotplug, x86/acpi: Disable CPU offlining for ACPI MADT wakeup

ACPI MADT doesn't allow to offline a CPU after it has been woken up.

Currently, CPU hotplug is prevented based on the confidential computing
attribute which is set for Intel TDX. But TDX is not the only possible user of
the wake up method. Any platform that uses ACPI MADT wakeup method cannot
offline CPU.

Disable CPU offlining on ACPI MADT wakeup enumeration.

This has no visible effects for users: currently, TDX guest is the only platform
that uses the ACPI MADT wakeup method.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Tao Liu <ltao@redhat.com>
Link: https://lore.kernel.org/r/20240614095904.1345461-5-kirill.shutemov@linux.intel.com
---
 include/linux/cc_platform.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
index 60693a145894..caa4b4430634 100644
--- a/include/linux/cc_platform.h
+++ b/include/linux/cc_platform.h
@@ -81,16 +81,6 @@ enum cc_attr {
 	 */
 	CC_ATTR_GUEST_SEV_SNP,
 
-	/**
-	 * @CC_ATTR_HOTPLUG_DISABLED: Hotplug is not supported or disabled.
-	 *
-	 * The platform/OS is running as a guest/virtual machine does not
-	 * support CPU hotplug feature.
-	 *
-	 * Examples include TDX Guest.
-	 */
-	CC_ATTR_HOTPLUG_DISABLED,
-
 	/**
 	 * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host.
 	 *
-- 
cgit v1.2.3


From 6630cbce7cd7785f76b1055f33a71199ef28510b Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 14 Jun 2024 12:58:59 +0300
Subject: x86/acpi: Rename fields in the acpi_madt_multiproc_wakeup structure

In order to support MADT wakeup structure version 1, provide more appropriate
names for the fields in the structure.

Rename 'mailbox_version' to 'version'. This field signifies the version of the
structure and the related protocols, rather than the version of the mailbox.
This field has not been utilized in the code thus far.

Rename 'base_address' to 'mailbox_address' to clarify the kind of address it
represents. In version 1, the structure includes the reset vector address. Clear
and distinct naming helps to prevent any confusion.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Tao Liu <ltao@redhat.com>
Link: https://lore.kernel.org/r/20240614095904.1345461-15-kirill.shutemov@linux.intel.com
---
 include/acpi/actbl2.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index ae747c89d92c..fa63362469aa 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -1194,9 +1194,9 @@ struct acpi_madt_generic_translator {
 
 struct acpi_madt_multiproc_wakeup {
 	struct acpi_subtable_header header;
-	u16 mailbox_version;
+	u16 version;
 	u32 reserved;		/* reserved - must be zero */
-	u64 base_address;
+	u64 mailbox_address;
 };
 
 #define ACPI_MULTIPROC_WAKEUP_MB_OS_SIZE        2032
-- 
cgit v1.2.3


From 1ceebe2e46720de02af4cf626dc847ecc4a263fd Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 14 Jun 2024 12:59:03 +0300
Subject: x86/acpi: Add support for CPU offlining for ACPI MADT wakeup method

MADT Multiprocessor Wakeup structure version 1 brings support for CPU offlining:
BIOS provides a reset vector where the CPU has to jump to for offlining itself.
The new TEST mailbox command can be used to test whether the CPU offlined itself
which means the BIOS has control over the CPU and can online it again via the
ACPI MADT wakeup method.

Add CPU offlining support for the ACPI MADT wakeup method by implementing custom
cpu_die(), play_dead() and stop_this_cpu() SMP operations.

CPU offlining makes it possible to hand over secondary CPUs over kexec, not
limiting the second kernel to a single CPU.

The change conforms to the approved ACPI spec change proposal. See the Link.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Kai Huang <kai.huang@intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Tao Liu <ltao@redhat.com>
Link: https://lore.kernel.org/all/13356251.uLZWGnKmhe@kreacher
Link: https://lore.kernel.org/r/20240614095904.1345461-19-kirill.shutemov@linux.intel.com
---
 include/acpi/actbl2.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index fa63362469aa..e27958ef8264 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -1197,8 +1197,20 @@ struct acpi_madt_multiproc_wakeup {
 	u16 version;
 	u32 reserved;		/* reserved - must be zero */
 	u64 mailbox_address;
+	u64 reset_vector;
 };
 
+/* Values for Version field above */
+
+enum acpi_madt_multiproc_wakeup_version {
+	ACPI_MADT_MP_WAKEUP_VERSION_NONE = 0,
+	ACPI_MADT_MP_WAKEUP_VERSION_V1 = 1,
+	ACPI_MADT_MP_WAKEUP_VERSION_RESERVED = 2, /* 2 and greater are reserved */
+};
+
+#define ACPI_MADT_MP_WAKEUP_SIZE_V0	16
+#define ACPI_MADT_MP_WAKEUP_SIZE_V1	24
+
 #define ACPI_MULTIPROC_WAKEUP_MB_OS_SIZE        2032
 #define ACPI_MULTIPROC_WAKEUP_MB_FIRMWARE_SIZE  2048
 
@@ -1211,7 +1223,8 @@ struct acpi_madt_multiproc_wakeup_mailbox {
 	u8 reserved_firmware[ACPI_MULTIPROC_WAKEUP_MB_FIRMWARE_SIZE];	/* reserved for firmware use */
 };
 
-#define ACPI_MP_WAKE_COMMAND_WAKEUP    1
+#define ACPI_MP_WAKE_COMMAND_WAKEUP	1
+#define ACPI_MP_WAKE_COMMAND_TEST	2
 
 /* 17: CPU Core Interrupt Controller (ACPI 6.5) */
 
-- 
cgit v1.2.3


From d4a7d067e061c95c6387cf537258082074a4d299 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Mon, 17 Jun 2024 14:57:34 +0200
Subject: ASoC: soc-dai.h: Constify DAI ops auto_selectable_formats

The core ASoC code does not modify contents of the
'auto_selectable_formats' array passed in 'struct snd_soc_dai_ops', so
make it const for code safety.

Reviewed-by: Herve Codina <herve.codina@bootlin.com>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://msgid.link/r/20240617125735.582963-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 15ef268c9845..279223c4ef5e 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -361,7 +361,7 @@ struct snd_soc_dai_ops {
 	 * see
 	 *	snd_soc_dai_get_fmt()
 	 */
-	u64 *auto_selectable_formats;
+	const u64 *auto_selectable_formats;
 	int num_auto_selectable_formats;
 
 	/* probe ordering - for components with runtime dependencies */
-- 
cgit v1.2.3


From 614dc0fb76327dbd81abd4612fbc2e4ba8f205e6 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Wed, 5 Jun 2024 10:18:52 -0500
Subject: sev-guest: configfs-tsm: Allow the privlevel_floor attribute to be
 updated

With the introduction of an SVSM, Linux will be running at a non-zero
VMPL. Any request for an attestation report at a higher privilege VMPL
than what Linux is currently running will result in an error. Allow for
the privlevel_floor attribute to be updated dynamically.

  [ bp: Trim commit message. ]

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/5a736be9384aebd98a0b7c929660f8a97cbdc366.1717600736.git.thomas.lendacky@amd.com
---
 include/linux/tsm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/tsm.h b/include/linux/tsm.h
index de8324a2223c..50c5769657d8 100644
--- a/include/linux/tsm.h
+++ b/include/linux/tsm.h
@@ -54,7 +54,7 @@ struct tsm_report {
  */
 struct tsm_ops {
 	const char *name;
-	const unsigned int privlevel_floor;
+	unsigned int privlevel_floor;
 	int (*report_new)(struct tsm_report *report, void *data);
 };
 
-- 
cgit v1.2.3


From 0e6a35b93745bb2d8b921fd0520ef730489d41a2 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Wed, 5 Jun 2024 10:18:53 -0500
Subject: fs/configfs: Add a callback to determine attribute visibility

In order to support dynamic decisions as to whether an attribute should be
created, add a callback that returns a bool to indicate whether the
attribute should be displayed. If no callback is registered, the attribute
is displayed by default.

Co-developed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/e555c8740a263fab9f83b2cbb44da1af49a2813c.1717600736.git.thomas.lendacky@amd.com
---
 include/linux/configfs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index 2606711adb18..c771e9d0d0b9 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -216,6 +216,9 @@ struct configfs_group_operations {
 	struct config_group *(*make_group)(struct config_group *group, const char *name);
 	void (*disconnect_notify)(struct config_group *group, struct config_item *item);
 	void (*drop_item)(struct config_group *group, struct config_item *item);
+	bool (*is_visible)(struct config_item *item, struct configfs_attribute *attr, int n);
+	bool (*is_bin_visible)(struct config_item *item, struct configfs_bin_attribute *attr,
+			       int n);
 };
 
 struct configfs_subsystem {
-- 
cgit v1.2.3


From 20dfee95936413708701eb151f419597fdd9d948 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Wed, 5 Jun 2024 10:18:54 -0500
Subject: x86/sev: Take advantage of configfs visibility support in TSM

The TSM attestation report support provides multiple configfs attribute
types (both for standard and binary attributes) to allow for additional
attributes to be displayed for SNP as compared to TDX. With the ability
to hide attributes via configfs, consolidate the multiple attribute groups
into a single standard attribute group and a single binary attribute
group. Modify the TDX support to hide the attributes that were previously
"hidden" as a result of registering the selective attribute groups.

Co-developed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Link: https://lore.kernel.org/r/8873c45d0c8abc35aaf01d7833a55788a6905727.1717600736.git.thomas.lendacky@amd.com
---
 include/linux/tsm.h | 38 +++++++++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/tsm.h b/include/linux/tsm.h
index 50c5769657d8..30d9d270b446 100644
--- a/include/linux/tsm.h
+++ b/include/linux/tsm.h
@@ -42,12 +42,40 @@ struct tsm_report {
 	u8 *auxblob;
 };
 
+/**
+ * enum tsm_attr_index - index used to reference report attributes
+ * @TSM_REPORT_GENERATION: index of the report generation number attribute
+ * @TSM_REPORT_PROVIDER: index of the provider name attribute
+ * @TSM_REPORT_PRIVLEVEL: index of the desired privilege level attribute
+ * @TSM_REPORT_PRIVLEVEL_FLOOR: index of the minimum allowed privileg level attribute
+ */
+enum tsm_attr_index {
+	TSM_REPORT_GENERATION,
+	TSM_REPORT_PROVIDER,
+	TSM_REPORT_PRIVLEVEL,
+	TSM_REPORT_PRIVLEVEL_FLOOR,
+};
+
+/**
+ * enum tsm_bin_attr_index - index used to reference binary report attributes
+ * @TSM_REPORT_INBLOB: index of the binary report input attribute
+ * @TSM_REPORT_OUTBLOB: index of the binary report output attribute
+ * @TSM_REPORT_AUXBLOB: index of the binary auxiliary data attribute
+ */
+enum tsm_bin_attr_index {
+	TSM_REPORT_INBLOB,
+	TSM_REPORT_OUTBLOB,
+	TSM_REPORT_AUXBLOB,
+};
+
 /**
  * struct tsm_ops - attributes and operations for tsm instances
  * @name: tsm id reflected in /sys/kernel/config/tsm/report/$report/provider
  * @privlevel_floor: convey base privlevel for nested scenarios
  * @report_new: Populate @report with the report blob and auxblob
  * (optional), return 0 on successful population, or -errno otherwise
+ * @report_attr_visible: show or hide a report attribute entry
+ * @report_bin_attr_visible: show or hide a report binary attribute entry
  *
  * Implementation specific ops, only one is expected to be registered at
  * a time i.e. only one of "sev-guest", "tdx-guest", etc.
@@ -56,14 +84,10 @@ struct tsm_ops {
 	const char *name;
 	unsigned int privlevel_floor;
 	int (*report_new)(struct tsm_report *report, void *data);
+	bool (*report_attr_visible)(int n);
+	bool (*report_bin_attr_visible)(int n);
 };
 
-extern const struct config_item_type tsm_report_default_type;
-
-/* publish @privlevel, @privlevel_floor, and @auxblob attributes */
-extern const struct config_item_type tsm_report_extra_type;
-
-int tsm_register(const struct tsm_ops *ops, void *priv,
-		 const struct config_item_type *type);
+int tsm_register(const struct tsm_ops *ops, void *priv);
 int tsm_unregister(const struct tsm_ops *ops);
 #endif /* __TSM_H */
-- 
cgit v1.2.3


From 627dc671518b7f004ce04c45e8711f8dca94a57c Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Wed, 5 Jun 2024 10:18:55 -0500
Subject: x86/sev: Extend the config-fs attestation support for an SVSM

When an SVSM is present, the guest can also request attestation reports
from it. These SVSM attestation reports can be used to attest the SVSM
and any services running within the SVSM.

Extend the config-fs attestation support to provide such. This involves
creating four new config-fs attributes:

  - 'service-provider' (input)
    This attribute is used to determine whether the attestation request
    should be sent to the specified service provider or to the SEV
    firmware. The SVSM service provider is represented by the value
    'svsm'.

  - 'service_guid' (input)
    Used for requesting the attestation of a single service within the
    service provider. A null GUID implies that the SVSM_ATTEST_SERVICES
    call should be used to request the attestation report. A non-null
    GUID implies that the SVSM_ATTEST_SINGLE_SERVICE call should be used.

  - 'service_manifest_version' (input)
    Used with the SVSM_ATTEST_SINGLE_SERVICE call, the service version
    represents a specific service manifest version be used for the
    attestation report.

  - 'manifestblob' (output)
    Used to return the service manifest associated with the attestation
    report.

Only display these new attributes when running under an SVSM.

  [ bp: Massage.
   - s/svsm_attestation_call/svsm_attest_call/g ]

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/965015dce3c76bb8724839d50c5dea4e4b5d598f.1717600736.git.thomas.lendacky@amd.com
---
 include/linux/tsm.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/tsm.h b/include/linux/tsm.h
index 30d9d270b446..11b0c525be30 100644
--- a/include/linux/tsm.h
+++ b/include/linux/tsm.h
@@ -4,6 +4,7 @@
 
 #include <linux/sizes.h>
 #include <linux/types.h>
+#include <linux/uuid.h>
 
 #define TSM_INBLOB_MAX 64
 #define TSM_OUTBLOB_MAX SZ_32K
@@ -19,11 +20,17 @@
  * @privlevel: optional privilege level to associate with @outblob
  * @inblob_len: sizeof @inblob
  * @inblob: arbitrary input data
+ * @service_provider: optional name of where to obtain the tsm report blob
+ * @service_guid: optional service-provider service guid to attest
+ * @service_manifest_version: optional service-provider service manifest version requested
  */
 struct tsm_desc {
 	unsigned int privlevel;
 	size_t inblob_len;
 	u8 inblob[TSM_INBLOB_MAX];
+	char *service_provider;
+	guid_t service_guid;
+	unsigned int service_manifest_version;
 };
 
 /**
@@ -33,6 +40,8 @@ struct tsm_desc {
  * @outblob: generated evidence to provider to the attestation agent
  * @auxblob_len: sizeof(@auxblob)
  * @auxblob: (optional) auxiliary data to the report (e.g. certificate data)
+ * @manifestblob_len: sizeof(@manifestblob)
+ * @manifestblob: (optional) manifest data associated with the report
  */
 struct tsm_report {
 	struct tsm_desc desc;
@@ -40,6 +49,8 @@ struct tsm_report {
 	u8 *outblob;
 	size_t auxblob_len;
 	u8 *auxblob;
+	size_t manifestblob_len;
+	u8 *manifestblob;
 };
 
 /**
@@ -48,12 +59,18 @@ struct tsm_report {
  * @TSM_REPORT_PROVIDER: index of the provider name attribute
  * @TSM_REPORT_PRIVLEVEL: index of the desired privilege level attribute
  * @TSM_REPORT_PRIVLEVEL_FLOOR: index of the minimum allowed privileg level attribute
+ * @TSM_REPORT_SERVICE_PROVIDER: index of the service provider identifier attribute
+ * @TSM_REPORT_SERVICE_GUID: index of the service GUID attribute
+ * @TSM_REPORT_SERVICE_MANIFEST_VER: index of the service manifest version attribute
  */
 enum tsm_attr_index {
 	TSM_REPORT_GENERATION,
 	TSM_REPORT_PROVIDER,
 	TSM_REPORT_PRIVLEVEL,
 	TSM_REPORT_PRIVLEVEL_FLOOR,
+	TSM_REPORT_SERVICE_PROVIDER,
+	TSM_REPORT_SERVICE_GUID,
+	TSM_REPORT_SERVICE_MANIFEST_VER,
 };
 
 /**
@@ -61,11 +78,13 @@ enum tsm_attr_index {
  * @TSM_REPORT_INBLOB: index of the binary report input attribute
  * @TSM_REPORT_OUTBLOB: index of the binary report output attribute
  * @TSM_REPORT_AUXBLOB: index of the binary auxiliary data attribute
+ * @TSM_REPORT_MANIFESTBLOB: index of the binary manifest data attribute
  */
 enum tsm_bin_attr_index {
 	TSM_REPORT_INBLOB,
 	TSM_REPORT_OUTBLOB,
 	TSM_REPORT_AUXBLOB,
+	TSM_REPORT_MANIFESTBLOB,
 };
 
 /**
-- 
cgit v1.2.3


From 8cb2dbf94e44bcde4cff0223f2f900f8fb9083a4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 17 Jun 2024 20:31:31 +0200
Subject: irqdomain: Make build work for CONFIG_GENERIC_IRQ_CHIP=n

ld: kernel/irq/irqdomain.o: in function `irq_domain_instantiate':
kernel/irq/irqdomain.c:296:(.text+0x10dd): undefined reference to `irq_domain_alloc_generic_chips'
ld: kernel/irq/irqdomain.c:313:(.text+0x1218): undefined reference to `irq_domain_remove_generic_chips'
ld: kernel/irq/irqdomain.o: in function `irq_domain_remove':
kernel/irq/irqdomain.c:349:(.text+0x1ddf): undefined reference to `irq_domain_remove_generic_chips'

Provide the required stubs.

Fixes: e6f67ce32e8e ("irqdomain: Add support for generic irq chips creation before publishing a domain")
Reported-by: Borislav Betkov <bp@alien8.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 712bcee56efc..1f5dbf1f92c9 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -1182,9 +1182,19 @@ int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc,
 
 struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq);
 
+#ifdef CONFIG_GENERIC_IRQ_CHIP
 int irq_domain_alloc_generic_chips(struct irq_domain *d,
 				   const struct irq_domain_chip_generic_info *info);
 void irq_domain_remove_generic_chips(struct irq_domain *d);
+#else
+static inline int
+irq_domain_alloc_generic_chips(struct irq_domain *d,
+			       const struct irq_domain_chip_generic_info *info)
+{
+	return -EINVAL;
+}
+static inline void irq_domain_remove_generic_chips(struct irq_domain *d) { }
+#endif /* CONFIG_GENERIC_IRQ_CHIP */
 
 int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
 				     int num_ct, const char *name,
-- 
cgit v1.2.3


From efb459303dd5dd6e198a0d58322dc04c3356dc23 Mon Sep 17 00:00:00 2001
From: Kory Maincent <kory.maincent@bootlin.com>
Date: Wed, 12 Jun 2024 17:04:02 +0200
Subject: net: Move dev_set_hwtstamp_phylib to net/core/dev.h

This declaration was added to the header to be called from ethtool.
ethtool is separated from core for code organization but it is not really
a separate entity, it controls very core things.
As ethtool is an internal stuff it is not wise to have it in netdevice.h.
Move the declaration to net/core/dev.h instead.

Remove the EXPORT_SYMBOL_GPL call as ethtool can not be built as a module.

Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://lore.kernel.org/r/20240612-feature_ptp_netnext-v15-2-b2a086257b63@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 85111502cf8f..c83b390191d4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3904,9 +3904,6 @@ int generic_hwtstamp_get_lower(struct net_device *dev,
 int generic_hwtstamp_set_lower(struct net_device *dev,
 			       struct kernel_hwtstamp_config *kernel_cfg,
 			       struct netlink_ext_ack *extack);
-int dev_set_hwtstamp_phylib(struct net_device *dev,
-			    struct kernel_hwtstamp_config *cfg,
-			    struct netlink_ext_ack *extack);
 int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata);
 unsigned int dev_get_flags(const struct net_device *);
 int __dev_change_flags(struct net_device *dev, unsigned int flags,
-- 
cgit v1.2.3


From 41474d25bec56900e3a018907784b0abfe5a6a9e Mon Sep 17 00:00:00 2001
From: Nick Hollinghurst <nick.hollinghurst@raspberrypi.com>
Date: Fri, 16 Feb 2024 18:48:55 +0000
Subject: drm: Add DRM_MODE_TV_MODE_MONOCHROME

Add this as a value for enum_drm_connector_tv_mode, represented
by the string "Mono", to generate video with no colour encoding
or bursts. Define it to have no pedestal (since only NTSC-M calls
for a pedestal).

Change default mode creation to acommodate the new tv_mode value
which comprises both 525-line and 625-line formats.

Acked-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Nick Hollinghurst <nick.hollinghurst@raspberrypi.com>
Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Signed-off-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240216184857.245372-2-dave.stevenson@raspberrypi.com
---
 include/drm/drm_connector.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index f750765d8fbc..c754651044d4 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -201,6 +201,13 @@ enum drm_connector_tv_mode {
 	 */
 	DRM_MODE_TV_MODE_SECAM,
 
+	/**
+	 * @DRM_MODE_TV_MODE_MONOCHROME: Use timings appropriate to
+	 * the DRM mode, including equalizing pulses for a 525-line
+	 * or 625-line mode, with no pedestal or color encoding.
+	 */
+	DRM_MODE_TV_MODE_MONOCHROME,
+
 	/**
 	 * @DRM_MODE_TV_MODE_MAX: Number of analog TV output modes.
 	 *
-- 
cgit v1.2.3


From 777b8afb8179155353ec14b1d8153122410aba29 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 15 Jun 2024 20:00:27 +0800
Subject: net: phy: introduce core support for phy-mode = "10g-qxgmii"

10G-QXGMII is a MAC-to-PHY interface defined by the USXGMII multiport
specification. It uses the same signaling as USXGMII, but it multiplexes
4 ports over the link, resulting in a maximum speed of 2.5G per port.

Some in-tree SoCs like the NXP LS1028A use "usxgmii" when they mean
either the single-port USXGMII or the quad-port 10G-QXGMII variant, and
they could get away just fine with that thus far. But there is a need to
distinguish between the 2 as far as SerDes drivers are concerned.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Luo Jie <quic_luoj@quicinc.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/phy.h     | 4 ++++
 include/linux/phylink.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index e6e83304558e..205fccfc0f60 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -128,6 +128,7 @@ extern const int phy_10gbit_features_array[1];
  * @PHY_INTERFACE_MODE_10GKR: 10GBASE-KR - with Clause 73 AN
  * @PHY_INTERFACE_MODE_QUSGMII: Quad Universal SGMII
  * @PHY_INTERFACE_MODE_1000BASEKX: 1000Base-KX - with Clause 73 AN
+ * @PHY_INTERFACE_MODE_10G_QXGMII: 10G-QXGMII - 4 ports over 10G USXGMII
  * @PHY_INTERFACE_MODE_MAX: Book keeping
  *
  * Describes the interface between the MAC and PHY.
@@ -168,6 +169,7 @@ typedef enum {
 	PHY_INTERFACE_MODE_10GKR,
 	PHY_INTERFACE_MODE_QUSGMII,
 	PHY_INTERFACE_MODE_1000BASEKX,
+	PHY_INTERFACE_MODE_10G_QXGMII,
 	PHY_INTERFACE_MODE_MAX,
 } phy_interface_t;
 
@@ -289,6 +291,8 @@ static inline const char *phy_modes(phy_interface_t interface)
 		return "100base-x";
 	case PHY_INTERFACE_MODE_QUSGMII:
 		return "qusgmii";
+	case PHY_INTERFACE_MODE_10G_QXGMII:
+		return "10g-qxgmii";
 	default:
 		return "unknown";
 	}
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index a30a692acc32..2381e07429a2 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -654,6 +654,7 @@ static inline int phylink_get_link_timer_ns(phy_interface_t interface)
 	case PHY_INTERFACE_MODE_SGMII:
 	case PHY_INTERFACE_MODE_QSGMII:
 	case PHY_INTERFACE_MODE_USXGMII:
+	case PHY_INTERFACE_MODE_10G_QXGMII:
 		return 1600000;
 
 	case PHY_INTERFACE_MODE_1000BASEX:
-- 
cgit v1.2.3


From c2bc958b2b03e361f14df99983bc64a39a7323a3 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 17 Jun 2024 13:06:27 +0200
Subject: fbdev: vesafb: Detect VGA compatibility from screen info's VESA
 attributes

Test the vesa_attributes field in struct screen_info for compatibility
with VGA hardware. Vesafb currently tests bit 1 in screen_info's
capabilities field which indicates a 64-bit lfb address and is
unrelated to VGA compatibility.

Section 4.4 of the Vesa VBE 2.0 specifications defines that bit 5 in
the mode's attributes field signals VGA compatibility. The mode is
compatible with VGA hardware if the bit is clear. In that case, the
driver can access VGA state of the VBE's underlying hardware. The
vesafb driver uses this feature to program the color LUT in palette
modes. Without, colors might be incorrect.

The problem got introduced in commit 89ec4c238e7a ("[PATCH] vesafb: Fix
incorrect logo colors in x86_64"). It incorrectly stores the mode
attributes in the screen_info's capabilities field and updates vesafb
accordingly. Later, commit 5e8ddcbe8692 ("Video mode probing support for
the new x86 setup code") fixed the screen_info, but did not update vesafb.
Color output still tends to work, because bit 1 in capabilities is
usually 0.

Besides fixing the bug in vesafb, this commit introduces a helper that
reads the correct bit from screen_info.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Fixes: 5e8ddcbe8692 ("Video mode probing support for the new x86 setup code")
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Cc: <stable@vger.kernel.org> # v2.6.23+
Signed-off-by: Helge Deller <deller@gmx.de>
---
 include/linux/screen_info.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h
index 75303c126285..6a4a3cec4638 100644
--- a/include/linux/screen_info.h
+++ b/include/linux/screen_info.h
@@ -49,6 +49,16 @@ static inline u64 __screen_info_lfb_size(const struct screen_info *si, unsigned
 	return lfb_size;
 }
 
+static inline bool __screen_info_vbe_mode_nonvga(const struct screen_info *si)
+{
+	/*
+	 * VESA modes typically run on VGA hardware. Set bit 5 signals that this
+	 * is not the case. Drivers can then not make use of VGA resources. See
+	 * Sec 4.4 of the VBE 2.0 spec.
+	 */
+	return si->vesa_attributes & BIT(5);
+}
+
 static inline unsigned int __screen_info_video_type(unsigned int type)
 {
 	switch (type) {
-- 
cgit v1.2.3


From 2fbafecb0f05818e25f6c926c6f9ad9ef597429c Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Mon, 17 Jun 2024 15:03:19 +0200
Subject: ASoC: Constify of_phandle_args in snd_soc_dai_driver

ASoC core code does not modify contents of 'of_phandle_args' in 'struct
snd_soc_dai_driver', so the pointer can be made as a pointer to const.
This makes code safer, serves as clear annotation of core's intentions
and allows putting pointed structures in rodata (if ever applicable).

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://msgid.link/r/20240617-n-asoc-const-auto-selectable-formats-v1-1-8004f346ee38@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 15ef268c9845..e6c61c79c483 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -413,7 +413,7 @@ struct snd_soc_dai_driver {
 	unsigned int id;
 	unsigned int base;
 	struct snd_soc_dobj dobj;
-	struct of_phandle_args *dai_args;
+	const struct of_phandle_args *dai_args;
 
 	/* ops */
 	const struct snd_soc_dai_ops *ops;
-- 
cgit v1.2.3


From 020b37d06f97de289940805bc821190d5858eda0 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Mon, 17 Jun 2024 15:03:20 +0200
Subject: ASoC: Constify of_phandle_args in snd_soc_dai_link_component

ASoC core code does not modify contents of 'of_phandle_args' in 'struct
snd_soc_dai_link_component', so the pointer can be made as a pointer to
const.  This makes code safer, serves as clear annotation of core's
intentions and allows putting pointed structures in rodata (if ever
applicable).

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://msgid.link/r/20240617-n-asoc-const-auto-selectable-formats-v1-2-8004f346ee38@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 33671437ee89..f02a51694ab4 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -675,7 +675,7 @@ struct snd_soc_dai_link_component {
 	const char *name;
 	struct device_node *of_node;
 	const char *dai_name;
-	struct of_phandle_args *dai_args;
+	const struct of_phandle_args *dai_args;
 };
 
 /*
-- 
cgit v1.2.3


From f3ac3da7e4d0957b3402fb31a4ca480e739e086f Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Mon, 17 Jun 2024 15:03:21 +0200
Subject: ASoC: Constify passed data to core function

Several ASoC functions receive pointers to data which is not modified,
e.g. pointers to 'snd_soc_dai', 'snd_soc_pcm_runtime',
'snd_pcm_hw_params' and 'snd_soc_dai_link'.

All these pointers can be made as a pointer to const.  This makes code
safer, serves as clear annotation of function's intentions (no ownership
passed to the function, no modifications) and allows putting pointed
structures in rodata (if ever applicable).

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://msgid.link/r/20240617-n-asoc-const-auto-selectable-formats-v1-3-8004f346ee38@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h | 18 ++++++++++--------
 include/sound/soc.h     | 17 +++++++++--------
 2 files changed, 19 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index e6c61c79c483..3c45b418270e 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -180,8 +180,8 @@ int snd_soc_dai_set_pll(struct snd_soc_dai *dai,
 int snd_soc_dai_set_bclk_ratio(struct snd_soc_dai *dai, unsigned int ratio);
 
 /* Digital Audio interface formatting */
-int snd_soc_dai_get_fmt_max_priority(struct snd_soc_pcm_runtime *rtd);
-u64 snd_soc_dai_get_fmt(struct snd_soc_dai *dai, int priority);
+int snd_soc_dai_get_fmt_max_priority(const struct snd_soc_pcm_runtime *rtd);
+u64 snd_soc_dai_get_fmt(const struct snd_soc_dai *dai, int priority);
 int snd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt);
 
 int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai,
@@ -202,7 +202,7 @@ int snd_soc_dai_get_channel_map(struct snd_soc_dai *dai,
 		unsigned int *tx_num, unsigned int *tx_slot,
 		unsigned int *rx_num, unsigned int *rx_slot);
 
-int snd_soc_dai_is_dummy(struct snd_soc_dai *dai);
+int snd_soc_dai_is_dummy(const struct snd_soc_dai *dai);
 
 int snd_soc_dai_hw_params(struct snd_soc_dai *dai,
 			  struct snd_pcm_substream *substream,
@@ -218,7 +218,7 @@ void snd_soc_dai_suspend(struct snd_soc_dai *dai);
 void snd_soc_dai_resume(struct snd_soc_dai *dai);
 int snd_soc_dai_compress_new(struct snd_soc_dai *dai,
 			     struct snd_soc_pcm_runtime *rtd, int num);
-bool snd_soc_dai_stream_valid(struct snd_soc_dai *dai, int stream);
+bool snd_soc_dai_stream_valid(const struct snd_soc_dai *dai, int stream);
 void snd_soc_dai_link_set_capabilities(struct snd_soc_dai_link *dai_link);
 void snd_soc_dai_action(struct snd_soc_dai *dai,
 			int stream, int action);
@@ -232,7 +232,7 @@ static inline void snd_soc_dai_deactivate(struct snd_soc_dai *dai,
 {
 	snd_soc_dai_action(dai, stream, -1);
 }
-int snd_soc_dai_active(struct snd_soc_dai *dai);
+int snd_soc_dai_active(const struct snd_soc_dai *dai);
 
 int snd_soc_pcm_dai_probe(struct snd_soc_pcm_runtime *rtd, int order);
 int snd_soc_pcm_dai_remove(struct snd_soc_pcm_runtime *rtd, int order);
@@ -271,7 +271,7 @@ int snd_soc_dai_compr_get_metadata(struct snd_soc_dai *dai,
 				   struct snd_compr_stream *cstream,
 				   struct snd_compr_metadata *metadata);
 
-const char *snd_soc_dai_name_get(struct snd_soc_dai *dai);
+const char *snd_soc_dai_name_get(const struct snd_soc_dai *dai);
 
 struct snd_soc_dai_ops {
 	/* DAI driver callbacks */
@@ -518,7 +518,8 @@ static inline void snd_soc_dai_init_dma_data(struct snd_soc_dai *dai, void *play
 	snd_soc_dai_dma_data_set_capture(dai,  capture);
 }
 
-static inline unsigned int snd_soc_dai_tdm_mask_get(struct snd_soc_dai *dai, int stream)
+static inline unsigned int snd_soc_dai_tdm_mask_get(const struct snd_soc_dai *dai,
+						    int stream)
 {
 	return dai->stream[stream].tdm_mask;
 }
@@ -529,7 +530,8 @@ static inline void snd_soc_dai_tdm_mask_set(struct snd_soc_dai *dai, int stream,
 	dai->stream[stream].tdm_mask = tdm_mask;
 }
 
-static inline unsigned int snd_soc_dai_stream_active(struct snd_soc_dai *dai, int stream)
+static inline unsigned int snd_soc_dai_stream_active(const struct snd_soc_dai *dai,
+						     int stream)
 {
 	/* see snd_soc_dai_action() for setup */
 	return dai->stream[stream].active;
diff --git a/include/sound/soc.h b/include/sound/soc.h
index f02a51694ab4..a8e66bbf932b 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -534,10 +534,10 @@ static inline int snd_soc_set_dmi_name(struct snd_soc_card *card,
 
 /* Utility functions to get clock rates from various things */
 int snd_soc_calc_frame_size(int sample_size, int channels, int tdm_slots);
-int snd_soc_params_to_frame_size(struct snd_pcm_hw_params *params);
+int snd_soc_params_to_frame_size(const struct snd_pcm_hw_params *params);
 int snd_soc_calc_bclk(int fs, int sample_size, int channels, int tdm_slots);
-int snd_soc_params_to_bclk(struct snd_pcm_hw_params *parms);
-int snd_soc_tdm_params_to_bclk(struct snd_pcm_hw_params *params,
+int snd_soc_params_to_bclk(const struct snd_pcm_hw_params *parms);
+int snd_soc_tdm_params_to_bclk(const struct snd_pcm_hw_params *params,
 			       int tdm_width, int tdm_slots, int slot_multiple);
 
 /* set runtime hw params */
@@ -837,7 +837,8 @@ struct snd_soc_dai_link {
 #endif
 };
 
-static inline int snd_soc_link_num_ch_map(struct snd_soc_dai_link *link) {
+static inline int snd_soc_link_num_ch_map(const struct snd_soc_dai_link *link)
+{
 	return max(link->num_cpus, link->num_codecs);
 }
 
@@ -1299,7 +1300,7 @@ struct soc_enum {
 #endif
 };
 
-static inline bool snd_soc_volsw_is_stereo(struct soc_mixer_control *mc)
+static inline bool snd_soc_volsw_is_stereo(const struct soc_mixer_control *mc)
 {
 	if (mc->reg == mc->rreg && mc->shift == mc->rshift)
 		return false;
@@ -1311,7 +1312,7 @@ static inline bool snd_soc_volsw_is_stereo(struct soc_mixer_control *mc)
 	return true;
 }
 
-static inline unsigned int snd_soc_enum_val_to_item(struct soc_enum *e,
+static inline unsigned int snd_soc_enum_val_to_item(const struct soc_enum *e,
 	unsigned int val)
 {
 	unsigned int i;
@@ -1326,7 +1327,7 @@ static inline unsigned int snd_soc_enum_val_to_item(struct soc_enum *e,
 	return 0;
 }
 
-static inline unsigned int snd_soc_enum_item_to_val(struct soc_enum *e,
+static inline unsigned int snd_soc_enum_item_to_val(const struct soc_enum *e,
 	unsigned int item)
 {
 	if (!e->values)
@@ -1401,7 +1402,7 @@ unsigned int snd_soc_daifmt_parse_clock_provider_raw(struct device_node *np,
 	snd_soc_daifmt_clock_provider_from_bitmap(			\
 		snd_soc_daifmt_parse_clock_provider_as_bitmap(np, prefix))
 
-int snd_soc_get_stream_cpu(struct snd_soc_dai_link *dai_link, int stream);
+int snd_soc_get_stream_cpu(const struct snd_soc_dai_link *dai_link, int stream);
 int snd_soc_get_dlc(const struct of_phandle_args *args,
 		    struct snd_soc_dai_link_component *dlc);
 int snd_soc_of_get_dlc(struct device_node *of_node,
-- 
cgit v1.2.3


From 785d64c4941221044940ab199e6625af17296470 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Mon, 17 Jun 2024 15:03:22 +0200
Subject: ASoC: Constify DAI passed to get_channel_map

get_channel_map() is supposed to obtain map of channels without
modifying the state of the given DAI, so make the pointer to 'struct
snd_soc_dai' as pointing to const.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://msgid.link/r/20240617-n-asoc-const-auto-selectable-formats-v1-4-8004f346ee38@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 3c45b418270e..f22969298de1 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -198,7 +198,7 @@ int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute,
 			     int direction);
 
 
-int snd_soc_dai_get_channel_map(struct snd_soc_dai *dai,
+int snd_soc_dai_get_channel_map(const struct snd_soc_dai *dai,
 		unsigned int *tx_num, unsigned int *tx_slot,
 		unsigned int *rx_num, unsigned int *rx_slot);
 
@@ -307,7 +307,7 @@ struct snd_soc_dai_ops {
 	int (*set_channel_map)(struct snd_soc_dai *dai,
 		unsigned int tx_num, const unsigned int *tx_slot,
 		unsigned int rx_num, const unsigned int *rx_slot);
-	int (*get_channel_map)(struct snd_soc_dai *dai,
+	int (*get_channel_map)(const struct snd_soc_dai *dai,
 			unsigned int *tx_num, unsigned int *tx_slot,
 			unsigned int *rx_num, unsigned int *rx_slot);
 	int (*set_tristate)(struct snd_soc_dai *dai, int tristate);
-- 
cgit v1.2.3


From de267e7a6ea8e6fa29af2287adfc9fc9d87e6dc9 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Mon, 17 Jun 2024 15:03:23 +0200
Subject: ASoC: Constify return of snd_soc_dai_get_pcm_stream()

Returned 'struct snd_soc_pcm_stream' by snd_soc_dai_get_pcm_stream() is
not modified by the users, so it can be changed as pointer to const.
This is a necessary step towards making the 'dai->driver' a pointer to
const.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://msgid.link/r/20240617-n-asoc-const-auto-selectable-formats-v1-5-8004f346ee38@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index f22969298de1..bd249710d716 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -473,7 +473,7 @@ struct snd_soc_dai {
 	unsigned int probed:1;
 };
 
-static inline struct snd_soc_pcm_stream *
+static inline const struct snd_soc_pcm_stream *
 snd_soc_dai_get_pcm_stream(const struct snd_soc_dai *dai, int stream)
 {
 	return (stream == SNDRV_PCM_STREAM_PLAYBACK) ?
-- 
cgit v1.2.3


From d6a711a898672dd873aab3844f754a3ca40723a5 Mon Sep 17 00:00:00 2001
From: Patrice Chotard <patrice.chotard@foss.st.com>
Date: Tue, 18 Jun 2024 15:29:51 +0200
Subject: spi: Fix OCTAL mode support

Add OCTAL mode support.
Issue detected using "--octal" spidev_test's option.

Signed-off-by: Patrice Chotard <patrice.chotard@foss.st.com>
Link: https://msgid.link/r/20240618132951.2743935-4-patrice.chotard@foss.st.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index e8e1e798924f..98fdef6e28f2 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -1085,12 +1085,13 @@ struct spi_transfer {
 	unsigned	dummy_data:1;
 	unsigned	cs_off:1;
 	unsigned	cs_change:1;
-	unsigned	tx_nbits:3;
-	unsigned	rx_nbits:3;
+	unsigned	tx_nbits:4;
+	unsigned	rx_nbits:4;
 	unsigned	timestamped:1;
 #define	SPI_NBITS_SINGLE	0x01 /* 1-bit transfer */
 #define	SPI_NBITS_DUAL		0x02 /* 2-bit transfer */
 #define	SPI_NBITS_QUAD		0x04 /* 4-bit transfer */
+#define	SPI_NBITS_OCTAL	0x08 /* 8-bit transfer */
 	u8		bits_per_word;
 	struct spi_delay	delay;
 	struct spi_delay	cs_change_delay;
-- 
cgit v1.2.3


From 702eb71fd6501b3566283f8c96d7ccc6ddd662e9 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 17 Jun 2024 18:23:00 +0200
Subject: fsnotify: Do not generate events for O_PATH file descriptors

Currently we will not generate FS_OPEN events for O_PATH file
descriptors but we will generate FS_CLOSE events for them. This is
asymmetry is confusing. Arguably no fsnotify events should be generated
for O_PATH file descriptors as they cannot be used to access or modify
file content, they are just convenient handles to file objects like
paths. So fix the asymmetry by stopping to generate FS_CLOSE for O_PATH
file descriptors.

Cc: <stable@vger.kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20240617162303.1596-1-jack@suse.cz
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fsnotify.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 4da80e92f804..278620e063ab 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -112,7 +112,13 @@ static inline int fsnotify_file(struct file *file, __u32 mask)
 {
 	const struct path *path;
 
-	if (file->f_mode & FMODE_NONOTIFY)
+	/*
+	 * FMODE_NONOTIFY are fds generated by fanotify itself which should not
+	 * generate new events. We also don't want to generate events for
+	 * FMODE_PATH fds (involves open & close events) as they are just
+	 * handle creation / destruction events and not "real" file events.
+	 */
+	if (file->f_mode & (FMODE_NONOTIFY | FMODE_PATH))
 		return 0;
 
 	path = &file->f_path;
-- 
cgit v1.2.3


From 9f774c757e3fb2ac32dc4377e8f21f3364a8df81 Mon Sep 17 00:00:00 2001
From: Shenghao Ding <shenghao-ding@ti.com>
Date: Fri, 14 Jun 2024 21:36:45 +0800
Subject: ASoc: tas2781: Enable RCA-based playback without DSP firmware
 download

In only loading RCA (Reconfigurable Architecture) binary case, no DSP
program will be working inside tas2563/tas2781, that is dsp-bypass mode,
do not support speaker protection, or audio acoustic algorithms in this
mode.

Fixes: ef3bcde75d06 ("ASoC: tas2781: Add tas2781 driver")
Signed-off-by: Shenghao Ding <shenghao-ding@ti.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://msgid.link/r/20240614133646.910-1-shenghao-ding@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/tas2781-dsp.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/tas2781-dsp.h b/include/sound/tas2781-dsp.h
index 7fba7ea26a4b..3cda9da14f6d 100644
--- a/include/sound/tas2781-dsp.h
+++ b/include/sound/tas2781-dsp.h
@@ -117,10 +117,17 @@ struct tasdevice_fw {
 	struct device *dev;
 };
 
-enum tasdevice_dsp_fw_state {
-	TASDEVICE_DSP_FW_NONE = 0,
+enum tasdevice_fw_state {
+	/* Driver in startup mode, not load any firmware. */
 	TASDEVICE_DSP_FW_PENDING,
+	/* DSP firmware in the system, but parsing error. */
 	TASDEVICE_DSP_FW_FAIL,
+	/*
+	 * Only RCA (Reconfigurable Architecture) firmware load
+	 * successfully.
+	 */
+	TASDEVICE_RCA_FW_OK,
+	/* Both RCA and DSP firmware load successfully. */
 	TASDEVICE_DSP_FW_ALL_OK,
 };
 
-- 
cgit v1.2.3


From a6816314af5749cd88944bfdceb270c627cdf348 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 3 May 2024 11:17:32 -0700
Subject: KVM: Introduce vcpu->wants_to_run

Introduce vcpu->wants_to_run to indicate when a vCPU is in its core run
loop, i.e. when the vCPU is running the KVM_RUN ioctl and immediate_exit
was not set.

Replace all references to vcpu->run->immediate_exit with
!vcpu->wants_to_run to avoid TOCTOU races with userspace. For example, a
malicious userspace could invoked KVM_RUN with immediate_exit=true and
then after KVM reads it to set wants_to_run=false, flip it to false.
This would result in the vCPU running in KVM_RUN with
wants_to_run=false. This wouldn't cause any real bugs today but is a
dangerous landmine.

Signed-off-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20240503181734.1467938-2-dmatlack@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/linux/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7b9d2633a931..d72ced3e74d1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -378,6 +378,7 @@ struct kvm_vcpu {
 		bool dy_eligible;
 	} spin_loop;
 #endif
+	bool wants_to_run;
 	bool preempted;
 	bool ready;
 	bool scheduled_out;
-- 
cgit v1.2.3


From 4b23e0c199b20fa6fe9655b3d0e12d6c6f18c27f Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 3 May 2024 11:17:33 -0700
Subject: KVM: Ensure new code that references immediate_exit gets extra
 scrutiny

Ensure that any new KVM code that references immediate_exit gets extra
scrutiny by renaming it to immediate_exit__unsafe in kernel code.

All fields in struct kvm_run are subject to TOCTOU races since they are
mapped into userspace, which may be malicious or buggy. To protect KVM,
introduces a new macro that appends __unsafe to select field names in
struct kvm_run, hinting to developers and reviewers that accessing such
fields must be done carefully.

Apply the new macro to immediate_exit, since userspace can make
immediate_exit inconsistent with vcpu->wants_to_run, i.e. accessing
immediate_exit directly could lead to unexpected bugs in the future.

Signed-off-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20240503181734.1467938-3-dmatlack@google.com
[sean: massage changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 include/uapi/linux/kvm.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d03842abae57..795773f5db63 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -192,11 +192,24 @@ struct kvm_xen_exit {
 /* Flags that describe what fields in emulation_failure hold valid data. */
 #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
 
+/*
+ * struct kvm_run can be modified by userspace at any time, so KVM must be
+ * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM()
+ * renames fields in struct kvm_run from <symbol> to <symbol>__unsafe when
+ * compiled into the kernel, ensuring that any use within KVM is obvious and
+ * gets extra scrutiny.
+ */
+#ifdef __KERNEL__
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe
+#else
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol
+#endif
+
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
 	/* in */
 	__u8 request_interrupt_window;
-	__u8 immediate_exit;
+	__u8 HINT_UNSAFE_IN_KVM(immediate_exit);
 	__u8 padding1[6];
 
 	/* out */
-- 
cgit v1.2.3


From 395e73bd8d35fa9b8505e44f63a2a10abde09cce Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Sun, 9 Jun 2024 20:12:19 -0700
Subject: srcu: Add NUM_ACTIVE_SRCU_POLL_OLDSTATE

This commit adds NUM_ACTIVE_SRCU_POLL_OLDSTATE, which gives the maximum
number of distinct return values from get_state_synchronize_rcu()
that can, at a given point in time, correspond to not-completed SRCU
grace periods.

Reported-by: Kent Overstreet <kent.overstreet@linux.dev>
Closes: https://lore.kernel.org/all/irycqy4sinjdgm2hkyix2bffunpcmuwgeufsx6nlljvqme3wiu@ify3zdnrmzph/
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/srcu.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 236610e4a8fa..f664cba7a80c 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -61,6 +61,10 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp);
 unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp);
 bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie);
 
+// Maximum number of unsigned long values corresponding to
+// not-yet-completed SRCU grace periods.
+#define NUM_ACTIVE_SRCU_POLL_OLDSTATE 2
+
 #ifdef CONFIG_NEED_SRCU_NMI_SAFE
 int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp);
 void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp);
-- 
cgit v1.2.3


From e206f33e2c0774276a0497fe538472e12016a362 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 14 Jun 2024 13:26:44 -0700
Subject: srcu: Fill out polled grace-period APIs

This commit adds the get_completed_synchronize_srcu() and the
same_state_synchronize_srcu() functions.  The first returns a cookie
that is always interpreted as corresponding to an expired grace period.
The second does an equality comparison of a pair of cookies.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/srcu.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index f664cba7a80c..6f6cb5fc1242 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -57,6 +57,20 @@ void cleanup_srcu_struct(struct srcu_struct *ssp);
 int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
 void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
 void synchronize_srcu(struct srcu_struct *ssp);
+
+#define SRCU_GET_STATE_COMPLETED 0x1
+
+/**
+ * get_completed_synchronize_srcu - Return a pre-completed polled state cookie
+ *
+ * Returns a value that poll_state_synchronize_srcu() will always treat
+ * as a cookie whose grace period has already completed.
+ */
+static inline unsigned long get_completed_synchronize_srcu(void)
+{
+	return SRCU_GET_STATE_COMPLETED;
+}
+
 unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp);
 unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp);
 bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie);
@@ -65,6 +79,23 @@ bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie);
 // not-yet-completed SRCU grace periods.
 #define NUM_ACTIVE_SRCU_POLL_OLDSTATE 2
 
+/**
+ * same_state_synchronize_srcu - Are two old-state values identical?
+ * @oldstate1: First old-state value.
+ * @oldstate2: Second old-state value.
+ *
+ * The two old-state values must have been obtained from either
+ * get_state_synchronize_srcu(), start_poll_synchronize_srcu(), or
+ * get_completed_synchronize_srcu().  Returns @true if the two values are
+ * identical and @false otherwise.  This allows structures whose lifetimes
+ * are tracked by old-state values to push these values to a list header,
+ * allowing those structures to be slightly smaller.
+ */
+static inline bool same_state_synchronize_srcu(unsigned long oldstate1, unsigned long oldstate2)
+{
+	return oldstate1 == oldstate2;
+}
+
 #ifdef CONFIG_NEED_SRCU_NMI_SAFE
 int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp);
 void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp);
-- 
cgit v1.2.3


From 5c563ee90a22d3295bcd6217e3ecd7bf9f4d9d48 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Fri, 24 May 2024 11:58:28 -0700
Subject: cpumask: introduce assign_cpu() macro

Now that assign_bit() is a thin macro wrapper around set_bit() and
clear_bit(), we can use it in cpumask API and drop duplicating
implementations of set_cpu_xxx() helpers with no additional overhead.

Bloat-o-meter reports almost 2k less of generated code for allyesconfig,
mostly in kernel/cpu.c:
	add/remove: 2/4 grow/shrink: 3/4 up/down: 498/-2228 (-1730)

Reviewed-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 include/linux/cpumask.h | 40 ++++++----------------------------------
 1 file changed, 6 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 23686bed441d..18410acdbc9e 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -1083,44 +1083,16 @@ void init_cpu_present(const struct cpumask *src);
 void init_cpu_possible(const struct cpumask *src);
 void init_cpu_online(const struct cpumask *src);
 
-static inline void
-set_cpu_possible(unsigned int cpu, bool possible)
-{
-	if (possible)
-		cpumask_set_cpu(cpu, &__cpu_possible_mask);
-	else
-		cpumask_clear_cpu(cpu, &__cpu_possible_mask);
-}
+#define assign_cpu(cpu, mask, val)	\
+	assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val))
 
-static inline void
-set_cpu_present(unsigned int cpu, bool present)
-{
-	if (present)
-		cpumask_set_cpu(cpu, &__cpu_present_mask);
-	else
-		cpumask_clear_cpu(cpu, &__cpu_present_mask);
-}
+#define set_cpu_possible(cpu, possible)	assign_cpu((cpu), &__cpu_possible_mask, (possible))
+#define set_cpu_present(cpu, present)	assign_cpu((cpu), &__cpu_present_mask, (present))
+#define set_cpu_active(cpu, active)	assign_cpu((cpu), &__cpu_active_mask, (active))
+#define set_cpu_dying(cpu, dying)	assign_cpu((cpu), &__cpu_dying_mask, (dying))
 
 void set_cpu_online(unsigned int cpu, bool online);
 
-static inline void
-set_cpu_active(unsigned int cpu, bool active)
-{
-	if (active)
-		cpumask_set_cpu(cpu, &__cpu_active_mask);
-	else
-		cpumask_clear_cpu(cpu, &__cpu_active_mask);
-}
-
-static inline void
-set_cpu_dying(unsigned int cpu, bool dying)
-{
-	if (dying)
-		cpumask_set_cpu(cpu, &__cpu_dying_mask);
-	else
-		cpumask_clear_cpu(cpu, &__cpu_dying_mask);
-}
-
 /**
  * to_cpumask - convert a NR_CPUS bitmap to a struct cpumask *
  * @bitmap: the bitmap
-- 
cgit v1.2.3


From e0eeb938adb0367de4b0946125a06142d8de7d37 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Tue, 11 Jun 2024 15:38:12 +0300
Subject: bitops: Add a comment explaining the double underscore macros

Linus Walleij pointed out that a new comer might be confused about the
difference between set_bit() and __set_bit().  Add a comment explaining
the difference.

Link: https://lore.kernel.org/all/CACRpkdZFPG_YLici-BmYfk9HZ36f4WavCN3JNotkk8cPgCODCg@mail.gmail.com/
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 include/linux/bitops.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 46d4bdc634c0..ba35bbf07798 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -47,12 +47,17 @@ extern unsigned long __sw_hweight64(__u64 w);
 	  __builtin_constant_p(*(const unsigned long *)(addr))) ?	\
 	 const##op(nr, addr) : op(nr, addr))
 
+/*
+ * The following macros are non-atomic versions of their non-underscored
+ * counterparts.
+ */
 #define __set_bit(nr, addr)		bitop(___set_bit, nr, addr)
 #define __clear_bit(nr, addr)		bitop(___clear_bit, nr, addr)
 #define __change_bit(nr, addr)		bitop(___change_bit, nr, addr)
 #define __test_and_set_bit(nr, addr)	bitop(___test_and_set_bit, nr, addr)
 #define __test_and_clear_bit(nr, addr)	bitop(___test_and_clear_bit, nr, addr)
 #define __test_and_change_bit(nr, addr)	bitop(___test_and_change_bit, nr, addr)
+
 #define test_bit(nr, addr)		bitop(_test_bit, nr, addr)
 #define test_bit_acquire(nr, addr)	bitop(_test_bit_acquire, nr, addr)
 
-- 
cgit v1.2.3


From 52c2e956dcebecc8901911217a9647203ebcaf3c Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Mon, 17 Jun 2024 18:45:53 -0700
Subject: drm/xe/perf/uapi: "Perf" layer to support multiple perf counter
 stream types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In Xe, the plan is to support multiple types of perf counter streams (OA is
only one type of these streams). Rather than introduce NxM ioctls for
these (N perf streams with M ioctl's per perf stream), we decide to
multiplex these (N different stream types and the M ops for each of these
stream types) through a single PERF ioctl. This multiplexing is the purpose
of the PERF layer.

In addition to PERF DRM ioctl's, another set of ioctl's on the PERF fd are
defined. These are expected to be common to different PERF stream types and
therefore defined at the PERF layer itself.

v2: Add param_size to 'struct drm_xe_perf_param' (Umesh)
v3: Rename 'enum drm_xe_perf_ops' to
    'enum drm_xe_perf_ioctls' (Guy Zadicario)
    Add DRM_ prefix to ioctl names to indicate uapi names
v4: Add 'enum drm_xe_perf_op' previously missed out (Guy Zadicario)
v5: Squash the ops and PERF layer patches into a single patch (Umesh)
    Remove param_size from struct 'drm_xe_perf_param' (Umesh)
v6: Add DRM_XE_PERF_IOCTL_STATUS
v7: Add DRM_XE_PERF_IOCTL_INFO
v8: Fix Copyright years, fix DRM_XE_PERF_TYPE_MAX, move '#include
    "xe_perf.h"' to xe_perf.c, add kernel doc (Michal)

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: Guy Zadicario <gzadicario@habana.ai>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-2-ashutosh.dixit@intel.com
---
 include/uapi/drm/xe_drm.h | 66 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index d7b0903c22b2..c1626027dc69 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -80,6 +80,7 @@ extern "C" {
  *  - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY
  *  - &DRM_IOCTL_XE_EXEC
  *  - &DRM_IOCTL_XE_WAIT_USER_FENCE
+ *  - &DRM_IOCTL_XE_PERF
  */
 
 /*
@@ -100,6 +101,8 @@ extern "C" {
 #define DRM_XE_EXEC_QUEUE_GET_PROPERTY	0x08
 #define DRM_XE_EXEC			0x09
 #define DRM_XE_WAIT_USER_FENCE		0x0a
+#define DRM_XE_PERF			0x0b
+
 /* Must be kept compact -- no holes */
 
 #define DRM_IOCTL_XE_DEVICE_QUERY		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
@@ -113,6 +116,7 @@ extern "C" {
 #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
 #define DRM_IOCTL_XE_EXEC			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
+#define DRM_IOCTL_XE_PERF			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_PERF, struct drm_xe_perf_param)
 
 /**
  * DOC: Xe IOCTL Extensions
@@ -1370,6 +1374,68 @@ struct drm_xe_wait_user_fence {
 	__u64 reserved[2];
 };
 
+/**
+ * enum drm_xe_perf_type - Perf stream types
+ */
+enum drm_xe_perf_type {
+	__DRM_XE_PERF_TYPE_MAX, /* non-ABI */
+};
+
+/**
+ * enum drm_xe_perf_op - Perf stream ops
+ */
+enum drm_xe_perf_op {
+	/** @DRM_XE_PERF_OP_STREAM_OPEN: Open a perf counter stream */
+	DRM_XE_PERF_OP_STREAM_OPEN,
+
+	/** @DRM_XE_PERF_OP_ADD_CONFIG: Add perf stream config */
+	DRM_XE_PERF_OP_ADD_CONFIG,
+
+	/** @DRM_XE_PERF_OP_REMOVE_CONFIG: Remove perf stream config */
+	DRM_XE_PERF_OP_REMOVE_CONFIG,
+};
+
+/**
+ * struct drm_xe_perf_param - Input of &DRM_XE_PERF
+ *
+ * The perf layer enables multiplexing perf counter streams of multiple
+ * types. The actual params for a particular stream operation are supplied
+ * via the @param pointer (use __copy_from_user to get these params).
+ */
+struct drm_xe_perf_param {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+	/** @perf_type: Perf stream type, of enum @drm_xe_perf_type */
+	__u64 perf_type;
+	/** @perf_op: Perf op, of enum @drm_xe_perf_op */
+	__u64 perf_op;
+	/** @param: Pointer to actual stream params */
+	__u64 param;
+};
+
+/**
+ * enum drm_xe_perf_ioctls - Perf fd ioctl's
+ *
+ * Information exchanged between userspace and kernel for perf fd ioctl's
+ * is stream type specific
+ */
+enum drm_xe_perf_ioctls {
+	/** @DRM_XE_PERF_IOCTL_ENABLE: Enable data capture for a stream */
+	DRM_XE_PERF_IOCTL_ENABLE = _IO('i', 0x0),
+
+	/** @DRM_XE_PERF_IOCTL_DISABLE: Disable data capture for a stream */
+	DRM_XE_PERF_IOCTL_DISABLE = _IO('i', 0x1),
+
+	/** @DRM_XE_PERF_IOCTL_CONFIG: Change stream configuration */
+	DRM_XE_PERF_IOCTL_CONFIG = _IO('i', 0x2),
+
+	/** @DRM_XE_PERF_IOCTL_STATUS: Return stream status */
+	DRM_XE_PERF_IOCTL_STATUS = _IO('i', 0x3),
+
+	/** @DRM_XE_PERF_IOCTL_INFO: Return stream info */
+	DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4),
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From 67977882a2f1339f0a7d32576ad61967828b2ca5 Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Mon, 17 Jun 2024 18:45:55 -0700
Subject: drm/xe/oa/uapi: Add OA data formats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add and initialize supported OA data formats for various platforms
(including Xe2). User can request OA data in any supported format.

Bspec: 52198, 60942, 61101

v2: Start 'xe_oa_format_name' enum from 0 (Umesh)
    Fix error rewind with OA (Umesh)
v3: Use graphics versions rather than absolute platform names
v4: Add missing kernel doc for struct memebers and enum and other minor
    changes (Michal)

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-4-ashutosh.dixit@intel.com
---
 include/uapi/drm/xe_drm.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index c1626027dc69..7e10874bfb33 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1436,6 +1436,25 @@ enum drm_xe_perf_ioctls {
 	DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4),
 };
 
+/**
+ * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec
+ * 52198/60942
+ */
+enum drm_xe_oa_format_type {
+	/** @DRM_XE_OA_FMT_TYPE_OAG: OAG report format */
+	DRM_XE_OA_FMT_TYPE_OAG,
+	/** @DRM_XE_OA_FMT_TYPE_OAR: OAR report format */
+	DRM_XE_OA_FMT_TYPE_OAR,
+	/** @DRM_XE_OA_FMT_TYPE_OAM: OAM report format */
+	DRM_XE_OA_FMT_TYPE_OAM,
+	/** @DRM_XE_OA_FMT_TYPE_OAC: OAC report format */
+	DRM_XE_OA_FMT_TYPE_OAC,
+	/** @DRM_XE_OA_FMT_TYPE_OAM_MPEC: OAM SAMEDIA or OAM MPEC report format */
+	DRM_XE_OA_FMT_TYPE_OAM_MPEC,
+	/** @DRM_XE_OA_FMT_TYPE_PEC: PEC report format */
+	DRM_XE_OA_FMT_TYPE_PEC,
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From a9f905ae7b6f29a337dda2ad773c08b92dafe9a5 Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Mon, 17 Jun 2024 18:45:56 -0700
Subject: drm/xe/oa/uapi: Initialize OA units
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Initialize OA unit data struct's for each gt during device probe. Also
assign OA units for hardware engines.

v2: Remove XE_OA_UNIT_OAG/XE_OA_UNIT_OAM_SAMEDIA_0 enum (Umesh)
    Change mtl_oa_base to 0x13000 (Umesh)
v3: Switch to drmm_ functions and other cleanups (Michal)

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-5-ashutosh.dixit@intel.com
---
 include/uapi/drm/xe_drm.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 7e10874bfb33..323d899a276b 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1436,6 +1436,20 @@ enum drm_xe_perf_ioctls {
 	DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4),
 };
 
+/**
+ * enum drm_xe_oa_unit_type - OA unit types
+ */
+enum drm_xe_oa_unit_type {
+	/**
+	 * @DRM_XE_OA_UNIT_TYPE_OAG: OAG OA unit. OAR/OAC are considered
+	 * sub-types of OAG. For OAR/OAC, use OAG.
+	 */
+	DRM_XE_OA_UNIT_TYPE_OAG,
+
+	/** @DRM_XE_OA_UNIT_TYPE_OAM: OAM OA unit */
+	DRM_XE_OA_UNIT_TYPE_OAM,
+};
+
 /**
  * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec
  * 52198/60942
-- 
cgit v1.2.3


From cdf02fe1a94a768cbcd20f5c4e1a1d805f4a06c0 Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Mon, 17 Jun 2024 18:45:57 -0700
Subject: drm/xe/oa/uapi: Add/remove OA config perf ops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce add/remove config perf ops for OA. OA configurations consist of a
set of event/counter select register address/value pairs. The add_config
perf op validates and stores such configurations and also exposes them in
the metrics sysfs. These configurations will be programmed to OA unit HW
when an OA stream using a configuration is opened. The OA stream can also
switch to other stored configurations.

v2: Start config id's from 1 and other minor review comments (Umesh)
v3: Add 32 bit build
v4: Add kernel doc for non-static functions (Michal)

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-6-ashutosh.dixit@intel.com
---
 include/uapi/drm/xe_drm.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 323d899a276b..fd9a4bd9e3d4 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1378,6 +1378,7 @@ struct drm_xe_wait_user_fence {
  * enum drm_xe_perf_type - Perf stream types
  */
 enum drm_xe_perf_type {
+	DRM_XE_PERF_TYPE_OA,
 	__DRM_XE_PERF_TYPE_MAX, /* non-ABI */
 };
 
@@ -1469,6 +1470,30 @@ enum drm_xe_oa_format_type {
 	DRM_XE_OA_FMT_TYPE_PEC,
 };
 
+/**
+ * struct drm_xe_oa_config - OA metric configuration
+ *
+ * Multiple OA configs can be added using @DRM_XE_PERF_OP_ADD_CONFIG. A
+ * particular config can be specified when opening an OA stream using
+ * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property.
+ */
+struct drm_xe_oa_config {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @uuid: String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" */
+	char uuid[36];
+
+	/** @n_regs: Number of regs in @regs_ptr */
+	__u32 n_regs;
+
+	/**
+	 * @regs_ptr: Pointer to (register address, value) pairs for OA config
+	 * registers. Expected length of buffer is: (2 * sizeof(u32) * @n_regs).
+	 */
+	__u64 regs_ptr;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From b6fd51c6211910b1db072a3fa2a17ba85cb3dd51 Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Mon, 17 Jun 2024 18:45:58 -0700
Subject: drm/xe/oa/uapi: Define and parse OA stream properties
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Properties for OA streams are specified by user space, when the stream is
opened, as a chain of drm_xe_ext_set_property struct's. Parse and validate
these stream properties.

v2: Remove struct drm_xe_oa_open_param (Harish Chegondi)
    Drop DRM_XE_OA_PROPERTY_POLL_OA_PERIOD_US (Umesh)
    Eliminate comparison with xe_oa_max_sample_rate (Umesh)
    Drop 'struct drm_xe_oa_record_header' (Umesh)
v3: s/DRM_XE_OA_PROPERTY_OA_EXPONENT/ \
    DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT/ (Jose)
v4: Fix 32 bit build
v5: Add non-static function kernel doc (Michal)

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-7-ashutosh.dixit@intel.com
---
 include/uapi/drm/xe_drm.h | 72 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index fd9a4bd9e3d4..307409f968e2 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1470,6 +1470,78 @@ enum drm_xe_oa_format_type {
 	DRM_XE_OA_FMT_TYPE_PEC,
 };
 
+/**
+ * enum drm_xe_oa_property_id - OA stream property id's
+ *
+ * Stream params are specified as a chain of @drm_xe_ext_set_property
+ * struct's, with @property values from enum @drm_xe_oa_property_id and
+ * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY.
+ * @param field in struct @drm_xe_perf_param points to the first
+ * @drm_xe_ext_set_property struct.
+ */
+enum drm_xe_oa_property_id {
+#define DRM_XE_OA_EXTENSION_SET_PROPERTY	0
+	/**
+	 * @DRM_XE_OA_PROPERTY_OA_UNIT_ID: ID of the OA unit on which to open
+	 * the OA stream, see @oa_unit_id in 'struct
+	 * drm_xe_query_oa_units'. Defaults to 0 if not provided.
+	 */
+	DRM_XE_OA_PROPERTY_OA_UNIT_ID = 1,
+
+	/**
+	 * @DRM_XE_OA_PROPERTY_SAMPLE_OA: A value of 1 requests inclusion of raw
+	 * OA unit reports or stream samples in a global buffer attached to an
+	 * OA unit.
+	 */
+	DRM_XE_OA_PROPERTY_SAMPLE_OA,
+
+	/**
+	 * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA
+	 * reports, previously added via @DRM_XE_PERF_OP_ADD_CONFIG.
+	 */
+	DRM_XE_OA_PROPERTY_OA_METRIC_SET,
+
+	/** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */
+	DRM_XE_OA_PROPERTY_OA_FORMAT,
+	/*
+	 * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942,
+	 * in terms of the following quantities: a. enum @drm_xe_oa_format_type
+	 * b. Counter select c. Counter size and d. BC report. Also refer to the
+	 * oa_formats array in drivers/gpu/drm/xe/xe_oa.c.
+	 */
+#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE		(0xff << 0)
+#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL	(0xff << 8)
+#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE	(0xff << 16)
+#define DRM_XE_OA_FORMAT_MASK_BC_REPORT		(0xff << 24)
+
+	/**
+	 * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit
+	 * sampling with sampling frequency proportional to 2^(period_exponent + 1)
+	 */
+	DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT,
+
+	/**
+	 * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA
+	 * stream in a DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE).
+	 */
+	DRM_XE_OA_PROPERTY_OA_DISABLED,
+
+	/**
+	 * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific
+	 * @exec_queue_id. Perf queries can be executed on this exec queue.
+	 */
+	DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID,
+
+	/**
+	 * @DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE: Optional engine instance to
+	 * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0.
+	 */
+	DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE,
+
+	/** @DRM_XE_OA_PROPERTY_MAX: non-ABI */
+	DRM_XE_OA_PROPERTY_MAX
+};
+
 /**
  * struct drm_xe_oa_config - OA metric configuration
  *
-- 
cgit v1.2.3


From e936f885f1e96f59d9d05fb6cb5a02b9b9b88a05 Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Mon, 17 Jun 2024 18:46:00 -0700
Subject: drm/xe/oa/uapi: Expose OA stream fd
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The OA stream open perf op returns an fd with its own file_operations for
the newly initialized OA stream. These file_operations allow userspace to
enable or disable the stream, as well as apply a different metric
configuration for the OA stream. Userspace can also poll for data
availability. OA stream initialization is completed in this commit by
enabling the OA stream. When sampling is enabled this starts a hrtimer
which periodically checks for data availablility.

v2: Use stream properties for stream reconfiguration with
    DRM_XE_PERF_IOCTL_CONFIG
v3: Hold runtime_pm reference across oa buffer alloc/free
v4: Fix 32 bit build

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-9-ashutosh.dixit@intel.com
---
 include/uapi/drm/xe_drm.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 307409f968e2..1e09f786b3e6 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1478,6 +1478,10 @@ enum drm_xe_oa_format_type {
  * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY.
  * @param field in struct @drm_xe_perf_param points to the first
  * @drm_xe_ext_set_property struct.
+ *
+ * Exactly the same mechanism is also used for stream reconfiguration using
+ * the @DRM_XE_PERF_IOCTL_CONFIG perf fd ioctl, though only a subset of
+ * properties below can be specified for stream reconfiguration.
  */
 enum drm_xe_oa_property_id {
 #define DRM_XE_OA_EXTENSION_SET_PROPERTY	0
-- 
cgit v1.2.3


From efb315d0a013cdc8b1e49f5c07b1a2972bc624d4 Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Mon, 17 Jun 2024 18:46:01 -0700
Subject: drm/xe/oa/uapi: Read file_operation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement the OA stream read file_operation. Both blocking and non-blocking
reads are supported. As part of read system call, the read copies OA perf
data from the OA buffer to the user buffer, after appending packet headers
for status and data packets.

v2: Drop OA report headers, implement DRM_XE_PERF_IOCTL_STATUS (Umesh)
v3: Introduce 'struct drm_xe_oa_stream_status'
v4: Define oa_status register bitfields (Umesh)
v5: Add extensions to 'struct drm_xe_oa_stream_status'
v6: Minor cleanup, eliminate report32 variable
v7: Use -EIO to signal to userspace to read OASTATUS using
    DRM_XE_PERF_IOCTL_STATUS, change previous sites returning -EIO to
    return -EINVAL
    Make drm_xe_oa_stream_status bits contiguous (Jose, Umesh)
    rmw oa_status bits (Umesh)

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-10-ashutosh.dixit@intel.com
---
 include/uapi/drm/xe_drm.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 1e09f786b3e6..03a6e479227a 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1570,6 +1570,26 @@ struct drm_xe_oa_config {
 	__u64 regs_ptr;
 };
 
+/**
+ * struct drm_xe_oa_stream_status - OA stream status returned from
+ * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to
+ * query stream status in response to EIO errno from perf fd read().
+ */
+struct drm_xe_oa_stream_status {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @oa_status: OA stream status (see Bspec 46717/61226) */
+	__u64 oa_status;
+#define DRM_XE_OASTATUS_MMIO_TRG_Q_FULL		(1 << 3)
+#define DRM_XE_OASTATUS_COUNTER_OVERFLOW	(1 << 2)
+#define DRM_XE_OASTATUS_BUFFER_OVERFLOW		(1 << 1)
+#define DRM_XE_OASTATUS_REPORT_LOST		(1 << 0)
+
+	/** @reserved: reserved for future use */
+	__u64 reserved[3];
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From dd6b4718c3bab611588922ae8a7736c58eafcc93 Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Mon, 17 Jun 2024 18:46:04 -0700
Subject: drm/xe/oa/uapi: Query OA unit properties

Implement query for properties of OA units present on a device.

v2: Clean up reserved/pad fields (Umesh)
    Follow the same scheme as other query structs
v3: Skip reporting reserved engines attached to OA units
v4: Expose oa_buf_size via DRM_XE_PERF_IOCTL_INFO (Umesh)
v5: Don't expose capabilities as OR of properties (Umesh)
v6: Add extensions to query output structs: drm_xe_oa_unit,
    drm_xe_query_oa_units and drm_xe_oa_stream_info
v7: Change oa_units[] array to __u64 type

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-13-ashutosh.dixit@intel.com
---
 include/uapi/drm/xe_drm.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 03a6e479227a..93e00be44b2d 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -689,6 +689,7 @@ struct drm_xe_device_query {
 #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY		5
 #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES	6
 #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION	7
+#define DRM_XE_DEVICE_QUERY_OA_UNITS		8
 	/** @query: The type of data to query */
 	__u32 query;
 
@@ -1451,6 +1452,75 @@ enum drm_xe_oa_unit_type {
 	DRM_XE_OA_UNIT_TYPE_OAM,
 };
 
+/**
+ * struct drm_xe_oa_unit - describe OA unit
+ */
+struct drm_xe_oa_unit {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @oa_unit_id: OA unit ID */
+	__u32 oa_unit_id;
+
+	/** @oa_unit_type: OA unit type of @drm_xe_oa_unit_type */
+	__u32 oa_unit_type;
+
+	/** @capabilities: OA capabilities bit-mask */
+	__u64 capabilities;
+#define DRM_XE_OA_CAPS_BASE		(1 << 0)
+
+	/** @oa_timestamp_freq: OA timestamp freq */
+	__u64 oa_timestamp_freq;
+
+	/** @reserved: MBZ */
+	__u64 reserved[4];
+
+	/** @num_engines: number of engines in @eci array */
+	__u64 num_engines;
+
+	/** @eci: engines attached to this OA unit */
+	struct drm_xe_engine_class_instance eci[];
+};
+
+/**
+ * struct drm_xe_query_oa_units - describe OA units
+ *
+ * If a query is made with a struct drm_xe_device_query where .query
+ * is equal to DRM_XE_DEVICE_QUERY_OA_UNITS, then the reply uses struct
+ * drm_xe_query_oa_units in .data.
+ *
+ * OA unit properties for all OA units can be accessed using a code block
+ * such as the one below:
+ *
+ * .. code-block:: C
+ *
+ *	struct drm_xe_query_oa_units *qoa;
+ *	struct drm_xe_oa_unit *oau;
+ *	u8 *poau;
+ *
+ *	// malloc qoa and issue DRM_XE_DEVICE_QUERY_OA_UNITS. Then:
+ *	poau = (u8 *)&qoa->oa_units[0];
+ *	for (int i = 0; i < qoa->num_oa_units; i++) {
+ *		oau = (struct drm_xe_oa_unit *)poau;
+ *		// Access 'struct drm_xe_oa_unit' fields here
+ *		poau += sizeof(*oau) + oau->num_engines * sizeof(oau->eci[0]);
+ *	}
+ */
+struct drm_xe_query_oa_units {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+	/** @num_oa_units: number of OA units returned in oau[] */
+	__u32 num_oa_units;
+	/** @pad: MBZ */
+	__u32 pad;
+	/**
+	 * @oa_units: struct @drm_xe_oa_unit array returned for this device.
+	 * Written below as a u64 array to avoid problems with nested flexible
+	 * arrays with some compilers
+	 */
+	__u64 oa_units[];
+};
+
 /**
  * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec
  * 52198/60942
@@ -1590,6 +1660,21 @@ struct drm_xe_oa_stream_status {
 	__u64 reserved[3];
 };
 
+/**
+ * struct drm_xe_oa_stream_info - OA stream info returned from
+ * @DRM_XE_PERF_IOCTL_INFO perf fd ioctl
+ */
+struct drm_xe_oa_stream_info {
+	/** @extensions: Pointer to the first extension struct, if any */
+	__u64 extensions;
+
+	/** @oa_buf_size: OA buffer size */
+	__u64 oa_buf_size;
+
+	/** @reserved: reserved for future use */
+	__u64 reserved[3];
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From 382d1741b5b2feffef7942dd074206372afe1a96 Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Mon, 17 Jun 2024 13:17:26 -0700
Subject: net: mana: Add support for page sizes other than 4KB on ARM64

As defined by the MANA Hardware spec, the queue size for DMA is 4KB
minimal, and power of 2. And, the HWC queue size has to be exactly
4KB.

To support page sizes other than 4KB on ARM64, define the minimal
queue size as a macro separately from the PAGE_SIZE, which we always
assumed it to be 4KB before supporting ARM64.

Also, add MANA specific macros and update code related to size
alignment, DMA region calculations, etc.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Link: https://lore.kernel.org/r/1718655446-6576-1-git-send-email-haiyangz@microsoft.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/mana/gdma.h | 10 +++++++++-
 include/net/mana/mana.h |  3 ++-
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index c547756c4284..83963d9e804d 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -224,7 +224,15 @@ struct gdma_dev {
 	struct auxiliary_device *adev;
 };
 
-#define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE
+/* MANA_PAGE_SIZE is the DMA unit */
+#define MANA_PAGE_SHIFT 12
+#define MANA_PAGE_SIZE BIT(MANA_PAGE_SHIFT)
+#define MANA_PAGE_ALIGN(x) ALIGN((x), MANA_PAGE_SIZE)
+#define MANA_PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), MANA_PAGE_SIZE)
+#define MANA_PFN(a) ((a) >> MANA_PAGE_SHIFT)
+
+/* Required by HW */
+#define MANA_MIN_QSIZE MANA_PAGE_SIZE
 
 #define GDMA_CQE_SIZE 64
 #define GDMA_EQE_SIZE 16
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 59823901b74f..e39b8676fe54 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -42,7 +42,8 @@ enum TRI_STATE {
 
 #define MAX_SEND_BUFFERS_PER_QUEUE 256
 
-#define EQ_SIZE (8 * PAGE_SIZE)
+#define EQ_SIZE (8 * MANA_PAGE_SIZE)
+
 #define LOG2_EQ_THROTTLE 3
 
 #define MAX_PORTS_IN_MANA_DEV 256
-- 
cgit v1.2.3


From 1e4c64b71c9bf230b25fde12cbcceacfdc8b3332 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Thu, 13 Jun 2024 11:55:07 -0400
Subject: mm/memblock: Add "reserve_mem" to reserved named memory at boot up

In order to allow for requesting a memory region that can be used for
things like pstore on multiple machines where the memory layout is not the
same, add a new option to the kernel command line called "reserve_mem".

The format is:  reserve_mem=nn:align:name

Where it will find nn amount of memory at the given alignment of align.
The name field is to allow another subsystem to retrieve where the memory
was found. For example:

  reserve_mem=12M:4096:oops ramoops.mem_name=oops

Where ramoops.mem_name will tell ramoops that memory was reserved for it
via the reserve_mem option and it can find it by calling:

  if (reserve_mem_find_by_name("oops", &start, &size)) {
	// start holds the start address and size holds the size given

This is typically used for systems that do not wipe the RAM, and this
command line will try to reserve the same physical memory on soft reboots.
Note, it is not guaranteed to be the same location. For example, if KASLR
places the kernel at the location of where the RAM reservation was from a
previous boot, the new reservation will be at a different location.  Any
subsystem using this feature must add a way to verify that the contents of
the physical memory is from a previous boot, as there may be cases where
the memory will not be located at the same location.

Not all systems may work either. There could be bit flips if the reboot
goes through the BIOS. Using kexec to reboot the machine is likely to
have better results in such cases.

Link: https://lore.kernel.org/all/ZjJVnZUX3NZiGW6q@kernel.org/

Suggested-by: Mike Rapoport <rppt@kernel.org>
Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Link: https://lore.kernel.org/r/20240613155527.437020271@goodmis.org
Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org>
---
 include/linux/mm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9849dfda44d4..077fb589b88a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4263,4 +4263,6 @@ static inline bool pfn_is_unaccepted_memory(unsigned long pfn)
 void vma_pgtable_walk_begin(struct vm_area_struct *vma);
 void vma_pgtable_walk_end(struct vm_area_struct *vma);
 
+int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size);
+
 #endif /* _LINUX_MM_H */
-- 
cgit v1.2.3


From c53795d48ee8f385c6a9e394651e7ee914baaeba Mon Sep 17 00:00:00 2001
From: Yan Zhai <yan@cloudflare.com>
Date: Mon, 17 Jun 2024 11:09:04 -0700
Subject: net: add rx_sk to trace_kfree_skb

skb does not include enough information to find out receiving
sockets/services and netns/containers on packet drops. In theory
skb->dev tells about netns, but it can get cleared/reused, e.g. by TCP
stack for OOO packet lookup. Similarly, skb->sk often identifies a local
sender, and tells nothing about a receiver.

Allow passing an extra receiving socket to the tracepoint to improve
the visibility on receiving drops.

Signed-off-by: Yan Zhai <yan@cloudflare.com>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/skb.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 07e0715628ec..b877133cd93a 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -24,13 +24,14 @@ DEFINE_DROP_REASON(FN, FN)
 TRACE_EVENT(kfree_skb,
 
 	TP_PROTO(struct sk_buff *skb, void *location,
-		 enum skb_drop_reason reason),
+		 enum skb_drop_reason reason, struct sock *rx_sk),
 
-	TP_ARGS(skb, location, reason),
+	TP_ARGS(skb, location, reason, rx_sk),
 
 	TP_STRUCT__entry(
 		__field(void *,		skbaddr)
 		__field(void *,		location)
+		__field(void *,		rx_sk)
 		__field(unsigned short,	protocol)
 		__field(enum skb_drop_reason,	reason)
 	),
@@ -38,12 +39,14 @@ TRACE_EVENT(kfree_skb,
 	TP_fast_assign(
 		__entry->skbaddr = skb;
 		__entry->location = location;
+		__entry->rx_sk = rx_sk;
 		__entry->protocol = ntohs(skb->protocol);
 		__entry->reason = reason;
 	),
 
-	TP_printk("skbaddr=%p protocol=%u location=%pS reason: %s",
-		  __entry->skbaddr, __entry->protocol, __entry->location,
+	TP_printk("skbaddr=%p rx_sk=%p protocol=%u location=%pS reason: %s",
+		  __entry->skbaddr, __entry->rx_sk, __entry->protocol,
+		  __entry->location,
 		  __print_symbolic(__entry->reason,
 				   DEFINE_DROP_REASON(FN, FNe)))
 );
-- 
cgit v1.2.3


From ba8de796baf4bdc03530774fb284fe3c97875566 Mon Sep 17 00:00:00 2001
From: Yan Zhai <yan@cloudflare.com>
Date: Mon, 17 Jun 2024 11:09:09 -0700
Subject: net: introduce sk_skb_reason_drop function

Long used destructors kfree_skb and kfree_skb_reason do not pass
receiving socket to packet drop tracepoints trace_kfree_skb.
This makes it hard to track packet drops of a certain netns (container)
or a socket (user application).

The naming of these destructors are also not consistent with most sk/skb
operating functions, i.e. functions named "sk_xxx" or "skb_xxx".
Introduce a new functions sk_skb_reason_drop as drop-in replacement for
kfree_skb_reason on local receiving path. Callers can now pass receiving
sockets to the tracepoints.

kfree_skb and kfree_skb_reason are still usable but they are now just
inline helpers that call sk_skb_reason_drop.

Note it is not feasible to do the same to consume_skb. Packets not
dropped can flow through multiple receive handlers, and have multiple
receiving sockets. Leave it untouched for now.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yan Zhai <yan@cloudflare.com>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 813406a9bd6c..f4cda3fbdb75 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1251,8 +1251,14 @@ static inline bool skb_data_unref(const struct sk_buff *skb,
 	return true;
 }
 
-void __fix_address
-kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
+void __fix_address sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb,
+				      enum skb_drop_reason reason);
+
+static inline void
+kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
+{
+	sk_skb_reason_drop(NULL, skb, reason);
+}
 
 /**
  *	kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason
-- 
cgit v1.2.3


From fe1ff61487ace69cd4e680e36169c90667eb9624 Mon Sep 17 00:00:00 2001
From: Jack Yu <jack.yu@realtek.com>
Date: Wed, 19 Jun 2024 05:53:42 +0000
Subject: ASoC: rt1318: Add RT1318 audio amplifier driver

This is the initial i2s-based amplifier driver for rt1318.

Signed-off-by: Jack Yu <jack.yu@realtek.com>
Link: https://msgid.link/r/b3055442ce6d4994aa01aa1fad6ba1fe@realtek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/rt1318.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 include/sound/rt1318.h

(limited to 'include')

diff --git a/include/sound/rt1318.h b/include/sound/rt1318.h
new file mode 100644
index 000000000000..fe6bff06036c
--- /dev/null
+++ b/include/sound/rt1318.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/sound/rt1318.h -- Platform data for RT1318
+ *
+ * Copyright 2024 Realtek Semiconductor Corp.
+ */
+
+#ifndef __LINUX_SND_RT1318_H
+#define __LINUX_SND_RT1318_H
+
+struct rt1318_platform_data {
+	unsigned int init_r0_l;
+	unsigned int init_r0_r;
+};
+
+#endif
-- 
cgit v1.2.3


From dc2e77979412d289df9049d8c693761db8602867 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@suse.de>
Date: Fri, 14 Jun 2024 12:30:44 -0400
Subject: net: Split a __sys_bind helper for io_uring

io_uring holds a reference to the file and maintains a
sockaddr_storage address.  Similarly to what was done to
__sys_connect_file, split an internal helper for __sys_bind in
preparation to supporting an io_uring bind command.

Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20240614163047.31581-1-krisman@suse.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/socket.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 89d16b90370b..b3000f49e9f5 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -442,6 +442,8 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
 extern int __sys_socket(int family, int type, int protocol);
 extern struct file *__sys_socket_file(int family, int type, int protocol);
 extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
+extern int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address,
+			     int addrlen);
 extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
 			      int addrlen, int file_flags);
 extern int __sys_connect(int fd, struct sockaddr __user *uservaddr,
-- 
cgit v1.2.3


From bb6aaf736680f0f3c2e6281735c47c64e2042819 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@suse.de>
Date: Fri, 14 Jun 2024 12:30:45 -0400
Subject: net: Split a __sys_listen helper for io_uring

io_uring holds a reference to the file and maintains a sockaddr_storage
address.  Similarly to what was done to __sys_connect_file, split an
internal helper for __sys_listen in preparation to support an
io_uring listen command.

Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20240614163047.31581-2-krisman@suse.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/socket.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index b3000f49e9f5..c1f16cdab677 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -449,6 +449,7 @@ extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
 extern int __sys_connect(int fd, struct sockaddr __user *uservaddr,
 			 int addrlen);
 extern int __sys_listen(int fd, int backlog);
+extern int __sys_listen_socket(struct socket *sock, int backlog);
 extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
 			     int __user *usockaddr_len);
 extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
-- 
cgit v1.2.3


From 7481fd93fa0a851740e26026485f56a1305454ce Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@suse.de>
Date: Fri, 14 Jun 2024 12:30:46 -0400
Subject: io_uring: Introduce IORING_OP_BIND

IORING_OP_BIND provides the semantic of bind(2) via io_uring.  While
this is an essentially synchronous system call, the main point is to
enable a network path to execute fully with io_uring registered and
descriptorless files.

Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
Link: https://lore.kernel.org/r/20240614163047.31581-3-krisman@suse.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 994bf7af0efe..4ef153d95c87 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -257,6 +257,7 @@ enum io_uring_op {
 	IORING_OP_FUTEX_WAITV,
 	IORING_OP_FIXED_FD_INSTALL,
 	IORING_OP_FTRUNCATE,
+	IORING_OP_BIND,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
cgit v1.2.3


From ff140cc8628abfb1755691d16cfa8788d8820ef7 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@suse.de>
Date: Fri, 14 Jun 2024 12:30:47 -0400
Subject: io_uring: Introduce IORING_OP_LISTEN

IORING_OP_LISTEN provides the semantic of listen(2) via io_uring.  While
this is an essentially synchronous system call, the main point is to
enable a network path to execute fully with io_uring registered and
descriptorless files.

Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
Link: https://lore.kernel.org/r/20240614163047.31581-4-krisman@suse.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 4ef153d95c87..2aaf7ee256ac 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -258,6 +258,7 @@ enum io_uring_op {
 	IORING_OP_FIXED_FD_INSTALL,
 	IORING_OP_FTRUNCATE,
 	IORING_OP_BIND,
+	IORING_OP_LISTEN,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
cgit v1.2.3


From 1122c0c1cc71f740fa4d5f14f239194e06a1d5e7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 17 Jun 2024 08:04:40 +0200
Subject: block: move cache control settings out of queue->flags

Move the cache control settings into the queue_limits so that the flags
can be set atomically with the device queue frozen.

Add new features and flags field for the driver set flags, and internal
(usually sysfs-controlled) flags in the block layer.  Note that we'll
eventually remove enough field from queue_limits to bring it back to the
previous size.

The disable flag is inverted compared to the previous meaning, which
means it now survives a rescan, similar to the max_sectors and
max_discard_sectors user limits.

The FLUSH and FUA flags are now inherited by blk_stack_limits, which
simplified the code in dm a lot, but also causes a slight behavior
change in that dm-switch and dm-unstripe now advertise a write cache
despite setting num_flush_bios to 0.  The I/O path will handle this
gracefully, but as far as I can tell the lack of num_flush_bios
and thus flush support is a pre-existing data integrity bug in those
targets that really needs fixing, after which a non-zero num_flush_bios
should be required in dm for targets that map to underlying devices.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240617060532.127975-14-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0c247a716885..acdfe5122faa 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -282,6 +282,28 @@ static inline bool blk_op_is_passthrough(blk_opf_t op)
 	return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
 }
 
+/* flags set by the driver in queue_limits.features */
+enum {
+	/* supports a volatile write cache */
+	BLK_FEAT_WRITE_CACHE			= (1u << 0),
+
+	/* supports passing on the FUA bit */
+	BLK_FEAT_FUA				= (1u << 1),
+};
+
+/*
+ * Flags automatically inherited when stacking limits.
+ */
+#define BLK_FEAT_INHERIT_MASK \
+	(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA)
+
+
+/* internal flags in queue_limits.flags */
+enum {
+	/* do not send FLUSH or FUA command despite advertised write cache */
+	BLK_FLAGS_WRITE_CACHE_DISABLED		= (1u << 31),
+};
+
 /*
  * BLK_BOUNCE_NONE:	never bounce (default)
  * BLK_BOUNCE_HIGH:	bounce all highmem pages
@@ -292,6 +314,8 @@ enum blk_bounce {
 };
 
 struct queue_limits {
+	unsigned int		features;
+	unsigned int		flags;
 	enum blk_bounce		bounce;
 	unsigned long		seg_boundary_mask;
 	unsigned long		virt_boundary_mask;
@@ -536,12 +560,9 @@ struct request_queue {
 #define QUEUE_FLAG_ADD_RANDOM	10	/* Contributes to random pool */
 #define QUEUE_FLAG_SYNCHRONOUS	11	/* always completes in submit context */
 #define QUEUE_FLAG_SAME_FORCE	12	/* force complete on same CPU */
-#define QUEUE_FLAG_HW_WC	13	/* Write back caching supported */
 #define QUEUE_FLAG_INIT_DONE	14	/* queue is initialized */
 #define QUEUE_FLAG_STABLE_WRITES 15	/* don't modify blks until WB is done */
 #define QUEUE_FLAG_POLL		16	/* IO polling enabled if set */
-#define QUEUE_FLAG_WC		17	/* Write back caching */
-#define QUEUE_FLAG_FUA		18	/* device supports FUA writes */
 #define QUEUE_FLAG_DAX		19	/* device supports DAX */
 #define QUEUE_FLAG_STATS	20	/* track IO start and completion times */
 #define QUEUE_FLAG_REGISTERED	22	/* queue has been registered to a disk */
@@ -951,7 +972,6 @@ void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
 		sector_t offset, const char *pfx);
 extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
-extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
 
 struct blk_independent_access_ranges *
 disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges);
@@ -1304,14 +1324,20 @@ static inline bool bdev_stable_writes(struct block_device *bdev)
 	return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags);
 }
 
+static inline bool blk_queue_write_cache(struct request_queue *q)
+{
+	return (q->limits.features & BLK_FEAT_WRITE_CACHE) &&
+		!(q->limits.flags & BLK_FLAGS_WRITE_CACHE_DISABLED);
+}
+
 static inline bool bdev_write_cache(struct block_device *bdev)
 {
-	return test_bit(QUEUE_FLAG_WC, &bdev_get_queue(bdev)->queue_flags);
+	return blk_queue_write_cache(bdev_get_queue(bdev));
 }
 
 static inline bool bdev_fua(struct block_device *bdev)
 {
-	return test_bit(QUEUE_FLAG_FUA, &bdev_get_queue(bdev)->queue_flags);
+	return bdev_get_queue(bdev)->limits.features & BLK_FEAT_FUA;
 }
 
 static inline bool bdev_nowait(struct block_device *bdev)
-- 
cgit v1.2.3


From bd4a633b6f7c3c6b6ebc1a07317643270e751a94 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 17 Jun 2024 08:04:41 +0200
Subject: block: move the nonrot flag to queue_limits

Move the nonrot flag into the queue_limits feature field so that it can
be set atomically with the queue frozen.

Use the chance to switch to defaulting to non-rotational and require
the driver to opt into rotational, which matches the polarity of the
sysfs interface.

For the z2ram, ps3vram, 2x memstick, ubiblock and dcssblk the new
rotational flag is not set as they clearly are not rotational despite
this being a behavior change.  There are some other drivers that
unconditionally set the rotational flag to keep the existing behavior
as they arguably can be used on rotational devices even if that is
probably not their main use today (e.g. virtio_blk and drbd).

The flag is automatically inherited in blk_stack_limits matching the
existing behavior in dm and md.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240617060532.127975-15-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index acdfe5122faa..988e3248cffe 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -289,14 +289,16 @@ enum {
 
 	/* supports passing on the FUA bit */
 	BLK_FEAT_FUA				= (1u << 1),
+
+	/* rotational device (hard drive or floppy) */
+	BLK_FEAT_ROTATIONAL			= (1u << 2),
 };
 
 /*
  * Flags automatically inherited when stacking limits.
  */
 #define BLK_FEAT_INHERIT_MASK \
-	(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA)
-
+	(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL)
 
 /* internal flags in queue_limits.flags */
 enum {
@@ -553,8 +555,6 @@ struct request_queue {
 #define QUEUE_FLAG_NOMERGES     3	/* disable merge attempts */
 #define QUEUE_FLAG_SAME_COMP	4	/* complete on same CPU-group */
 #define QUEUE_FLAG_FAIL_IO	5	/* fake timeout */
-#define QUEUE_FLAG_NONROT	6	/* non-rotational device (SSD) */
-#define QUEUE_FLAG_VIRT		QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT	7	/* do disk/partitions IO accounting */
 #define QUEUE_FLAG_NOXMERGES	9	/* No extended merges */
 #define QUEUE_FLAG_ADD_RANDOM	10	/* Contributes to random pool */
@@ -589,7 +589,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_noxmerges(q)	\
 	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
-#define blk_queue_nonrot(q)	test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
+#define blk_queue_nonrot(q)	((q)->limits.features & BLK_FEAT_ROTATIONAL)
 #define blk_queue_io_stat(q)	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
 #define blk_queue_add_random(q)	test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
 #define blk_queue_zone_resetall(q)	\
-- 
cgit v1.2.3


From 39a9f1c334f9f27b3b3e6d0005c10ed667268346 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 17 Jun 2024 08:04:42 +0200
Subject: block: move the add_random flag to queue_limits

Move the add_random flag into the queue_limits feature field so that it
can be set atomically with the queue frozen.

Note that this also removes code from dm to clear the flag based on
the underlying devices, which can't be reached as dm devices will
always start out without the flag set.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240617060532.127975-16-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 988e3248cffe..cf1bbf566b2b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -292,6 +292,9 @@ enum {
 
 	/* rotational device (hard drive or floppy) */
 	BLK_FEAT_ROTATIONAL			= (1u << 2),
+
+	/* contributes to the random number pool */
+	BLK_FEAT_ADD_RANDOM			= (1u << 3),
 };
 
 /*
@@ -557,7 +560,6 @@ struct request_queue {
 #define QUEUE_FLAG_FAIL_IO	5	/* fake timeout */
 #define QUEUE_FLAG_IO_STAT	7	/* do disk/partitions IO accounting */
 #define QUEUE_FLAG_NOXMERGES	9	/* No extended merges */
-#define QUEUE_FLAG_ADD_RANDOM	10	/* Contributes to random pool */
 #define QUEUE_FLAG_SYNCHRONOUS	11	/* always completes in submit context */
 #define QUEUE_FLAG_SAME_FORCE	12	/* force complete on same CPU */
 #define QUEUE_FLAG_INIT_DONE	14	/* queue is initialized */
@@ -591,7 +593,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)	((q)->limits.features & BLK_FEAT_ROTATIONAL)
 #define blk_queue_io_stat(q)	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
-#define blk_queue_add_random(q)	test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
 #define blk_queue_zone_resetall(q)	\
 	test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
 #define blk_queue_dax(q)	test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
-- 
cgit v1.2.3


From cdb2497918cc2929691408bac87b58433b45b6d3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 17 Jun 2024 08:04:43 +0200
Subject: block: move the io_stat flag setting to queue_limits

Move the io_stat flag into the queue_limits feature field so that it can
be set atomically with the queue frozen.

Simplify md and dm to set the flag unconditionally instead of avoiding
setting a simple flag for cases where it already is set by other means,
which is a bit pointless.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240617060532.127975-17-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cf1bbf566b2b..5fafb2f95fd1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -295,6 +295,9 @@ enum {
 
 	/* contributes to the random number pool */
 	BLK_FEAT_ADD_RANDOM			= (1u << 3),
+
+	/* do disk/partitions IO accounting */
+	BLK_FEAT_IO_STAT			= (1u << 4),
 };
 
 /*
@@ -558,7 +561,6 @@ struct request_queue {
 #define QUEUE_FLAG_NOMERGES     3	/* disable merge attempts */
 #define QUEUE_FLAG_SAME_COMP	4	/* complete on same CPU-group */
 #define QUEUE_FLAG_FAIL_IO	5	/* fake timeout */
-#define QUEUE_FLAG_IO_STAT	7	/* do disk/partitions IO accounting */
 #define QUEUE_FLAG_NOXMERGES	9	/* No extended merges */
 #define QUEUE_FLAG_SYNCHRONOUS	11	/* always completes in submit context */
 #define QUEUE_FLAG_SAME_FORCE	12	/* force complete on same CPU */
@@ -577,8 +579,7 @@ struct request_queue {
 #define QUEUE_FLAG_SQ_SCHED     30	/* single queue style io dispatch */
 #define QUEUE_FLAG_SKIP_TAGSET_QUIESCE	31 /* quiesce_tagset skip the queue*/
 
-#define QUEUE_FLAG_MQ_DEFAULT	((1UL << QUEUE_FLAG_IO_STAT) |		\
-				 (1UL << QUEUE_FLAG_SAME_COMP) |	\
+#define QUEUE_FLAG_MQ_DEFAULT	((1UL << QUEUE_FLAG_SAME_COMP) |	\
 				 (1UL << QUEUE_FLAG_NOWAIT))
 
 void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
@@ -592,7 +593,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_noxmerges(q)	\
 	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)	((q)->limits.features & BLK_FEAT_ROTATIONAL)
-#define blk_queue_io_stat(q)	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
+#define blk_queue_io_stat(q)	((q)->limits.features & BLK_FEAT_IO_STAT)
 #define blk_queue_zone_resetall(q)	\
 	test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
 #define blk_queue_dax(q)	test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
-- 
cgit v1.2.3


From 1a02f3a73f8c670eddeb44bf52a75ae7f67cfc11 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 17 Jun 2024 08:04:44 +0200
Subject: block: move the stable_writes flag to queue_limits

Move the stable_writes flag into the queue_limits feature field so that
it can be set atomically with the queue frozen.

The flag is now inherited by blk_stack_limits, which greatly simplifies
the code in dm, and fixed md which previously did not pass on the flag
set on lower devices.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240617060532.127975-18-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5fafb2f95fd1..8936eb6ba609 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -298,13 +298,17 @@ enum {
 
 	/* do disk/partitions IO accounting */
 	BLK_FEAT_IO_STAT			= (1u << 4),
+
+	/* don't modify data until writeback is done */
+	BLK_FEAT_STABLE_WRITES			= (1u << 5),
 };
 
 /*
  * Flags automatically inherited when stacking limits.
  */
 #define BLK_FEAT_INHERIT_MASK \
-	(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL)
+	(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \
+	 BLK_FEAT_STABLE_WRITES)
 
 /* internal flags in queue_limits.flags */
 enum {
@@ -565,7 +569,6 @@ struct request_queue {
 #define QUEUE_FLAG_SYNCHRONOUS	11	/* always completes in submit context */
 #define QUEUE_FLAG_SAME_FORCE	12	/* force complete on same CPU */
 #define QUEUE_FLAG_INIT_DONE	14	/* queue is initialized */
-#define QUEUE_FLAG_STABLE_WRITES 15	/* don't modify blks until WB is done */
 #define QUEUE_FLAG_POLL		16	/* IO polling enabled if set */
 #define QUEUE_FLAG_DAX		19	/* device supports DAX */
 #define QUEUE_FLAG_STATS	20	/* track IO start and completion times */
@@ -1323,7 +1326,7 @@ static inline bool bdev_stable_writes(struct block_device *bdev)
 	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
 	    q->limits.integrity.csum_type != BLK_INTEGRITY_CSUM_NONE)
 		return true;
-	return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags);
+	return q->limits.features & BLK_FEAT_STABLE_WRITES;
 }
 
 static inline bool blk_queue_write_cache(struct request_queue *q)
-- 
cgit v1.2.3


From aadd5c59c910427c0464c217d5ed588ff14e2502 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 17 Jun 2024 08:04:45 +0200
Subject: block: move the synchronous flag to queue_limits

Move the synchronous flag into the queue_limits feature field so that it
can be set atomically with the queue frozen.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240617060532.127975-19-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8936eb6ba609..cee7b44a1425 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -301,6 +301,9 @@ enum {
 
 	/* don't modify data until writeback is done */
 	BLK_FEAT_STABLE_WRITES			= (1u << 5),
+
+	/* always completes in submit context */
+	BLK_FEAT_SYNCHRONOUS			= (1u << 6),
 };
 
 /*
@@ -566,7 +569,6 @@ struct request_queue {
 #define QUEUE_FLAG_SAME_COMP	4	/* complete on same CPU-group */
 #define QUEUE_FLAG_FAIL_IO	5	/* fake timeout */
 #define QUEUE_FLAG_NOXMERGES	9	/* No extended merges */
-#define QUEUE_FLAG_SYNCHRONOUS	11	/* always completes in submit context */
 #define QUEUE_FLAG_SAME_FORCE	12	/* force complete on same CPU */
 #define QUEUE_FLAG_INIT_DONE	14	/* queue is initialized */
 #define QUEUE_FLAG_POLL		16	/* IO polling enabled if set */
@@ -1315,8 +1317,7 @@ static inline bool bdev_nonrot(struct block_device *bdev)
 
 static inline bool bdev_synchronous(struct block_device *bdev)
 {
-	return test_bit(QUEUE_FLAG_SYNCHRONOUS,
-			&bdev_get_queue(bdev)->queue_flags);
+	return bdev->bd_disk->queue->limits.features & BLK_FEAT_SYNCHRONOUS;
 }
 
 static inline bool bdev_stable_writes(struct block_device *bdev)
-- 
cgit v1.2.3


From f76af42f8bf13d2620084f305f01691de9238fc7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 17 Jun 2024 08:04:46 +0200
Subject: block: move the nowait flag to queue_limits

Move the nowait flag into the queue_limits feature field so that it can
be set atomically with the queue frozen.

Stacking drivers are simplified in that they now can simply set the
flag, and blk_stack_limits will clear it when the features is not
supported by any of the underlying devices.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240617060532.127975-20-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cee7b44a1425..f3d4519d609d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -304,6 +304,9 @@ enum {
 
 	/* always completes in submit context */
 	BLK_FEAT_SYNCHRONOUS			= (1u << 6),
+
+	/* supports REQ_NOWAIT */
+	BLK_FEAT_NOWAIT				= (1u << 7),
 };
 
 /*
@@ -580,12 +583,10 @@ struct request_queue {
 #define QUEUE_FLAG_ZONE_RESETALL 26	/* supports Zone Reset All */
 #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
 #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
-#define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
 #define QUEUE_FLAG_SQ_SCHED     30	/* single queue style io dispatch */
 #define QUEUE_FLAG_SKIP_TAGSET_QUIESCE	31 /* quiesce_tagset skip the queue*/
 
-#define QUEUE_FLAG_MQ_DEFAULT	((1UL << QUEUE_FLAG_SAME_COMP) |	\
-				 (1UL << QUEUE_FLAG_NOWAIT))
+#define QUEUE_FLAG_MQ_DEFAULT	(1UL << QUEUE_FLAG_SAME_COMP)
 
 void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
 void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
@@ -1348,7 +1349,7 @@ static inline bool bdev_fua(struct block_device *bdev)
 
 static inline bool bdev_nowait(struct block_device *bdev)
 {
-	return test_bit(QUEUE_FLAG_NOWAIT, &bdev_get_queue(bdev)->queue_flags);
+	return bdev->bd_disk->queue->limits.features & BLK_FEAT_NOWAIT;
 }
 
 static inline bool bdev_is_zoned(struct block_device *bdev)
-- 
cgit v1.2.3


From f467fee48da4500786e145489787b37adae317c3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 17 Jun 2024 08:04:47 +0200
Subject: block: move the dax flag to queue_limits

Move the dax flag into the queue_limits feature field so that it can be
set atomically with the queue frozen.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240617060532.127975-21-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f3d4519d609d..7022e06a3dd9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -307,6 +307,9 @@ enum {
 
 	/* supports REQ_NOWAIT */
 	BLK_FEAT_NOWAIT				= (1u << 7),
+
+	/* supports DAX */
+	BLK_FEAT_DAX				= (1u << 8),
 };
 
 /*
@@ -575,7 +578,6 @@ struct request_queue {
 #define QUEUE_FLAG_SAME_FORCE	12	/* force complete on same CPU */
 #define QUEUE_FLAG_INIT_DONE	14	/* queue is initialized */
 #define QUEUE_FLAG_POLL		16	/* IO polling enabled if set */
-#define QUEUE_FLAG_DAX		19	/* device supports DAX */
 #define QUEUE_FLAG_STATS	20	/* track IO start and completion times */
 #define QUEUE_FLAG_REGISTERED	22	/* queue has been registered to a disk */
 #define QUEUE_FLAG_QUIESCED	24	/* queue has been quiesced */
@@ -602,7 +604,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_io_stat(q)	((q)->limits.features & BLK_FEAT_IO_STAT)
 #define blk_queue_zone_resetall(q)	\
 	test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
-#define blk_queue_dax(q)	test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
+#define blk_queue_dax(q)	((q)->limits.features & BLK_FEAT_DAX)
 #define blk_queue_pci_p2pdma(q)	\
 	test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
 #ifdef CONFIG_BLK_RQ_ALLOC_TIME
-- 
cgit v1.2.3


From 8023e144f9d6e35f8786937e2f0c2fea0aba6dbc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 17 Jun 2024 08:04:48 +0200
Subject: block: move the poll flag to queue_limits

Move the poll flag into the queue_limits feature field so that it can
be set atomically with the queue frozen.

Stacking drivers are simplified in that they now can simply set the
flag, and blk_stack_limits will clear it when the features is not
supported by any of the underlying devices.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240617060532.127975-22-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7022e06a3dd9..cd27b66cbacc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -310,6 +310,9 @@ enum {
 
 	/* supports DAX */
 	BLK_FEAT_DAX				= (1u << 8),
+
+	/* supports I/O polling */
+	BLK_FEAT_POLL				= (1u << 9),
 };
 
 /*
@@ -577,7 +580,6 @@ struct request_queue {
 #define QUEUE_FLAG_NOXMERGES	9	/* No extended merges */
 #define QUEUE_FLAG_SAME_FORCE	12	/* force complete on same CPU */
 #define QUEUE_FLAG_INIT_DONE	14	/* queue is initialized */
-#define QUEUE_FLAG_POLL		16	/* IO polling enabled if set */
 #define QUEUE_FLAG_STATS	20	/* track IO start and completion times */
 #define QUEUE_FLAG_REGISTERED	22	/* queue has been registered to a disk */
 #define QUEUE_FLAG_QUIESCED	24	/* queue has been quiesced */
-- 
cgit v1.2.3


From b1fc937a55f5735b98d9dceae5bb6ba262501f56 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 17 Jun 2024 08:04:49 +0200
Subject: block: move the zoned flag into the features field

Move the zoned flags into the features field to reclaim a little
bit of space.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240617060532.127975-23-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cd27b66cbacc..bdc30c1fb1b5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -313,6 +313,9 @@ enum {
 
 	/* supports I/O polling */
 	BLK_FEAT_POLL				= (1u << 9),
+
+	/* is a zoned device */
+	BLK_FEAT_ZONED				= (1u << 10),
 };
 
 /*
@@ -320,7 +323,7 @@ enum {
  */
 #define BLK_FEAT_INHERIT_MASK \
 	(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \
-	 BLK_FEAT_STABLE_WRITES)
+	 BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED)
 
 /* internal flags in queue_limits.flags */
 enum {
@@ -372,7 +375,6 @@ struct queue_limits {
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
 	unsigned char		raid_partial_stripes_expensive;
-	bool			zoned;
 	unsigned int		max_open_zones;
 	unsigned int		max_active_zones;
 
@@ -654,7 +656,8 @@ static inline enum rpm_status queue_rpm_status(struct request_queue *q)
 
 static inline bool blk_queue_is_zoned(struct request_queue *q)
 {
-	return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && q->limits.zoned;
+	return IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+		(q->limits.features & BLK_FEAT_ZONED);
 }
 
 #ifdef CONFIG_BLK_DEV_ZONED
-- 
cgit v1.2.3


From a52758a39768f441e468a41da6c15a59d6d6011a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 17 Jun 2024 08:04:50 +0200
Subject: block: move the zone_resetall flag to queue_limits

Move the zone_resetall flag into the queue_limits feature field so that
it can be set atomically with the queue frozen.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240617060532.127975-24-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bdc30c1fb1b5..1077cb8d8fd8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -316,6 +316,9 @@ enum {
 
 	/* is a zoned device */
 	BLK_FEAT_ZONED				= (1u << 10),
+
+	/* supports Zone Reset All */
+	BLK_FEAT_ZONE_RESETALL			= (1u << 11),
 };
 
 /*
@@ -586,7 +589,6 @@ struct request_queue {
 #define QUEUE_FLAG_REGISTERED	22	/* queue has been registered to a disk */
 #define QUEUE_FLAG_QUIESCED	24	/* queue has been quiesced */
 #define QUEUE_FLAG_PCI_P2PDMA	25	/* device supports PCI p2p requests */
-#define QUEUE_FLAG_ZONE_RESETALL 26	/* supports Zone Reset All */
 #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
 #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
 #define QUEUE_FLAG_SQ_SCHED     30	/* single queue style io dispatch */
@@ -607,7 +609,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_nonrot(q)	((q)->limits.features & BLK_FEAT_ROTATIONAL)
 #define blk_queue_io_stat(q)	((q)->limits.features & BLK_FEAT_IO_STAT)
 #define blk_queue_zone_resetall(q)	\
-	test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
+	((q)->limits.features & BLK_FEAT_ZONE_RESETALL)
 #define blk_queue_dax(q)	((q)->limits.features & BLK_FEAT_DAX)
 #define blk_queue_pci_p2pdma(q)	\
 	test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
-- 
cgit v1.2.3


From 9c1e42e3c876c66796eda23e79836a4d92613a61 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 17 Jun 2024 08:04:51 +0200
Subject: block: move the pci_p2pdma flag to queue_limits

Move the pci_p2pdma flag into the queue_limits feature field so that it
can be set atomically with the queue frozen.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240617060532.127975-25-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1077cb8d8fd8..ab0f7dfba556 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -319,6 +319,9 @@ enum {
 
 	/* supports Zone Reset All */
 	BLK_FEAT_ZONE_RESETALL			= (1u << 11),
+
+	/* supports PCI(e) p2p requests */
+	BLK_FEAT_PCI_P2PDMA			= (1u << 12),
 };
 
 /*
@@ -588,7 +591,6 @@ struct request_queue {
 #define QUEUE_FLAG_STATS	20	/* track IO start and completion times */
 #define QUEUE_FLAG_REGISTERED	22	/* queue has been registered to a disk */
 #define QUEUE_FLAG_QUIESCED	24	/* queue has been quiesced */
-#define QUEUE_FLAG_PCI_P2PDMA	25	/* device supports PCI p2p requests */
 #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
 #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
 #define QUEUE_FLAG_SQ_SCHED     30	/* single queue style io dispatch */
@@ -611,8 +613,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_zone_resetall(q)	\
 	((q)->limits.features & BLK_FEAT_ZONE_RESETALL)
 #define blk_queue_dax(q)	((q)->limits.features & BLK_FEAT_DAX)
-#define blk_queue_pci_p2pdma(q)	\
-	test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
+#define blk_queue_pci_p2pdma(q)	((q)->limits.features & BLK_FEAT_PCI_P2PDMA)
 #ifdef CONFIG_BLK_RQ_ALLOC_TIME
 #define blk_queue_rq_alloc_time(q)	\
 	test_bit(QUEUE_FLAG_RQ_ALLOC_TIME, &(q)->queue_flags)
-- 
cgit v1.2.3


From 8c8f5c85b20d0a7dc0ab9b2a17318130d69ceb5a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 17 Jun 2024 08:04:52 +0200
Subject: block: move the skip_tagset_quiesce flag to queue_limits

Move the skip_tagset_quiesce flag into the queue_limits feature field so
that it can be set atomically with the queue frozen.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240617060532.127975-26-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ab0f7dfba556..2c433ebf6f20 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -322,6 +322,9 @@ enum {
 
 	/* supports PCI(e) p2p requests */
 	BLK_FEAT_PCI_P2PDMA			= (1u << 12),
+
+	/* skip this queue in blk_mq_(un)quiesce_tagset */
+	BLK_FEAT_SKIP_TAGSET_QUIESCE		= (1u << 13),
 };
 
 /*
@@ -594,7 +597,6 @@ struct request_queue {
 #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
 #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
 #define QUEUE_FLAG_SQ_SCHED     30	/* single queue style io dispatch */
-#define QUEUE_FLAG_SKIP_TAGSET_QUIESCE	31 /* quiesce_tagset skip the queue*/
 
 #define QUEUE_FLAG_MQ_DEFAULT	(1UL << QUEUE_FLAG_SAME_COMP)
 
@@ -629,7 +631,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
 #define blk_queue_sq_sched(q)	test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags)
 #define blk_queue_skip_tagset_quiesce(q) \
-	test_bit(QUEUE_FLAG_SKIP_TAGSET_QUIESCE, &(q)->queue_flags)
+	((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE)
 
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);
-- 
cgit v1.2.3


From 339d3948c07b4aa2940aeb874294a7d6782cec16 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 17 Jun 2024 08:04:53 +0200
Subject: block: move the bounce flag into the features field

Move the bounce flag into the features field to reclaim a little bit of
space.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240617060532.127975-27-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2c433ebf6f20..e96ba7b97288 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -325,6 +325,9 @@ enum {
 
 	/* skip this queue in blk_mq_(un)quiesce_tagset */
 	BLK_FEAT_SKIP_TAGSET_QUIESCE		= (1u << 13),
+
+	/* bounce all highmem pages */
+	BLK_FEAT_BOUNCE_HIGH			= (1u << 14),
 };
 
 /*
@@ -332,7 +335,7 @@ enum {
  */
 #define BLK_FEAT_INHERIT_MASK \
 	(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \
-	 BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED)
+	 BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH)
 
 /* internal flags in queue_limits.flags */
 enum {
@@ -352,7 +355,6 @@ enum blk_bounce {
 struct queue_limits {
 	unsigned int		features;
 	unsigned int		flags;
-	enum blk_bounce		bounce;
 	unsigned long		seg_boundary_mask;
 	unsigned long		virt_boundary_mask;
 
-- 
cgit v1.2.3


From b9578c49456340ca4d3c7ddbaca054ffc2b51bc1 Mon Sep 17 00:00:00 2001
From: Barry Song <v-songbaohua@oppo.com>
Date: Thu, 6 Jun 2024 14:02:13 +1200
Subject: dma-buf/heaps: Correct the types of fd_flags and heap_flags

dma_heap_allocation_data defines the UAPI as follows:

 struct dma_heap_allocation_data {
        __u64 len;
        __u32 fd;
        __u32 fd_flags;
        __u64 heap_flags;
 };

But dma heaps are casting both fd_flags and heap_flags into
unsigned long. This patch makes dma heaps - cma heap and
system heap have consistent types with UAPI.

Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Acked-by: John Stultz <jstultz@google.com>
Reviewed-by: Carlos Llamas <cmllamas@google.com>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240606020213.49854-1-21cnbao@gmail.com
---
 include/linux/dma-heap.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h
index 0c05561cad6e..064bad725061 100644
--- a/include/linux/dma-heap.h
+++ b/include/linux/dma-heap.h
@@ -23,8 +23,8 @@ struct dma_heap;
 struct dma_heap_ops {
 	struct dma_buf *(*allocate)(struct dma_heap *heap,
 				    unsigned long len,
-				    unsigned long fd_flags,
-				    unsigned long heap_flags);
+				    u32 fd_flags,
+				    u64 heap_flags);
 };
 
 /**
-- 
cgit v1.2.3


From a2225e0250c5fa397dcebf6ce65a9f05a114e0cf Mon Sep 17 00:00:00 2001
From: Jianguo Wu <wujianguo@chinatelecom.cn>
Date: Thu, 13 Jun 2024 17:42:47 +0800
Subject: netfilter: move the sysctl nf_hooks_lwtunnel into the netfilter core

Currently, the sysctl net.netfilter.nf_hooks_lwtunnel depends on the
nf_conntrack module, but the nf_conntrack module is not always loaded.
Therefore, accessing net.netfilter.nf_hooks_lwtunnel may have an error.

Move sysctl nf_hooks_lwtunnel into the netfilter core.

Fixes: 7a3f5b0de364 ("netfilter: add netfilter hooks to SRv6 data plane")
Suggested-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Jianguo Wu <wujianguo@chinatelecom.cn>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netns/netfilter.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index 02bbdc577f8e..a6a0bf4a247e 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -15,6 +15,9 @@ struct netns_nf {
 	const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO];
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header *nf_log_dir_header;
+#ifdef CONFIG_LWTUNNEL
+	struct ctl_table_header *nf_lwtnl_dir_header;
+#endif
 #endif
 	struct nf_hook_entries __rcu *hooks_ipv4[NF_INET_NUMHOOKS];
 	struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS];
-- 
cgit v1.2.3


From e78298556ee5d881f6679effb2a6743969ea6e2d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 4 Jun 2024 12:30:02 -0700
Subject: runtime constants: add default dummy infrastructure

This adds the initial dummy support for 'runtime constants' for when
an architecture doesn't actually support an implementation of fixing
up said runtime constants.

This ends up being the fallback to just using the variables as regular
__ro_after_init variables, and changes the dcache d_hash() function to
use this model.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/Kbuild          |  1 +
 include/asm-generic/runtime-const.h | 15 +++++++++++++++
 include/asm-generic/vmlinux.lds.h   |  8 ++++++++
 3 files changed, 24 insertions(+)
 create mode 100644 include/asm-generic/runtime-const.h

(limited to 'include')

diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index b20fa25a7e8d..052e5c98c105 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -46,6 +46,7 @@ mandatory-y += pci.h
 mandatory-y += percpu.h
 mandatory-y += pgalloc.h
 mandatory-y += preempt.h
+mandatory-y += runtime-const.h
 mandatory-y += rwonce.h
 mandatory-y += sections.h
 mandatory-y += serial.h
diff --git a/include/asm-generic/runtime-const.h b/include/asm-generic/runtime-const.h
new file mode 100644
index 000000000000..670499459514
--- /dev/null
+++ b/include/asm-generic/runtime-const.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+/*
+ * This is the fallback for when the architecture doesn't
+ * support the runtime const operations.
+ *
+ * We just use the actual symbols as-is.
+ */
+#define runtime_const_ptr(sym) (sym)
+#define runtime_const_shift_right_32(val, sym) ((u32)(val)>>(sym))
+#define runtime_const_init(type,sym) do { } while (0)
+
+#endif
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 5703526d6ebf..389a78415b9b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -944,6 +944,14 @@
 #define CON_INITCALL							\
 	BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end)
 
+#define RUNTIME_NAME(t,x) runtime_##t##_##x
+
+#define RUNTIME_CONST(t,x)						\
+	. = ALIGN(8);							\
+	RUNTIME_NAME(t,x) : AT(ADDR(RUNTIME_NAME(t,x)) - LOAD_OFFSET) {	\
+		*(RUNTIME_NAME(t,x));					\
+	}
+
 /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */
 #define KUNIT_TABLE()							\
 		. = ALIGN(8);						\
-- 
cgit v1.2.3


From 2003e483a81cc235e29f77da3f6b256cb4b348e7 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Wed, 19 Jun 2024 13:31:05 -0700
Subject: fortify: Do not special-case 0-sized destinations

All fake flexible arrays should have been removed now, so remove the
special casing that was avoiding checking them. If a destination claims
to be 0 sized, believe it. This is especially important for cases where
__counted_by is in use and may have a 0 element count.

Link: https://lore.kernel.org/r/20240619203105.work.747-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/fortify-string.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 7e0f340bf363..0d99bf11d260 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -601,11 +601,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
 	/*
 	 * Warn when writing beyond destination field size.
 	 *
-	 * We must ignore p_size_field == 0 for existing 0-element
-	 * fake flexible arrays, until they are all converted to
-	 * proper flexible arrays.
-	 *
-	 * The implementation of __builtin_*object_size() behaves
+	 * Note the implementation of __builtin_*object_size() behaves
 	 * like sizeof() when not directly referencing a flexible
 	 * array member, which means there will be many bounds checks
 	 * that will appear at run-time, without a way for them to be
@@ -613,7 +609,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size,
 	 * is specifically the flexible array member).
 	 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101832
 	 */
-	if (p_size_field != 0 && p_size_field != SIZE_MAX &&
+	if (p_size_field != SIZE_MAX &&
 	    p_size != p_size_field && p_size_field < size)
 		return true;
 
-- 
cgit v1.2.3


From ba63a7e08523be3496cc1b7e5f77d306144a3a40 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Sat, 20 Apr 2024 21:47:46 +0200
Subject: can: isotp: remove ISO 15675-2 specification version where possible

With the new ISO 15765-2:2024 release the former documentation and comments
have to be reworked. This patch removes the ISO specification version/date
where possible.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Acked-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Acked-by: Francesco Valla <valla.francesco@gmail.com>
Link: https://lore.kernel.org/all/20240420194746.4885-1-socketcan@hartkopp.net
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/isotp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h
index 6cde62371b6f..bd990917f7c4 100644
--- a/include/uapi/linux/can/isotp.h
+++ b/include/uapi/linux/can/isotp.h
@@ -2,7 +2,7 @@
 /*
  * linux/can/isotp.h
  *
- * Definitions for isotp CAN sockets (ISO 15765-2:2016)
+ * Definitions for ISO 15765-2 CAN transport protocol sockets
  *
  * Copyright (c) 2020 Volkswagen Group Electronic Research
  * All rights reserved.
-- 
cgit v1.2.3


From 269e974e664207cc45f83b579565ba73de1b75dc Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 14 Jun 2024 11:41:02 +0200
Subject: driver core: make [device_]driver_attach take a const *

Change device_driver_attach() and driver_attach() to take a const * to
struct device driver as neither of them modify the structure at all.

Also, for some odd reason, drivers/dma/idxd/compat.c had a duplicate
external reference to device_driver_attach(), so remove that to fix up
the build, it should never have had that there in the first place.

Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Vinod Koul <vkoul@kernel.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Petr Tesarik <petr.tesarik.ext@huawei.com>
Cc: Alexander Lobakin <aleksander.lobakin@intel.com>
Cc: dmaengine@vger.kernel.org
Link: https://lore.kernel.org/r/2024061401-rasping-manger-c385@gregkh
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index 3a630942f358..34eb20f5966f 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1177,12 +1177,12 @@ static inline void *dev_get_platdata(const struct device *dev)
  * Manual binding of a device to driver. See drivers/base/bus.c
  * for information on use.
  */
-int __must_check device_driver_attach(struct device_driver *drv,
+int __must_check device_driver_attach(const struct device_driver *drv,
 				      struct device *dev);
 int __must_check device_bind_driver(struct device *dev);
 void device_release_driver(struct device *dev);
 int  __must_check device_attach(struct device *dev);
-int __must_check driver_attach(struct device_driver *drv);
+int __must_check driver_attach(const struct device_driver *drv);
 void device_initial_probe(struct device *dev);
 int __must_check device_reprobe(struct device *dev);
 
-- 
cgit v1.2.3


From f6860b6069b92559f5cdb65f48e2d82051eaebca Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 19 Jun 2024 17:45:33 +0200
Subject: block: remove the unused blk_bounce enum

The enum has been replaced with the BLK_FEAT_BOUNCE_HIGH flag.

Reported-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20240619154623.450048-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e96ba7b97288..f7d275e3fb2c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -343,15 +343,6 @@ enum {
 	BLK_FLAGS_WRITE_CACHE_DISABLED		= (1u << 31),
 };
 
-/*
- * BLK_BOUNCE_NONE:	never bounce (default)
- * BLK_BOUNCE_HIGH:	bounce all highmem pages
- */
-enum blk_bounce {
-	BLK_BOUNCE_NONE,
-	BLK_BOUNCE_HIGH,
-};
-
 struct queue_limits {
 	unsigned int		features;
 	unsigned int		flags;
-- 
cgit v1.2.3


From bae1c74316b86c67c95658c3a0cd312cec9aad77 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 19 Jun 2024 17:45:35 +0200
Subject: block: renumber and rename the cache disabled flag

Start with the first bit, and drop the plural-S from the name.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20240619154623.450048-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f7d275e3fb2c..713a98b6dbba 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -339,8 +339,8 @@ enum {
 
 /* internal flags in queue_limits.flags */
 enum {
-	/* do not send FLUSH or FUA command despite advertised write cache */
-	BLK_FLAGS_WRITE_CACHE_DISABLED		= (1u << 31),
+	/* do not send FLUSH/FUA commands despite advertising a write cache */
+	BLK_FLAG_WRITE_CACHE_DISABLED		= (1u << 0),
 };
 
 struct queue_limits {
@@ -1339,7 +1339,7 @@ static inline bool bdev_stable_writes(struct block_device *bdev)
 static inline bool blk_queue_write_cache(struct request_queue *q)
 {
 	return (q->limits.features & BLK_FEAT_WRITE_CACHE) &&
-		!(q->limits.flags & BLK_FLAGS_WRITE_CACHE_DISABLED);
+		!(q->limits.flags & BLK_FLAG_WRITE_CACHE_DISABLED);
 }
 
 static inline bool bdev_write_cache(struct block_device *bdev)
-- 
cgit v1.2.3


From 5543217be468268dfedf504f4969771b9a377353 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 19 Jun 2024 17:45:36 +0200
Subject: block: move the misaligned flag into the features field

Move the misaligned flags into the features field to reclaim a little
bit of space.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20240619154623.450048-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 713a98b6dbba..7ad2b1240fc0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -341,6 +341,9 @@ enum {
 enum {
 	/* do not send FLUSH/FUA commands despite advertising a write cache */
 	BLK_FLAG_WRITE_CACHE_DISABLED		= (1u << 0),
+
+	/* I/O topology is misaligned */
+	BLK_FEAT_MISALIGNED			= (1u << 1),
 };
 
 struct queue_limits {
@@ -374,7 +377,6 @@ struct queue_limits {
 	unsigned short		max_integrity_segments;
 	unsigned short		max_discard_segments;
 
-	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
 	unsigned char		raid_partial_stripes_expensive;
 	unsigned int		max_open_zones;
-- 
cgit v1.2.3


From 4cac3d3a712b5c76d462b29b73b9e58c0b6d9946 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 19 Jun 2024 17:45:37 +0200
Subject: block: remove the discard_alignment flag

queue_limits.discard_alignment is never read except in the places
where it is stacked into another limit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20240619154623.450048-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7ad2b1240fc0..86410ce41bf6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -377,7 +377,6 @@ struct queue_limits {
 	unsigned short		max_integrity_segments;
 	unsigned short		max_discard_segments;
 
-	unsigned char		discard_misaligned;
 	unsigned char		raid_partial_stripes_expensive;
 	unsigned int		max_open_zones;
 	unsigned int		max_active_zones;
-- 
cgit v1.2.3


From 7d4dec525f5fd555037486af4d02dd3682655ba1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 19 Jun 2024 17:45:38 +0200
Subject: block: move the raid_partial_stripes_expensive flag into the features
 field

Move the raid_partial_stripes_expensive flags into the features field to
reclaim a little bit of space.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20240619154623.450048-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 86410ce41bf6..1fa2b148c206 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -328,6 +328,9 @@ enum {
 
 	/* bounce all highmem pages */
 	BLK_FEAT_BOUNCE_HIGH			= (1u << 14),
+
+	/* undocumented magic for bcache */
+	BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE	= (1u << 15),
 };
 
 /*
@@ -335,7 +338,8 @@ enum {
  */
 #define BLK_FEAT_INHERIT_MASK \
 	(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \
-	 BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH)
+	 BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH | \
+	 BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE)
 
 /* internal flags in queue_limits.flags */
 enum {
@@ -377,7 +381,6 @@ struct queue_limits {
 	unsigned short		max_integrity_segments;
 	unsigned short		max_discard_segments;
 
-	unsigned char		raid_partial_stripes_expensive;
 	unsigned int		max_open_zones;
 	unsigned int		max_active_zones;
 
-- 
cgit v1.2.3


From 0ddd2ae586d28e521d37393364d989ce118802e0 Mon Sep 17 00:00:00 2001
From: Zhu Lingshan <lingshan.zhu@amd.com>
Date: Tue, 4 Jun 2024 16:49:34 +0800
Subject: drm/ttm: increase ttm pre-fault value to PMD size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ttm page fault handler ttm_bo_vm_fault_reserved() maps
TTM_BO_VM_NUM_PREFAULT more pages beforehand
due to the principle of locality.

However, on some platform the page faults are more costly, this
patch intends to increase the number of ttm pre-fault to relieve
the number of page faults.

When multiple levels of page table is supported, the new default
value would be the PMD size, similar to huge page.

Signed-off-by: Zhu Lingshan <lingshan.zhu@amd.com>
Reported-and-tested-by: Li Jingxiang <jingxiang.li@ecarxgroup.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240604084934.225738-1-lingshan.zhu@amd.com
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 include/drm/ttm/ttm_bo.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 6ccf96c91f3a..ef0f52f56ebc 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -39,7 +39,11 @@
 #include "ttm_device.h"
 
 /* Default number of pre-faulted pages in the TTM fault handler */
+#if CONFIG_PGTABLE_LEVELS > 2
+#define TTM_BO_VM_NUM_PREFAULT (1 << (PMD_SHIFT - PAGE_SHIFT))
+#else
 #define TTM_BO_VM_NUM_PREFAULT 16
+#endif
 
 struct iosys_map;
 
-- 
cgit v1.2.3


From f70167a7a6e7e8a6911f3a216dc044cbfe7c1983 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Thu, 20 Jun 2024 12:53:51 +0000
Subject: block: Generalize chunk_sectors support as boundary support

The purpose of the chunk_sectors limit is to ensure that a mergeble request
fits within the boundary of the chunck_sector value.

Such a feature will be useful for other request_queue boundary limits, so
generalize the chunk_sectors merge code.

This idea was proposed by Hannes Reinecke.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Acked-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20240620125359.2684798-3-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0e8253c1507a..fb7d4c21bba8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -907,14 +907,15 @@ static inline bool bio_straddles_zones(struct bio *bio)
 }
 
 /*
- * Return how much of the chunk is left to be used for I/O at a given offset.
+ * Return how much within the boundary is left to be used for I/O at a given
+ * offset.
  */
-static inline unsigned int blk_chunk_sectors_left(sector_t offset,
-		unsigned int chunk_sectors)
+static inline unsigned int blk_boundary_sectors_left(sector_t offset,
+		unsigned int boundary_sectors)
 {
-	if (unlikely(!is_power_of_2(chunk_sectors)))
-		return chunk_sectors - sector_div(offset, chunk_sectors);
-	return chunk_sectors - (offset & (chunk_sectors - 1));
+	if (unlikely(!is_power_of_2(boundary_sectors)))
+		return boundary_sectors - sector_div(offset, boundary_sectors);
+	return boundary_sectors - (offset & (boundary_sectors - 1));
 }
 
 /**
-- 
cgit v1.2.3


From c34fc6f26ab86d03a2d47446f42b6cd492dfdc56 Mon Sep 17 00:00:00 2001
From: Prasad Singamsetty <prasad.singamsetty@oracle.com>
Date: Thu, 20 Jun 2024 12:53:52 +0000
Subject: fs: Initial atomic write support

An atomic write is a write issued with torn-write protection, meaning
that for a power failure or any other hardware failure, all or none of the
data from the write will be stored, but never a mix of old and new data.

Userspace may add flag RWF_ATOMIC to pwritev2() to indicate that the
write is to be issued with torn-write prevention, according to special
alignment and length rules.

For any syscall interface utilizing struct iocb, add IOCB_ATOMIC for
iocb->ki_flags field to indicate the same.

A call to statx will give the relevant atomic write info for a file:
- atomic_write_unit_min
- atomic_write_unit_max
- atomic_write_segments_max

Both min and max values must be a power-of-2.

Applications can avail of atomic write feature by ensuring that the total
length of a write is a power-of-2 in size and also sized between
atomic_write_unit_min and atomic_write_unit_max, inclusive. Applications
must ensure that the write is at a naturally-aligned offset in the file
wrt the total write length. The value in atomic_write_segments_max
indicates the upper limit for IOV_ITER iovcnt.

Add file mode flag FMODE_CAN_ATOMIC_WRITE, so files which do not have the
flag set will have RWF_ATOMIC rejected and not just ignored.

Add a type argument to kiocb_set_rw_flags() to allows reads which have
RWF_ATOMIC set to be rejected.

Helper function generic_atomic_write_valid() can be used by FSes to verify
compliant writes. There we check for iov_iter type is for ubuf, which
implies iovcnt==1 for pwritev2(), which is an initial restriction for
atomic_write_segments_max. Initially the only user will be bdev file
operations write handler. We will rely on the block BIO submission path to
ensure write sizes are compliant for the bdev, so we don't need to check
atomic writes sizes yet.

Signed-off-by: Prasad Singamsetty <prasad.singamsetty@oracle.com>
jpg: merge into single patch and much rewrite
Acked-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20240620125359.2684798-4-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/fs.h      | 17 +++++++++++++++--
 include/uapi/linux/fs.h |  5 ++++-
 2 files changed, 19 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0283cf366c2a..e049414bef7d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -125,8 +125,10 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define FMODE_EXEC		((__force fmode_t)(1 << 5))
 /* File writes are restricted (block device specific) */
 #define FMODE_WRITE_RESTRICTED	((__force fmode_t)(1 << 6))
+/* File supports atomic writes */
+#define FMODE_CAN_ATOMIC_WRITE	((__force fmode_t)(1 << 7))
 
-/* FMODE_* bits 7 to 8 */
+/* FMODE_* bit 8 */
 
 /* 32bit hashes as llseek() offset (for directories) */
 #define FMODE_32BITHASH         ((__force fmode_t)(1 << 9))
@@ -317,6 +319,7 @@ struct readahead_control;
 #define IOCB_SYNC		(__force int) RWF_SYNC
 #define IOCB_NOWAIT		(__force int) RWF_NOWAIT
 #define IOCB_APPEND		(__force int) RWF_APPEND
+#define IOCB_ATOMIC		(__force int) RWF_ATOMIC
 
 /* non-RWF related bits - start at 16 */
 #define IOCB_EVENTFD		(1 << 16)
@@ -351,6 +354,7 @@ struct readahead_control;
 	{ IOCB_SYNC,		"SYNC" }, \
 	{ IOCB_NOWAIT,		"NOWAIT" }, \
 	{ IOCB_APPEND,		"APPEND" }, \
+	{ IOCB_ATOMIC,		"ATOMIC"}, \
 	{ IOCB_EVENTFD,		"EVENTFD"}, \
 	{ IOCB_DIRECT,		"DIRECT" }, \
 	{ IOCB_WRITE,		"WRITE" }, \
@@ -3403,7 +3407,8 @@ static inline int iocb_flags(struct file *file)
 	return res;
 }
 
-static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
+static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags,
+				     int rw_type)
 {
 	int kiocb_flags = 0;
 
@@ -3422,6 +3427,12 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
 			return -EOPNOTSUPP;
 		kiocb_flags |= IOCB_NOIO;
 	}
+	if (flags & RWF_ATOMIC) {
+		if (rw_type != WRITE)
+			return -EOPNOTSUPP;
+		if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE))
+			return -EOPNOTSUPP;
+	}
 	kiocb_flags |= (__force int) (flags & RWF_SUPPORTED);
 	if (flags & RWF_SYNC)
 		kiocb_flags |= IOCB_DSYNC;
@@ -3613,4 +3624,6 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
 extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
 			   int advice);
 
+bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos);
+
 #endif /* _LINUX_FS_H */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 45e4e64fd664..191a7e88a8ab 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -329,9 +329,12 @@ typedef int __bitwise __kernel_rwf_t;
 /* per-IO negation of O_APPEND */
 #define RWF_NOAPPEND	((__force __kernel_rwf_t)0x00000020)
 
+/* Atomic Write */
+#define RWF_ATOMIC	((__force __kernel_rwf_t)0x00000040)
+
 /* mask of flags supported by the kernel */
 #define RWF_SUPPORTED	(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
-			 RWF_APPEND | RWF_NOAPPEND)
+			 RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC)
 
 /* Pagemap ioctl */
 #define PAGEMAP_SCAN	_IOWR('f', 16, struct pm_scan_arg)
-- 
cgit v1.2.3


From 0f9ca80fa4f9670ba09721e4e36b8baf086a500c Mon Sep 17 00:00:00 2001
From: Prasad Singamsetty <prasad.singamsetty@oracle.com>
Date: Thu, 20 Jun 2024 12:53:53 +0000
Subject: fs: Add initial atomic write support info to statx

Extend statx system call to return additional info for atomic write support
support for a file.

Helper function generic_fill_statx_atomic_writes() can be used by FSes to
fill in the relevant statx fields. For now atomic_write_segments_max will
always be 1, otherwise some rules would need to be imposed on iovec length
and alignment, which we don't want now.

Signed-off-by: Prasad Singamsetty <prasad.singamsetty@oracle.com>
jpg: relocate bdev support to another patch
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Acked-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20240620125359.2684798-5-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/fs.h        |  3 +++
 include/linux/stat.h      |  3 +++
 include/uapi/linux/stat.h | 12 ++++++++++--
 3 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e049414bef7d..db26b4a70c62 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3235,6 +3235,9 @@ extern const struct inode_operations page_symlink_inode_operations;
 extern void kfree_link(void *);
 void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
 void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
+void generic_fill_statx_atomic_writes(struct kstat *stat,
+				      unsigned int unit_min,
+				      unsigned int unit_max);
 extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
 extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
 void __inode_add_bytes(struct inode *inode, loff_t bytes);
diff --git a/include/linux/stat.h b/include/linux/stat.h
index bf92441dbad2..3d900c86981c 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -54,6 +54,9 @@ struct kstat {
 	u32		dio_offset_align;
 	u64		change_cookie;
 	u64		subvol;
+	u32		atomic_write_unit_min;
+	u32		atomic_write_unit_max;
+	u32		atomic_write_segments_max;
 };
 
 /* These definitions are internal to the kernel for now. Mainly used by nfsd. */
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 67626d535316..887a25286441 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -126,9 +126,15 @@ struct statx {
 	__u64	stx_mnt_id;
 	__u32	stx_dio_mem_align;	/* Memory buffer alignment for direct I/O */
 	__u32	stx_dio_offset_align;	/* File offset alignment for direct I/O */
-	__u64	stx_subvol;	/* Subvolume identifier */
 	/* 0xa0 */
-	__u64	__spare3[11];	/* Spare space for future expansion */
+	__u64	stx_subvol;	/* Subvolume identifier */
+	__u32	stx_atomic_write_unit_min;	/* Min atomic write unit in bytes */
+	__u32	stx_atomic_write_unit_max;	/* Max atomic write unit in bytes */
+	/* 0xb0 */
+	__u32   stx_atomic_write_segments_max;	/* Max atomic write segment count */
+	__u32   __spare1[1];
+	/* 0xb8 */
+	__u64	__spare3[9];	/* Spare space for future expansion */
 	/* 0x100 */
 };
 
@@ -157,6 +163,7 @@ struct statx {
 #define STATX_DIOALIGN		0x00002000U	/* Want/got direct I/O alignment info */
 #define STATX_MNT_ID_UNIQUE	0x00004000U	/* Want/got extended stx_mount_id */
 #define STATX_SUBVOL		0x00008000U	/* Want/got stx_subvol */
+#define STATX_WRITE_ATOMIC	0x00010000U	/* Want/got atomic_write_* fields */
 
 #define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */
 
@@ -192,6 +199,7 @@ struct statx {
 #define STATX_ATTR_MOUNT_ROOT		0x00002000 /* Root of a mount */
 #define STATX_ATTR_VERITY		0x00100000 /* [I] Verity protected file */
 #define STATX_ATTR_DAX			0x00200000 /* File is currently in DAX state */
+#define STATX_ATTR_WRITE_ATOMIC		0x00400000 /* File supports atomic write operations */
 
 
 #endif /* _UAPI_LINUX_STAT_H */
-- 
cgit v1.2.3


From 9da3d1e912f3953196e66991d75208cde3e845e1 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Thu, 20 Jun 2024 12:53:54 +0000
Subject: block: Add core atomic write support

Add atomic write support, as follows:
- add helper functions to get request_queue atomic write limits
- report request_queue atomic write support limits to sysfs and update Doc
- support to safely merge atomic writes
- deal with splitting atomic writes
- misc helper functions
- add a per-request atomic write flag

New request_queue limits are added, as follows:
- atomic_write_hw_max is set by the block driver and is the maximum length
  of an atomic write which the device may support. It is not
  necessarily a power-of-2.
- atomic_write_max_sectors is derived from atomic_write_hw_max_sectors and
  max_hw_sectors. It is always a power-of-2. Atomic writes may be merged,
  and atomic_write_max_sectors would be the limit on a merged atomic write
  request size. This value is not capped at max_sectors, as the value in
  max_sectors can be controlled from userspace, and it would only cause
  trouble if userspace could limit atomic_write_unit_max_bytes and the
  other atomic write limits.
- atomic_write_hw_unit_{min,max} are set by the block driver and are the
  min/max length of an atomic write unit which the device may support. They
  both must be a power-of-2. Typically atomic_write_hw_unit_max will hold
  the same value as atomic_write_hw_max.
- atomic_write_unit_{min,max} are derived from
  atomic_write_hw_unit_{min,max}, max_hw_sectors, and block core limits.
  Both min and max values must be a power-of-2.
- atomic_write_hw_boundary is set by the block driver. If non-zero, it
  indicates an LBA space boundary at which an atomic write straddles no
  longer is atomically executed by the disk. The value must be a
  power-of-2. Note that it would be acceptable to enforce a rule that
  atomic_write_hw_boundary_sectors is a multiple of
  atomic_write_hw_unit_max, but the resultant code would be more
  complicated.

All atomic writes limits are by default set 0 to indicate no atomic write
support. Even though it is assumed by Linux that a logical block can always
be atomically written, we ignore this as it is not of particular interest.
Stacked devices are just not supported either for now.

An atomic write must always be submitted to the block driver as part of a
single request. As such, only a single BIO must be submitted to the block
layer for an atomic write. When a single atomic write BIO is submitted, it
cannot be split. As such, atomic_write_unit_{max, min}_bytes are limited
by the maximum guaranteed BIO size which will not be required to be split.
This max size is calculated by request_queue max segments and the number
of bvecs a BIO can fit, BIO_MAX_VECS. Currently we rely on userspace
issuing a write with iovcnt=1 for pwritev2() - as such, we can rely on each
segment containing PAGE_SIZE of data, apart from the first+last, which each
can fit logical block size of data. The first+last will be LBS
length/aligned as we rely on direct IO alignment rules also.

New sysfs files are added to report the following atomic write limits:
- atomic_write_unit_max_bytes - same as atomic_write_unit_max_sectors in
				bytes
- atomic_write_unit_min_bytes - same as atomic_write_unit_min_sectors in
				bytes
- atomic_write_boundary_bytes - same as atomic_write_hw_boundary_sectors in
				bytes
- atomic_write_max_bytes      - same as atomic_write_max_sectors in bytes

Atomic writes may only be merged with other atomic writes and only under
the following conditions:
- total resultant request length <= atomic_write_max_bytes
- the merged write does not straddle a boundary

Helper function bdev_can_atomic_write() is added to indicate whether
atomic writes may be issued to a bdev. If a bdev is a partition, the
partition start must be aligned with both atomic_write_unit_min_sectors
and atomic_write_hw_boundary_sectors.

FSes will rely on the block layer to validate that an atomic write BIO
submitted will be of valid size, so add blk_validate_atomic_write_op_size()
for this purpose. Userspace expects an atomic write which is of invalid
size to be rejected with -EINVAL, so add BLK_STS_INVAL for this. Also use
BLK_STS_INVAL for when a BIO needs to be split, as this should mean an
invalid size BIO.

Flag REQ_ATOMIC is used for indicating an atomic write.

Co-developed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Link: https://lore.kernel.org/r/20240620125359.2684798-6-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk_types.h |  8 ++++++-
 include/linux/blkdev.h    | 55 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 781c4500491b..632edd71f8c6 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -162,6 +162,11 @@ typedef u16 blk_short_t;
  */
 #define BLK_STS_DURATION_LIMIT	((__force blk_status_t)17)
 
+/*
+ * Invalid size or alignment.
+ */
+#define BLK_STS_INVAL	((__force blk_status_t)19)
+
 /**
  * blk_path_error - returns true if error may be path related
  * @error: status the request was completed with
@@ -370,7 +375,7 @@ enum req_flag_bits {
 	__REQ_SWAP,		/* swap I/O */
 	__REQ_DRV,		/* for driver use */
 	__REQ_FS_PRIVATE,	/* for file system (submitter) use */
-
+	__REQ_ATOMIC,		/* for atomic write operations */
 	/*
 	 * Command specific flags, keep last:
 	 */
@@ -402,6 +407,7 @@ enum req_flag_bits {
 #define REQ_SWAP	(__force blk_opf_t)(1ULL << __REQ_SWAP)
 #define REQ_DRV		(__force blk_opf_t)(1ULL << __REQ_DRV)
 #define REQ_FS_PRIVATE	(__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE)
+#define REQ_ATOMIC	(__force blk_opf_t)(1ULL << __REQ_ATOMIC)
 
 #define REQ_NOUNMAP	(__force blk_opf_t)(1ULL << __REQ_NOUNMAP)
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fb7d4c21bba8..4816f3b1d528 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -377,6 +377,16 @@ struct queue_limits {
 	unsigned int		discard_alignment;
 	unsigned int		zone_write_granularity;
 
+	/* atomic write limits */
+	unsigned int		atomic_write_hw_max;
+	unsigned int		atomic_write_max_sectors;
+	unsigned int		atomic_write_hw_boundary;
+	unsigned int		atomic_write_boundary_sectors;
+	unsigned int		atomic_write_hw_unit_min;
+	unsigned int		atomic_write_unit_min;
+	unsigned int		atomic_write_hw_unit_max;
+	unsigned int		atomic_write_unit_max;
+
 	unsigned short		max_segments;
 	unsigned short		max_integrity_segments;
 	unsigned short		max_discard_segments;
@@ -1403,6 +1413,30 @@ static inline int queue_dma_alignment(const struct request_queue *q)
 	return q ? q->limits.dma_alignment : 511;
 }
 
+static inline unsigned int
+queue_atomic_write_unit_max_bytes(const struct request_queue *q)
+{
+	return q->limits.atomic_write_unit_max;
+}
+
+static inline unsigned int
+queue_atomic_write_unit_min_bytes(const struct request_queue *q)
+{
+	return q->limits.atomic_write_unit_min;
+}
+
+static inline unsigned int
+queue_atomic_write_boundary_bytes(const struct request_queue *q)
+{
+	return q->limits.atomic_write_boundary_sectors << SECTOR_SHIFT;
+}
+
+static inline unsigned int
+queue_atomic_write_max_bytes(const struct request_queue *q)
+{
+	return q->limits.atomic_write_max_sectors << SECTOR_SHIFT;
+}
+
 static inline unsigned int bdev_dma_alignment(struct block_device *bdev)
 {
 	return queue_dma_alignment(bdev_get_queue(bdev));
@@ -1644,6 +1678,27 @@ struct io_comp_batch {
 	void (*complete)(struct io_comp_batch *);
 };
 
+static inline bool bdev_can_atomic_write(struct block_device *bdev)
+{
+	struct request_queue *bd_queue = bdev->bd_queue;
+	struct queue_limits *limits = &bd_queue->limits;
+
+	if (!limits->atomic_write_unit_min)
+		return false;
+
+	if (bdev_is_partition(bdev)) {
+		sector_t bd_start_sect = bdev->bd_start_sect;
+		unsigned int alignment =
+			max(limits->atomic_write_unit_min,
+			    limits->atomic_write_hw_boundary);
+
+		if (!IS_ALIGNED(bd_start_sect, alignment >> SECTOR_SHIFT))
+			return false;
+	}
+
+	return true;
+}
+
 #define DEFINE_IO_COMP_BATCH(name)	struct io_comp_batch name = { }
 
 #endif /* _LINUX_BLKDEV_H */
-- 
cgit v1.2.3


From 9abcfbd235f59fb5b6379e5bc0231dad831ebace Mon Sep 17 00:00:00 2001
From: Prasad Singamsetty <prasad.singamsetty@oracle.com>
Date: Thu, 20 Jun 2024 12:53:55 +0000
Subject: block: Add atomic write support for statx

Extend statx system call to return additional info for atomic write support
support if the specified file is a block device.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Prasad Singamsetty <prasad.singamsetty@oracle.com>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Acked-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Link: https://lore.kernel.org/r/20240620125359.2684798-7-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4816f3b1d528..2800231046a9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1638,7 +1638,8 @@ int sync_blockdev(struct block_device *bdev);
 int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend);
 int sync_blockdev_nowait(struct block_device *bdev);
 void sync_bdevs(bool wait);
-void bdev_statx_dioalign(struct inode *inode, struct kstat *stat);
+void bdev_statx(struct inode *backing_inode, struct kstat *stat,
+		u32 request_mask);
 void printk_all_partitions(void);
 int __init early_lookup_bdev(const char *pathname, dev_t *dev);
 #else
@@ -1656,7 +1657,8 @@ static inline int sync_blockdev_nowait(struct block_device *bdev)
 static inline void sync_bdevs(bool wait)
 {
 }
-static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
+static inline void bdev_statx(struct inode *backing_inode, struct kstat *stat,
+				u32 request_mask)
 {
 }
 static inline void printk_all_partitions(void)
-- 
cgit v1.2.3


From bf4ae8f2e6407a779c0368eb0f3e047a8333be17 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Thu, 20 Jun 2024 12:53:57 +0000
Subject: scsi: sd: Atomic write support

Support is divided into two main areas:
- reading VPD pages and setting sdev request_queue limits
- support WRITE ATOMIC (16) command and tracing

The relevant block limits VPD page need to be read to allow the block layer
request_queue atomic write limits to be set. These VPD page limits are
described in sbc4r22 section 6.6.4 - Block limits VPD page.

There are five limits of interest:
- MAXIMUM ATOMIC TRANSFER LENGTH
- ATOMIC ALIGNMENT
- ATOMIC TRANSFER LENGTH GRANULARITY
- MAXIMUM ATOMIC TRANSFER LENGTH WITH BOUNDARY
- MAXIMUM ATOMIC BOUNDARY SIZE

MAXIMUM ATOMIC TRANSFER LENGTH is the maximum length for a WRITE ATOMIC
(16) command. It will not be greater than the device MAXIMUM TRANSFER
LENGTH.

ATOMIC ALIGNMENT and ATOMIC TRANSFER LENGTH GRANULARITY are the minimum
alignment and length values for an atomic write in terms of logical blocks.

Unlike NVMe, SCSI does not specify an LBA space boundary, but does specify
a per-IO boundary granularity. The maximum boundary size is specified in
MAXIMUM ATOMIC BOUNDARY SIZE. When used, this boundary value is set in the
WRITE ATOMIC (16) ATOMIC BOUNDARY field - layout for the WRITE_ATOMIC_16
command can be found in sbc4r22 section 5.48. This boundary value is the
granularity size at which the device may atomically write the data. A value
of zero in WRITE ATOMIC (16) ATOMIC BOUNDARY field means that all data must
be atomically written together.

MAXIMUM ATOMIC TRANSFER LENGTH WITH BOUNDARY is the maximum atomic write
length if a non-zero boundary value is set.

For atomic write support, the WRITE ATOMIC (16) boundary is not of much
interest, as the block layer expects each request submitted to be executed
atomically. However, the SCSI spec does leave itself open to a quirky
scenario where MAXIMUM ATOMIC TRANSFER LENGTH is zero, yet MAXIMUM ATOMIC
TRANSFER LENGTH WITH BOUNDARY and MAXIMUM ATOMIC BOUNDARY SIZE are both
non-zero. This case will be supported.

To set the block layer request_queue atomic write capabilities, sanitize
the VPD page limits and set limits as follows:
- atomic_write_unit_min is derived from granularity and alignment values.
  If no granularity value is not set, use physical block size
- atomic_write_unit_max is derived from MAXIMUM ATOMIC TRANSFER LENGTH. In
  the scenario where MAXIMUM ATOMIC TRANSFER LENGTH is zero and boundary
  limits are non-zero, use MAXIMUM ATOMIC BOUNDARY SIZE for
  atomic_write_unit_max. New flag scsi_disk.use_atomic_write_boundary is
  set for this scenario.
- atomic_write_boundary_bytes is set to zero always

SCSI also supports a WRITE ATOMIC (32) command, which is for type 2
protection enabled. This is not going to be supported now, so check for
T10_PI_TYPE2_PROTECTION when setting any request_queue limits.

To handle an atomic write request, add support for WRITE ATOMIC (16)
command in handler sd_setup_atomic_cmnd(). Flag use_atomic_write_boundary
is checked here for encoding ATOMIC BOUNDARY field.

Trace info is also added for WRITE_ATOMIC_16 command.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Acked-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20240620125359.2684798-9-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/scsi/scsi_proto.h   | 1 +
 include/trace/events/scsi.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h
index 843106e1109f..70e1262b2e20 100644
--- a/include/scsi/scsi_proto.h
+++ b/include/scsi/scsi_proto.h
@@ -120,6 +120,7 @@
 #define WRITE_SAME_16	      0x93
 #define ZBC_OUT		      0x94
 #define ZBC_IN		      0x95
+#define WRITE_ATOMIC_16	0x9c
 #define SERVICE_ACTION_BIDIRECTIONAL 0x9d
 #define SERVICE_ACTION_IN_16  0x9e
 #define SERVICE_ACTION_OUT_16 0x9f
diff --git a/include/trace/events/scsi.h b/include/trace/events/scsi.h
index 8e2d9b1b0e77..05f1945ed204 100644
--- a/include/trace/events/scsi.h
+++ b/include/trace/events/scsi.h
@@ -102,6 +102,7 @@
 		scsi_opcode_name(WRITE_32),			\
 		scsi_opcode_name(WRITE_SAME_32),		\
 		scsi_opcode_name(ATA_16),			\
+		scsi_opcode_name(WRITE_ATOMIC_16),		\
 		scsi_opcode_name(ATA_12))
 
 #define scsi_hostbyte_name(result)	{ result, #result }
-- 
cgit v1.2.3


From 9919c5c98cb25dbf7e76aadb9beab55a2a25f830 Mon Sep 17 00:00:00 2001
From: Rafael Passos <rafael@rcpassos.me>
Date: Fri, 14 Jun 2024 23:24:08 -0300
Subject: bpf: remove unused parameter in bpf_jit_binary_pack_finalize

Fixes a compiler warning. the bpf_jit_binary_pack_finalize function
was taking an extra bpf_prog parameter that went unused.
This removves it and updates the callers accordingly.

Signed-off-by: Rafael Passos <rafael@rcpassos.me>
Link: https://lore.kernel.org/r/20240615022641.210320-2-rafael@rcpassos.me
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index b02aea291b7e..dd41a93f06b2 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1129,8 +1129,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image,
 			  struct bpf_binary_header **rw_hdr,
 			  u8 **rw_image,
 			  bpf_jit_fill_hole_t bpf_fill_ill_insns);
-int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
-				 struct bpf_binary_header *ro_header,
+int bpf_jit_binary_pack_finalize(struct bpf_binary_header *ro_header,
 				 struct bpf_binary_header *rw_header);
 void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
 			      struct bpf_binary_header *rw_header);
-- 
cgit v1.2.3


From ab224b9ef7c4eaa752752455ea79bd7022209d5d Mon Sep 17 00:00:00 2001
From: Rafael Passos <rafael@rcpassos.me>
Date: Fri, 14 Jun 2024 23:24:09 -0300
Subject: bpf: remove unused parameter in __bpf_free_used_btfs

Fixes a compiler warning. The __bpf_free_used_btfs function
was taking an extra unused struct bpf_prog_aux *aux param

Signed-off-by: Rafael Passos <rafael@rcpassos.me>
Link: https://lore.kernel.org/r/20240615022641.210320-3-rafael@rcpassos.me
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f636b4998bf7..960780ef04e1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2933,8 +2933,7 @@ bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
 	return ret;
 }
 
-void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
-			  struct btf_mod_pair *used_btfs, u32 len);
+void __bpf_free_used_btfs(struct btf_mod_pair *used_btfs, u32 len);
 
 static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
 						 enum bpf_prog_type type)
-- 
cgit v1.2.3


From 158ed777e330e9bf6bd592daaf1e860d965ec8b5 Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Tue, 30 Apr 2024 11:43:16 +0100
Subject: firmware: qcom: scm: Add gpu_init_regs call

This will used by drm/msm to initialize GPU registers that Qualcomm's
firmware doesn't make writeable to the kernel.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Acked-by: Bjorn Andersson <andersson@kernel.org>
Link: https://lore.kernel.org/r/20240430-a750-raytracing-v3-2-7f57c5ac082d@gmail.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/firmware/qcom/qcom_scm.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h
index aaa19f93ac43..a221a643dc12 100644
--- a/include/linux/firmware/qcom/qcom_scm.h
+++ b/include/linux/firmware/qcom/qcom_scm.h
@@ -115,6 +115,29 @@ int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
 int qcom_scm_lmh_profile_change(u32 profile_id);
 bool qcom_scm_lmh_dcvsh_available(void);
 
+/*
+ * Request TZ to program set of access controlled registers necessary
+ * irrespective of any features
+ */
+#define QCOM_SCM_GPU_ALWAYS_EN_REQ BIT(0)
+/*
+ * Request TZ to program BCL id to access controlled register when BCL is
+ * enabled
+ */
+#define QCOM_SCM_GPU_BCL_EN_REQ BIT(1)
+/*
+ * Request TZ to program set of access controlled register for CLX feature
+ * when enabled
+ */
+#define QCOM_SCM_GPU_CLX_EN_REQ BIT(2)
+/*
+ * Request TZ to program tsense ids to access controlled registers for reading
+ * gpu temperature sensors
+ */
+#define QCOM_SCM_GPU_TSENSE_EN_REQ BIT(3)
+
+int qcom_scm_gpu_init_regs(u32 gpu_req);
+
 #ifdef CONFIG_QCOM_QSEECOM
 
 int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id);
-- 
cgit v1.2.3


From 9267997fa7aa0b597e8b32cb3fdfe91be1d35a83 Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Wed, 5 Jun 2024 22:10:14 +0200
Subject: soc: qcom: Move some socinfo defines to the header

In preparation for parsing the chip "feature code" (FC) and "product
code" (PC) (essentially the parameters that let us conclusively
characterize the sillicon we're running on, including various speed
bins), move the socinfo version defines to the public header.

Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Link: https://lore.kernel.org/r/20240605-topic-smem_speedbin-v2-1-8989d7e3d176@linaro.org
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/soc/qcom/socinfo.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/soc/qcom/socinfo.h b/include/linux/soc/qcom/socinfo.h
index e78777bb0f4a..10e0a4c287f4 100644
--- a/include/linux/soc/qcom/socinfo.h
+++ b/include/linux/soc/qcom/socinfo.h
@@ -12,6 +12,14 @@
 #define SMEM_SOCINFO_BUILD_ID_LENGTH	32
 #define SMEM_SOCINFO_CHIP_ID_LENGTH	32
 
+/*
+ * SoC version type with major number in the upper 16 bits and minor
+ * number in the lower 16 bits.
+ */
+#define SOCINFO_MAJOR(ver) (((ver) >> 16) & 0xffff)
+#define SOCINFO_MINOR(ver) ((ver) & 0xffff)
+#define SOCINFO_VERSION(maj, min)  ((((maj) & 0xffff) << 16)|((min) & 0xffff))
+
 /* Socinfo SMEM item structure */
 struct socinfo {
 	__le32 fmt;
-- 
cgit v1.2.3


From 81bbb2b891174da9301fc0d4fe9622bd4cb6a995 Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Wed, 5 Jun 2024 22:10:15 +0200
Subject: soc: qcom: smem: Add a feature code getter

Recent (SM8550+ ish) Qualcomm SoCs have a new mechanism for precisely
identifying the specific SKU and the precise speed bin (in the general
meaning of this word, anyway): a pair of values called Product Code
and Feature Code.

Based on this information, we can deduce the available frequencies for
things such as Adreno. In the case of Adreno specifically, Pcode is
useless for non-prototype SoCs.

Introduce a getter for the feature code and export it.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Link: https://lore.kernel.org/r/20240605-topic-smem_speedbin-v2-2-8989d7e3d176@linaro.org
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/soc/qcom/smem.h    |  1 +
 include/linux/soc/qcom/socinfo.h | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/linux/soc/qcom/smem.h b/include/linux/soc/qcom/smem.h
index a36a3b9d4929..0943bf419e11 100644
--- a/include/linux/soc/qcom/smem.h
+++ b/include/linux/soc/qcom/smem.h
@@ -13,5 +13,6 @@ int qcom_smem_get_free_space(unsigned host);
 phys_addr_t qcom_smem_virt_to_phys(void *p);
 
 int qcom_smem_get_soc_id(u32 *id);
+int qcom_smem_get_feature_code(u32 *code);
 
 #endif
diff --git a/include/linux/soc/qcom/socinfo.h b/include/linux/soc/qcom/socinfo.h
index 10e0a4c287f4..608950443eee 100644
--- a/include/linux/soc/qcom/socinfo.h
+++ b/include/linux/soc/qcom/socinfo.h
@@ -3,6 +3,8 @@
 #ifndef __QCOM_SOCINFO_H__
 #define __QCOM_SOCINFO_H__
 
+#include <linux/types.h>
+
 /*
  * SMEM item id, used to acquire handles to respective
  * SMEM region.
@@ -82,4 +84,28 @@ struct socinfo {
 	__le32 boot_core;
 };
 
+/* Internal feature codes */
+enum qcom_socinfo_feature_code {
+	/* External feature codes */
+	SOCINFO_FC_UNKNOWN = 0x0,
+	SOCINFO_FC_AA,
+	SOCINFO_FC_AB,
+	SOCINFO_FC_AC,
+	SOCINFO_FC_AD,
+	SOCINFO_FC_AE,
+	SOCINFO_FC_AF,
+	SOCINFO_FC_AG,
+	SOCINFO_FC_AH,
+};
+
+/* Internal feature codes */
+/* Valid values: 0 <= n <= 0xf */
+#define SOCINFO_FC_Yn(n)		(0xf1 + (n))
+#define SOCINFO_FC_INT_MAX		SOCINFO_FC_Yn(0xf)
+
+/* Product codes */
+#define SOCINFO_PC_UNKNOWN		0
+#define SOCINFO_PCn(n)			((n) + 1)
+#define SOCINFO_PC_RESERVE		(BIT(31) - 1)
+
 #endif
-- 
cgit v1.2.3


From 5878853fc9389e7d0988d4b465a415cf96fd14fa Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Thu, 20 Jun 2024 14:27:20 +0200
Subject: dmaengine: Add API function dmaengine_prep_peripheral_dma_vec()

This function can be used to initiate a scatter-gather DMA transfer,
where the address and size of each segment is located in one entry of
the dma_vec array.

The major difference with dmaengine_prep_slave_sg() is that it supports
specifying the lengths of each DMA transfer; as trying to override the
length of the transfer with dmaengine_prep_slave_sg() is a very tedious
process. The introduction of a new API function is also justified by the
fact that scatterlists are on their way out.

Note that dmaengine_prep_interleaved_dma() is not helpful either in that
case, as it assumes that the address of each segment will be higher than
the one of the previous segment, which we just cannot guarantee in case
of a scatter-gather transfer.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Co-developed-by: Nuno Sa <nuno.sa@analog.com>
Signed-off-by: Nuno Sa <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20240620122726.41232-2-paul@crapouillou.net
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/dmaengine.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'include')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 752dbde4cec1..9fc03068cabc 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -160,6 +160,16 @@ struct dma_interleaved_template {
 	struct data_chunk sgl[];
 };
 
+/**
+ * struct dma_vec - DMA vector
+ * @addr: Bus address of the start of the vector
+ * @len: Length in bytes of the DMA vector
+ */
+struct dma_vec {
+	dma_addr_t addr;
+	size_t len;
+};
+
 /**
  * enum dma_ctrl_flags - DMA flags to augment operation preparation,
  *  control completion, and communicate status.
@@ -910,6 +920,10 @@ struct dma_device {
 	struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
 		struct dma_chan *chan, unsigned long flags);
 
+	struct dma_async_tx_descriptor *(*device_prep_peripheral_dma_vec)(
+		struct dma_chan *chan, const struct dma_vec *vecs,
+		size_t nents, enum dma_transfer_direction direction,
+		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
@@ -973,6 +987,25 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
 						  dir, flags, NULL);
 }
 
+/**
+ * dmaengine_prep_peripheral_dma_vec() - Prepare a DMA scatter-gather descriptor
+ * @chan: The channel to be used for this descriptor
+ * @vecs: The array of DMA vectors that should be transferred
+ * @nents: The number of DMA vectors in the array
+ * @dir: Specifies the direction of the data transfer
+ * @flags: DMA engine flags
+ */
+static inline struct dma_async_tx_descriptor *dmaengine_prep_peripheral_dma_vec(
+	struct dma_chan *chan, const struct dma_vec *vecs, size_t nents,
+	enum dma_transfer_direction dir, unsigned long flags)
+{
+	if (!chan || !chan->device || !chan->device->device_prep_peripheral_dma_vec)
+		return NULL;
+
+	return chan->device->device_prep_peripheral_dma_vec(chan, vecs, nents,
+							    dir, flags);
+}
+
 static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_sg(
 	struct dma_chan *chan, struct scatterlist *sgl,	unsigned int sg_len,
 	enum dma_transfer_direction dir, unsigned long flags)
-- 
cgit v1.2.3


From 5f2e950755c76c6fc20fc4ebd8355671fbbd7ac0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 13 Jun 2024 16:48:36 +0200
Subject: leds: core: Introduce led_get_color_name() function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is similar to the existing led_colors[] array but is safer to use and
usable by everyone.

Getting string representations of color ids is useful for drivers
which are handling color IDs anyways, for example for the multicolor API.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20240613-cros_ec-led-v3-1-500b50f41e0f@weissschuh.net
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/leds.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 6300313c46b7..dedea965afbf 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -427,6 +427,16 @@ void led_sysfs_enable(struct led_classdev *led_cdev);
 int led_compose_name(struct device *dev, struct led_init_data *init_data,
 		     char *led_classdev_name);
 
+/**
+ * led_get_color_name - get string representation of color ID
+ * @color_id: The LED_COLOR_ID_* constant
+ *
+ * Get the string name of a LED_COLOR_ID_* constant.
+ *
+ * Returns: A string constant or NULL on an invalid ID.
+ */
+const char *led_get_color_name(u8 color_id);
+
 /**
  * led_sysfs_is_disabled - check if LED sysfs interface is disabled
  * @led_cdev: the LED to query
-- 
cgit v1.2.3


From 1cbf347288702af0fe8667c0ce760afbe982a2f1 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Fri, 14 Jun 2024 11:14:18 +0300
Subject: i2c: Add nop fwnode operations

Add nop variants of i2c_find_device_by_fwnode(),
i2c_find_adapter_by_fwnode() and i2c_get_adapter_by_fwnode() for use
without CONFIG_I2C.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/i2c.h | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 9709537370ee..424acb98c7c2 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -960,8 +960,6 @@ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr);
 #define builtin_i2c_driver(__i2c_driver) \
 	builtin_driver(__i2c_driver, i2c_add_driver)
 
-#endif /* I2C */
-
 /* must call put_device() when done with returned i2c_client device */
 struct i2c_client *i2c_find_device_by_fwnode(struct fwnode_handle *fwnode);
 
@@ -971,6 +969,28 @@ struct i2c_adapter *i2c_find_adapter_by_fwnode(struct fwnode_handle *fwnode);
 /* must call i2c_put_adapter() when done with returned i2c_adapter device */
 struct i2c_adapter *i2c_get_adapter_by_fwnode(struct fwnode_handle *fwnode);
 
+#else /* I2C */
+
+static inline struct i2c_client *
+i2c_find_device_by_fwnode(struct fwnode_handle *fwnode)
+{
+	return NULL;
+}
+
+static inline struct i2c_adapter *
+i2c_find_adapter_by_fwnode(struct fwnode_handle *fwnode)
+{
+	return NULL;
+}
+
+static inline struct i2c_adapter *
+i2c_get_adapter_by_fwnode(struct fwnode_handle *fwnode)
+{
+	return NULL;
+}
+
+#endif /* !I2C */
+
 #if IS_ENABLED(CONFIG_OF)
 /* must call put_device() when done with returned i2c_client device */
 static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node)
-- 
cgit v1.2.3


From 6ae0092ca7adfce79336c6525cb0da561ecd4f04 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Fri, 14 Jun 2024 14:16:05 -0700
Subject: thermal: intel: intel_tcc: Add model checks for temperature registers

The register MSR_TEMPERATURE_TARGET is not architectural. Its fields may be
defined differently for each processor model. TCC_OFFSET is an example of
such case.

Despite being specified as architectural, the registers IA32_[PACKAGE]_
THERM_STATUS have become model-specific: in recent processors, the
digital temperature readout uses bits [23:16] whereas the Intel Software
Developer's manual specifies bits [22:16].

Create an array of processor models and their bitmasks for TCC_OFFSET and
the digital temperature readout fields. Do not include recent processors.
Instead, use the bitmasks of these recent processors as default.

Use these model-specific bitmasks when reading TCC_OFFSET or the
temperature sensors.

Initialize a model-specific data structure during subsys_initcall() to
have it ready when thermal drivers are loaded.

Expose the new interface intel_tcc_get_offset_mask(). The
intel_tcc_cooling driver will use it.

Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Link: https://patch.msgid.link/20240614211606.5896-2-ricardo.neri-calderon@linux.intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/intel_tcc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/intel_tcc.h b/include/linux/intel_tcc.h
index 8ff8eabb4a98..fa788817acfc 100644
--- a/include/linux/intel_tcc.h
+++ b/include/linux/intel_tcc.h
@@ -14,5 +14,6 @@ int intel_tcc_get_tjmax(int cpu);
 int intel_tcc_get_offset(int cpu);
 int intel_tcc_set_offset(int cpu, int offset);
 int intel_tcc_get_temp(int cpu, int *temp, bool pkg);
+u32 intel_tcc_get_offset_mask(void);
 
 #endif /* __INTEL_TCC_H__ */
-- 
cgit v1.2.3


From b6cfe2287df6e26d685af8a8a96ed1bf87bdde28 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Fri, 21 Jun 2024 12:15:05 +0900
Subject: block: Define bdev_nr_zones() as an inline function

There is no need for bdev_nr_zones() to be an exported function
calculating the number of zones of a block device. Instead, given that
all callers use this helper with a fully initialized block device that
has a gendisk, we can redefine this function as an inline helper in
blkdev.h.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20240621031506.759397-3-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2800231046a9..1078a7d51295 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -673,7 +673,6 @@ static inline bool blk_queue_is_zoned(struct request_queue *q)
 }
 
 #ifdef CONFIG_BLK_DEV_ZONED
-unsigned int bdev_nr_zones(struct block_device *bdev);
 
 static inline unsigned int disk_nr_zones(struct gendisk *disk)
 {
@@ -687,6 +686,11 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
 	return sector >> ilog2(disk->queue->limits.chunk_sectors);
 }
 
+static inline unsigned int bdev_nr_zones(struct block_device *bdev)
+{
+	return disk_nr_zones(bdev->bd_disk);
+}
+
 static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
 {
 	return bdev->bd_disk->queue->limits.max_open_zones;
-- 
cgit v1.2.3


From caaf7101c01a91a882d3da2f566579dda692367d Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Fri, 21 Jun 2024 12:15:06 +0900
Subject: block: Cleanup block device zone helpers

There is no need to conditionally define on CONFIG_BLK_DEV_ZONED the
inline helper functions bdev_nr_zones(), bdev_max_open_zones(),
bdev_max_active_zones() and disk_zone_no() as these function will return
the correct valu in all cases (zoned device or not, including when
CONFIG_BLK_DEV_ZONED is not set). Furthermore, disk_nr_zones()
definition can be simplified as disk->nr_zones is always 0 for regular
block devices.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20240621031506.759397-4-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 44 ++++++++++++--------------------------------
 1 file changed, 12 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1078a7d51295..e89003360c17 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -673,11 +673,21 @@ static inline bool blk_queue_is_zoned(struct request_queue *q)
 }
 
 #ifdef CONFIG_BLK_DEV_ZONED
-
 static inline unsigned int disk_nr_zones(struct gendisk *disk)
 {
-	return blk_queue_is_zoned(disk->queue) ? disk->nr_zones : 0;
+	return disk->nr_zones;
+}
+bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs);
+#else /* CONFIG_BLK_DEV_ZONED */
+static inline unsigned int disk_nr_zones(struct gendisk *disk)
+{
+	return 0;
+}
+static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
+{
+	return false;
 }
+#endif /* CONFIG_BLK_DEV_ZONED */
 
 static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
 {
@@ -701,36 +711,6 @@ static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
 	return bdev->bd_disk->queue->limits.max_active_zones;
 }
 
-bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs);
-#else /* CONFIG_BLK_DEV_ZONED */
-static inline unsigned int bdev_nr_zones(struct block_device *bdev)
-{
-	return 0;
-}
-
-static inline unsigned int disk_nr_zones(struct gendisk *disk)
-{
-	return 0;
-}
-static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
-{
-	return 0;
-}
-static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
-{
-	return 0;
-}
-
-static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
-{
-	return 0;
-}
-static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
-{
-	return false;
-}
-#endif /* CONFIG_BLK_DEV_ZONED */
-
 static inline unsigned int blk_queue_depth(struct request_queue *q)
 {
 	if (q->queue_depth)
-- 
cgit v1.2.3


From f80a55fa90fa76d01e3fffaa5d0413e522ab9a00 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Mon, 17 Jun 2024 09:27:27 +0200
Subject: nvme: fixup comment for nvme RDMA Provider Type

PRTYPE is the provider type, not the QP service type.

Fixes: eb793e2c9286 ("nvme.h: add NVMe over Fabrics definitions")
Signed-off-by: Hannes Reinecke <hare@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 425573202295..69ac2abf8acf 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -87,8 +87,8 @@ enum {
 	NVMF_RDMA_QPTYPE_DATAGRAM	= 2, /* Reliable Datagram */
 };
 
-/* RDMA QP Service Type codes for Discovery Log Page entry TSAS
- * RDMA_QPTYPE field
+/* RDMA Provider Type codes for Discovery Log Page entry TSAS
+ * RDMA_PRTYPE field
  */
 enum {
 	NVMF_RDMA_PRTYPE_NOT_SPECIFIED	= 1, /* No Provider Specified */
-- 
cgit v1.2.3


From 0f1f5803920d2a6b88bee950914fd37421e17170 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@kernel.org>
Date: Mon, 17 Jun 2024 09:27:28 +0200
Subject: nvmet: make 'tsas' attribute idempotent for RDMA

The RDMA transport defines values for TSAS, but it cannot be changed as
we only support the 'connected' mode.
So to avoid errors during reconfiguration we should allow to write the
current value.

Fixes: 3f123494db72 ("nvmet: make TCP sectype settable via configfs")
Signed-off-by: Hannes Reinecke <hare@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 69ac2abf8acf..c693ac344ec0 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -85,6 +85,7 @@ enum {
 enum {
 	NVMF_RDMA_QPTYPE_CONNECTED	= 1, /* Reliable Connected */
 	NVMF_RDMA_QPTYPE_DATAGRAM	= 2, /* Reliable Datagram */
+	NVMF_RDMA_QPTYPE_INVALID	= 0xff,
 };
 
 /* RDMA Provider Type codes for Discovery Log Page entry TSAS
@@ -110,6 +111,7 @@ enum {
 	NVMF_TCP_SECTYPE_NONE = 0, /* No Security */
 	NVMF_TCP_SECTYPE_TLS12 = 1, /* TLSv1.2, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */
 	NVMF_TCP_SECTYPE_TLS13 = 2, /* TLSv1.3, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */
+	NVMF_TCP_SECTYPE_INVALID = 0xff,
 };
 
 #define NVME_AQ_DEPTH		32
-- 
cgit v1.2.3


From 9403408e122628a6dc7154a95716f00dae3747c7 Mon Sep 17 00:00:00 2001
From: Christian Loehle <christian.loehle@arm.com>
Date: Mon, 17 Jun 2024 17:16:15 +0100
Subject: tick: Remove unnused tick_nohz_get_idle_calls()

The function returns the idle calls counter for the current cpu and
therefore usually isn't what the caller wants. It is unnused since
commit 466a2b42d676 ("cpufreq: schedutil: Use idle_calls counter of the
remote CPU")

Signed-off-by: Christian Loehle <christian.loehle@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240617161615.49309-1-christian.loehle@arm.com
---
 include/linux/tick.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 4924a33700b7..72744638c5b0 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -139,7 +139,6 @@ extern void tick_nohz_irq_exit(void);
 extern bool tick_nohz_idle_got_tick(void);
 extern ktime_t tick_nohz_get_next_hrtimer(void);
 extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next);
-extern unsigned long tick_nohz_get_idle_calls(void);
 extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
-- 
cgit v1.2.3


From 4df13a6871d9e97aeeef72244e9a954c5cf11f54 Mon Sep 17 00:00:00 2001
From: Ben Segal <bpsegal@us.ibm.com>
Date: Wed, 19 Jun 2024 13:58:46 +0200
Subject: vfio/pci: Support 8-byte PCI loads and stores

Many PCI adapters can benefit or even require full 64bit read
and write access to their registers. In order to enable work on
user-space drivers for these devices add two new variations
vfio_pci_core_io{read|write}64 of the existing access methods
when the architecture supports 64-bit ioreads and iowrites.

Signed-off-by: Ben Segal <bpsegal@us.ibm.com>
Co-developed-by: Gerd Bayer <gbayer@linux.ibm.com>
Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com>
Link: https://lore.kernel.org/r/20240619115847.1344875-3-gbayer@linux.ibm.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/vfio_pci_core.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index f87067438ed4..7b45f70f84c3 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -155,5 +155,8 @@ int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev,	\
 VFIO_IOREAD_DECLATION(8)
 VFIO_IOREAD_DECLATION(16)
 VFIO_IOREAD_DECLATION(32)
+#ifdef ioread64
+VFIO_IOREAD_DECLATION(64)
+#endif
 
 #endif /* VFIO_PCI_CORE_H */
-- 
cgit v1.2.3


From abe8103da3c59415b790fa260a6676c7bf252a65 Mon Sep 17 00:00:00 2001
From: Gerd Bayer <gbayer@linux.ibm.com>
Date: Wed, 19 Jun 2024 13:58:47 +0200
Subject: vfio/pci: Fix typo in macro to declare accessors

Correct spelling of DECLA[RA]TION

Suggested-by: Ramesh Thomas <ramesh.thomas@intel.com>
Signed-off-by: Gerd Bayer <gbayer@linux.ibm.com>
Link: https://lore.kernel.org/r/20240619115847.1344875-4-gbayer@linux.ibm.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/vfio_pci_core.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 7b45f70f84c3..fbb472dd99b3 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -137,26 +137,26 @@ bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt,
 					 loff_t *buf_offset,
 					 size_t *intersect_count,
 					 size_t *register_offset);
-#define VFIO_IOWRITE_DECLATION(size) \
+#define VFIO_IOWRITE_DECLARATION(size) \
 int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev,	\
 			bool test_mem, u##size val, void __iomem *io);
 
-VFIO_IOWRITE_DECLATION(8)
-VFIO_IOWRITE_DECLATION(16)
-VFIO_IOWRITE_DECLATION(32)
+VFIO_IOWRITE_DECLARATION(8)
+VFIO_IOWRITE_DECLARATION(16)
+VFIO_IOWRITE_DECLARATION(32)
 #ifdef iowrite64
-VFIO_IOWRITE_DECLATION(64)
+VFIO_IOWRITE_DECLARATION(64)
 #endif
 
-#define VFIO_IOREAD_DECLATION(size) \
+#define VFIO_IOREAD_DECLARATION(size) \
 int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev,	\
 			bool test_mem, u##size *val, void __iomem *io);
 
-VFIO_IOREAD_DECLATION(8)
-VFIO_IOREAD_DECLATION(16)
-VFIO_IOREAD_DECLATION(32)
+VFIO_IOREAD_DECLARATION(8)
+VFIO_IOREAD_DECLARATION(16)
+VFIO_IOREAD_DECLARATION(32)
 #ifdef ioread64
-VFIO_IOREAD_DECLATION(64)
+VFIO_IOREAD_DECLARATION(64)
 #endif
 
 #endif /* VFIO_PCI_CORE_H */
-- 
cgit v1.2.3


From 90c3e2bc9ecbf3210d661fadee4c947d34a88ceb Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Tue, 30 Apr 2024 11:43:16 +0100
Subject: firmware: qcom_scm: Add gpu_init_regs call

This will used by drm/msm to initialize GPU registers that Qualcomm's
firmware doesn't make writeable to the kernel.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Acked-by: Bjorn Andersson <andersson@kernel.org>
Patchwork: https://patchwork.freedesktop.org/patch/592039/
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 include/linux/firmware/qcom/qcom_scm.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h
index aaa19f93ac43..a221a643dc12 100644
--- a/include/linux/firmware/qcom/qcom_scm.h
+++ b/include/linux/firmware/qcom/qcom_scm.h
@@ -115,6 +115,29 @@ int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
 int qcom_scm_lmh_profile_change(u32 profile_id);
 bool qcom_scm_lmh_dcvsh_available(void);
 
+/*
+ * Request TZ to program set of access controlled registers necessary
+ * irrespective of any features
+ */
+#define QCOM_SCM_GPU_ALWAYS_EN_REQ BIT(0)
+/*
+ * Request TZ to program BCL id to access controlled register when BCL is
+ * enabled
+ */
+#define QCOM_SCM_GPU_BCL_EN_REQ BIT(1)
+/*
+ * Request TZ to program set of access controlled register for CLX feature
+ * when enabled
+ */
+#define QCOM_SCM_GPU_CLX_EN_REQ BIT(2)
+/*
+ * Request TZ to program tsense ids to access controlled registers for reading
+ * gpu temperature sensors
+ */
+#define QCOM_SCM_GPU_TSENSE_EN_REQ BIT(3)
+
+int qcom_scm_gpu_init_regs(u32 gpu_req);
+
 #ifdef CONFIG_QCOM_QSEECOM
 
 int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id);
-- 
cgit v1.2.3


From 41fd54ef74b02233a419b4929d26662e5f105f46 Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Tue, 30 Apr 2024 11:43:19 +0100
Subject: drm/msm: Add MSM_PARAM_RAYTRACING uapi

Expose the value of the software fuse to userspace.

Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Patchwork: https://patchwork.freedesktop.org/patch/592044/
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 include/uapi/drm/msm_drm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index d8a6b3472760..3fca72f73861 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -87,6 +87,7 @@ struct drm_msm_timespec {
 #define MSM_PARAM_VA_START   0x0e  /* RO: start of valid GPU iova range */
 #define MSM_PARAM_VA_SIZE    0x0f  /* RO: size of valid GPU iova range (bytes) */
 #define MSM_PARAM_HIGHEST_BANK_BIT 0x10 /* RO */
+#define MSM_PARAM_RAYTRACING 0x11 /* RO */
 
 /* For backwards compat.  The original support for preemption was based on
  * a single ring per priority level so # of priority levels equals the #
-- 
cgit v1.2.3


From d4e48e3dd45017abdd69a19285d197de897ef44f Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Thu, 20 Jun 2024 10:17:29 +0100
Subject: module, bpf: Store BTF base pointer in struct module

...as this will allow split BTF modules with a base BTF
representation (rather than the full vmlinux BTF at time of
BTF encoding) to resolve their references to kernel types in a
way that is more resilient to small changes in kernel types.

This will allow modules that are not built every time the kernel
is to provide more resilient BTF, rather than have it invalidated
every time BTF ids for core kernel types change.

Fields are ordered to avoid holes in struct module.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20240620091733.1967885-3-alan.maguire@oracle.com
---
 include/linux/module.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/module.h b/include/linux/module.h
index ffa1c603163c..b79d926cae8a 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -509,7 +509,9 @@ struct module {
 #endif
 #ifdef CONFIG_DEBUG_INFO_BTF_MODULES
 	unsigned int btf_data_size;
+	unsigned int btf_base_data_size;
 	void *btf_data;
+	void *btf_base_data;
 #endif
 #ifdef CONFIG_JUMP_LABEL
 	struct jump_entry *jump_entries;
-- 
cgit v1.2.3


From 8646db238997df36c6ad71a9d7e0b52ceee221b2 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Thu, 20 Jun 2024 10:17:31 +0100
Subject: libbpf,bpf: Share BTF relocate-related code with kernel

Share relocation implementation with the kernel.  As part of this,
we also need the type/string iteration functions so also share
btf_iter.c file. Relocation code in kernel and userspace is identical
save for the impementation of the reparenting of split BTF to the
relocated base BTF and retrieval of the BTF header from "struct btf";
these small functions need separate user-space and kernel implementations
for the separate "struct btf"s they operate upon.

One other wrinkle on the kernel side is we have to map .BTF.ids in
modules as they were generated with the type ids used at BTF encoding
time. btf_relocate() optionally returns an array mapping from old BTF
ids to relocated ids, so we use that to fix up these references where
needed for kfuncs.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/bpf/20240620091733.1967885-5-alan.maguire@oracle.com
---
 include/linux/btf.h | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

(limited to 'include')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 56d91daacdba..d199fa17abb4 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -140,6 +140,7 @@ extern const struct file_operations btf_fops;
 const char *btf_get_name(const struct btf *btf);
 void btf_get(struct btf *btf);
 void btf_put(struct btf *btf);
+const struct btf_header *btf_header(const struct btf *btf);
 int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz);
 struct btf *btf_get_by_fd(int fd);
 int btf_get_info_by_fd(const struct btf *btf,
@@ -212,8 +213,10 @@ int btf_get_fd_by_id(u32 id);
 u32 btf_obj_id(const struct btf *btf);
 bool btf_is_kernel(const struct btf *btf);
 bool btf_is_module(const struct btf *btf);
+bool btf_is_vmlinux(const struct btf *btf);
 struct module *btf_try_get_module(const struct btf *btf);
 u32 btf_nr_types(const struct btf *btf);
+struct btf *btf_base_btf(const struct btf *btf);
 bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
 			   const struct btf_member *m,
 			   u32 expected_offset, u32 expected_size);
@@ -339,6 +342,11 @@ static inline u8 btf_int_offset(const struct btf_type *t)
 	return BTF_INT_OFFSET(*(u32 *)(t + 1));
 }
 
+static inline __u8 btf_int_bits(const struct btf_type *t)
+{
+	return BTF_INT_BITS(*(__u32 *)(t + 1));
+}
+
 static inline bool btf_type_is_scalar(const struct btf_type *t)
 {
 	return btf_type_is_int(t) || btf_type_is_enum(t);
@@ -478,6 +486,11 @@ static inline struct btf_param *btf_params(const struct btf_type *t)
 	return (struct btf_param *)(t + 1);
 }
 
+static inline struct btf_decl_tag *btf_decl_tag(const struct btf_type *t)
+{
+	return (struct btf_decl_tag *)(t + 1);
+}
+
 static inline int btf_id_cmp_func(const void *a, const void *b)
 {
 	const int *pa = a, *pb = b;
@@ -515,9 +528,38 @@ static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *
 }
 #endif
 
+enum btf_field_iter_kind {
+	BTF_FIELD_ITER_IDS,
+	BTF_FIELD_ITER_STRS,
+};
+
+struct btf_field_desc {
+	/* once-per-type offsets */
+	int t_off_cnt, t_offs[2];
+	/* member struct size, or zero, if no members */
+	int m_sz;
+	/* repeated per-member offsets */
+	int m_off_cnt, m_offs[1];
+};
+
+struct btf_field_iter {
+	struct btf_field_desc desc;
+	void *p;
+	int m_idx;
+	int off_idx;
+	int vlen;
+};
+
 #ifdef CONFIG_BPF_SYSCALL
 const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
+void btf_set_base_btf(struct btf *btf, const struct btf *base_btf);
+int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **map_ids);
+int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t,
+			enum btf_field_iter_kind iter_kind);
+__u32 *btf_field_iter_next(struct btf_field_iter *it);
+
 const char *btf_name_by_offset(const struct btf *btf, u32 offset);
+const char *btf_str_by_offset(const struct btf *btf, u32 offset);
 struct btf *btf_parse_vmlinux(void);
 struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
 u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id,
@@ -544,6 +586,28 @@ static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
 {
 	return NULL;
 }
+
+static inline void btf_set_base_btf(struct btf *btf, const struct btf *base_btf)
+{
+}
+
+static inline int btf_relocate(void *log, struct btf *btf, const struct btf *base_btf,
+			       __u32 **map_ids)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t,
+				      enum btf_field_iter_kind iter_kind)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline __u32 *btf_field_iter_next(struct btf_field_iter *it)
+{
+	return NULL;
+}
+
 static inline const char *btf_name_by_offset(const struct btf *btf,
 					     u32 offset)
 {
-- 
cgit v1.2.3


From d4a0055fdc22381fa256e345095e88d134e354c5 Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner@baylibre.com>
Date: Fri, 21 Jun 2024 15:51:31 -0500
Subject: spi: add devm_spi_optimize_message() helper

This adds a new helper function devm_spi_optimize_message() that
automatically registers spi_unoptimize_message() to be called
when the device is removed.

Signed-off-by: David Lechner <dlechner@baylibre.com>
Link: https://patch.msgid.link/20240621-devm_spi_optimize_message-v1-2-3f9dcba6e95e@baylibre.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index e8e1e798924f..e6e5978f7564 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -1268,6 +1268,8 @@ static inline void spi_message_free(struct spi_message *m)
 
 extern int spi_optimize_message(struct spi_device *spi, struct spi_message *msg);
 extern void spi_unoptimize_message(struct spi_message *msg);
+extern int devm_spi_optimize_message(struct device *dev, struct spi_device *spi,
+				     struct spi_message *msg);
 
 extern int spi_setup(struct spi_device *spi);
 extern int spi_async(struct spi_device *spi, struct spi_message *message);
-- 
cgit v1.2.3


From 532857c2a76ba3553302cf2a2cbec7679fb5b4fe Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Wed, 5 Jun 2024 12:20:46 -0500
Subject: dt-bindings: clock: sun50i-h616-ccu: Add GPADC clocks

Add the required clock bindings for the GPADC.

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Acked-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20240605172049.231108-2-macroalpha82@gmail.com
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
---
 include/dt-bindings/clock/sun50i-h616-ccu.h | 1 +
 include/dt-bindings/reset/sun50i-h616-ccu.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/sun50i-h616-ccu.h b/include/dt-bindings/clock/sun50i-h616-ccu.h
index 6f8f01e67628..ebb146ab7f8c 100644
--- a/include/dt-bindings/clock/sun50i-h616-ccu.h
+++ b/include/dt-bindings/clock/sun50i-h616-ccu.h
@@ -112,5 +112,6 @@
 #define CLK_HDCP		126
 #define CLK_BUS_HDCP		127
 #define CLK_PLL_SYSTEM_32K	128
+#define CLK_BUS_GPADC		129
 
 #endif /* _DT_BINDINGS_CLK_SUN50I_H616_H_ */
diff --git a/include/dt-bindings/reset/sun50i-h616-ccu.h b/include/dt-bindings/reset/sun50i-h616-ccu.h
index 1bd8bb0a11be..ed177c04afdd 100644
--- a/include/dt-bindings/reset/sun50i-h616-ccu.h
+++ b/include/dt-bindings/reset/sun50i-h616-ccu.h
@@ -66,5 +66,6 @@
 #define RST_BUS_TVE0		57
 #define RST_BUS_HDCP		58
 #define RST_BUS_KEYADC		59
+#define RST_BUS_GPADC		60
 
 #endif /* _DT_BINDINGS_RESET_SUN50I_H616_H_ */
-- 
cgit v1.2.3


From 06efa5f30c28eaf237247ca8c4cb46eb62cb6bd9 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sat, 22 Jun 2024 21:38:58 -0400
Subject: closures: closure_get_not_zero(), closure_return_sync()

Provide new primitives for solving a lifetime issue with bcachefs
btree_trans objects.

closure_sync_return(): like closure_sync(), wait synchronously for any
outstanding gets. like closure_return, the closure is considered
"finished" and the ref left at 0.

closure_get_not_zero(): get a ref on a closure if it's alive, i.e. the
ref is not zero.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/closure.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/closure.h b/include/linux/closure.h
index 99155df162d0..59b8c06b11ff 100644
--- a/include/linux/closure.h
+++ b/include/linux/closure.h
@@ -284,6 +284,21 @@ static inline void closure_get(struct closure *cl)
 #endif
 }
 
+/**
+ * closure_get_not_zero
+ */
+static inline bool closure_get_not_zero(struct closure *cl)
+{
+	unsigned old = atomic_read(&cl->remaining);
+	do {
+		if (!(old & CLOSURE_REMAINING_MASK))
+			return false;
+
+	} while (!atomic_try_cmpxchg_acquire(&cl->remaining, &old, old + 1));
+
+	return true;
+}
+
 /**
  * closure_init - Initialize a closure, setting the refcount to 1
  * @cl:		closure to initialize
@@ -310,6 +325,12 @@ static inline void closure_init_stack(struct closure *cl)
 	atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
 }
 
+static inline void closure_init_stack_release(struct closure *cl)
+{
+	memset(cl, 0, sizeof(struct closure));
+	atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+}
+
 /**
  * closure_wake_up - wake up all closures on a wait list,
  *		     with memory barrier
@@ -355,6 +376,8 @@ do {									\
  */
 #define closure_return(_cl)	continue_at((_cl), NULL, NULL)
 
+void closure_return_sync(struct closure *cl);
+
 /**
  * continue_at_nobarrier - jump to another function without barrier
  *
-- 
cgit v1.2.3


From 467cfe945656df044c8cf9121e5cdbe5b977b497 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Mon, 19 Feb 2024 13:43:55 +0200
Subject: accel/habanalabs/gaudi2: align embedded specs headers

Align embedded headers to latest release.

Reviewed-by: Tomer Tayar <ttayar@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>
---
 include/linux/habanalabs/cpucp_if.h   | 10 ++++++++--
 include/linux/habanalabs/hl_boot_if.h | 29 ++++++++++++++++++++++-------
 2 files changed, 30 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h
index f316c8d0f3fc..1ac1d68193e3 100644
--- a/include/linux/habanalabs/cpucp_if.h
+++ b/include/linux/habanalabs/cpucp_if.h
@@ -42,6 +42,12 @@ enum eq_event_id {
 	EQ_EVENT_PWR_BRK_ENTRY,
 	EQ_EVENT_PWR_BRK_EXIT,
 	EQ_EVENT_HEARTBEAT,
+	EQ_EVENT_CPLD_RESET_REASON,
+	EQ_EVENT_CPLD_SHUTDOWN,
+	EQ_EVENT_POWER_EVT_START,
+	EQ_EVENT_POWER_EVT_END,
+	EQ_EVENT_THERMAL_EVT_START,
+	EQ_EVENT_THERMAL_EVT_END,
 };
 
 /*
@@ -1165,7 +1171,7 @@ struct cpucp_security_info {
 struct cpucp_info {
 	struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
 	__u8 kernel_version[VERSION_MAX_LEN];
-	__le32 reserved;
+	__le32 reserved1;
 	__le32 card_type;
 	__le32 card_location;
 	__le32 cpld_version;
@@ -1187,7 +1193,7 @@ struct cpucp_info {
 	__u8 substrate_version;
 	__u8 eq_health_check_supported;
 	struct cpucp_security_info sec_info;
-	__le32 fw_hbm_region_size;
+	__le32 reserved2;
 	__u8 pll_map[PLL_MAP_LEN];
 	__le64 mme_binning_mask;
 	__u8 fw_os_version[VERSION_MAX_LEN];
diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h
index 93366d5621fd..d2a9fc96424b 100644
--- a/include/linux/habanalabs/hl_boot_if.h
+++ b/include/linux/habanalabs/hl_boot_if.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0
  *
- * Copyright 2018-2020 HabanaLabs, Ltd.
+ * Copyright 2018-2023 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */
@@ -49,7 +49,6 @@ enum cpu_boot_err {
 #define CPU_BOOT_ERR_FATAL_MASK					\
 		((1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) |		\
 		 (1 << CPU_BOOT_ERR_PLL_FAIL) |			\
-		 (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) |	\
 		 (1 << CPU_BOOT_ERR_BINNING_FAIL) |		\
 		 (1 << CPU_BOOT_ERR_DRAM_SKIPPED) |		\
 		 (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) |	\
@@ -194,6 +193,8 @@ enum cpu_boot_dev_sts {
 	CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN = 24,
 	CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN = 25,
 	CPU_BOOT_DEV_STS_MAP_HWMON_EN = 26,
+	CPU_BOOT_DEV_STS_NIC_MEM_CLEAR_EN = 27,
+	CPU_BOOT_DEV_STS_MMU_PGTBL_DRAM_EN = 28,
 	CPU_BOOT_DEV_STS_ENABLED = 31,
 	CPU_BOOT_DEV_STS_SCND_EN = 63,
 	CPU_BOOT_DEV_STS_LAST = 64 /* we have 2 registers of 32 bits */
@@ -331,6 +332,17 @@ enum cpu_boot_dev_sts {
  *					HWMON enum mapping to cpucp enums.
  *					Initialized in: linux
  *
+ * CPU_BOOT_DEV_STS0_NIC_MEM_CLEAR_EN
+ *					If set, means f/w supports nic hbm memory clear and
+ *					tmr,txs hbm memory init.
+ *					Initialized in: zephyr-mgmt
+ *
+ * CPU_BOOT_DEV_STS_MMU_PGTBL_DRAM_EN
+ *					MMU page tables are located in DRAM.
+ *					F/W initializes security settings for MMU
+ *					page tables to reside in DRAM.
+ *					Initialized in: zephyr-mgmt
+ *
  * CPU_BOOT_DEV_STS0_ENABLED		Device status register enabled.
  *					This is a main indication that the
  *					running FW populates the device status
@@ -367,6 +379,8 @@ enum cpu_boot_dev_sts {
 #define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN	(1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN)
 #define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN	(1 << CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN)
 #define CPU_BOOT_DEV_STS0_MAP_HWMON_EN		(1 << CPU_BOOT_DEV_STS_MAP_HWMON_EN)
+#define CPU_BOOT_DEV_STS0_NIC_MEM_CLEAR_EN	(1 << CPU_BOOT_DEV_STS_NIC_MEM_CLEAR_EN)
+#define CPU_BOOT_DEV_STS0_MMU_PGTBL_DRAM_EN	(1 << CPU_BOOT_DEV_STS_MMU_PGTBL_DRAM_EN)
 #define CPU_BOOT_DEV_STS0_ENABLED		(1 << CPU_BOOT_DEV_STS_ENABLED)
 #define CPU_BOOT_DEV_STS1_ENABLED		(1 << CPU_BOOT_DEV_STS_ENABLED)
 
@@ -450,11 +464,11 @@ struct cpu_dyn_regs {
 	__le32 gic_dma_core_irq_ctrl;
 	__le32 gic_host_halt_irq;
 	__le32 gic_host_ints_irq;
-	__le32 gic_host_soft_rst_irq;
+	__le32 reserved0;
 	__le32 gic_rot_qm_irq_ctrl;
-	__le32 cpu_rst_status;
+	__le32 reserved1;
 	__le32 eng_arc_irq_ctrl;
-	__le32 reserved1[20];		/* reserve for future use */
+	__le32 reserved2[20];		/* reserve for future use */
 };
 
 /* TODO: remove the desc magic after the code is updated to use message */
@@ -551,8 +565,9 @@ enum lkd_fw_ascii_msg_lvls {
 	LKD_FW_ASCII_MSG_DBG = 3,
 };
 
-#define LKD_FW_ASCII_MSG_MAX_LEN	128
-#define LKD_FW_ASCII_MSG_MAX		4	/* consider ABI when changing */
+#define LKD_FW_ASCII_MSG_MAX_LEN		128
+#define LKD_FW_ASCII_MSG_MAX			4	/* consider ABI when changing */
+#define LKD_FW_ASCII_MSG_MIN_DESC_VERSION	3
 
 struct lkd_fw_ascii_msg {
 	__u8 valid;
-- 
cgit v1.2.3


From c2a27584ff3b7f17da3ed2abfa3956f9d376e3da Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Mon, 11 Mar 2024 18:31:10 +0200
Subject: accel/habanalabs: separate nonce from max_size in cpucp_packet struct

In struct cpucp_packet both nonce and data_max_size members are in an
union overlapping each other. This is a problem as they both are used
in attestation and info_signed packets.
The solution here is to move the nonce member to a different union under
the same structure.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>
---
 include/linux/habanalabs/cpucp_if.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h
index 1ac1d68193e3..0913415243e8 100644
--- a/include/linux/habanalabs/cpucp_if.h
+++ b/include/linux/habanalabs/cpucp_if.h
@@ -859,9 +859,6 @@ struct cpucp_packet {
 		 * result cannot be used to hold general purpose data.
 		 */
 		__le32 status_mask;
-
-		/* random, used once number, for security packets */
-		__le32 nonce;
 	};
 
 	union {
@@ -870,6 +867,9 @@ struct cpucp_packet {
 
 		/* For Generic packet sub index */
 		__le32 pkt_subidx;
+
+		/* random, used once number, for security packets */
+		__le32 nonce;
 	};
 };
 
-- 
cgit v1.2.3


From cebb64f9335b073cc2877fed16c613c56bed82bc Mon Sep 17 00:00:00 2001
From: Vitaly Margolin <vmargolin@habana.ai>
Date: Mon, 27 May 2024 17:25:37 +0300
Subject: accel/habanalabs: add cpld ts cpld_timestamp cpucp

Add cpld_timestamp field to cpucp_info structure and return cpld
timestamp as part of cpld version

Signed-off-by: Vitaly Margolin <vmargolin@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>
---
 include/linux/habanalabs/cpucp_if.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h
index 0913415243e8..1ed17887f1a8 100644
--- a/include/linux/habanalabs/cpucp_if.h
+++ b/include/linux/habanalabs/cpucp_if.h
@@ -1146,6 +1146,7 @@ struct cpucp_security_info {
  *                (0 = fully functional, 1 = lower-half is not functional,
  *                 2 = upper-half is not functional)
  * @sec_info: security information
+ * @cpld_timestamp: CPLD programmed F/W timestamp.
  * @pll_map: Bit map of supported PLLs for current ASIC version.
  * @mme_binning_mask: MME binning mask,
  *                    bits [0:6]   <==> dcore0 mme fma
@@ -1193,7 +1194,7 @@ struct cpucp_info {
 	__u8 substrate_version;
 	__u8 eq_health_check_supported;
 	struct cpucp_security_info sec_info;
-	__le32 reserved2;
+	__le32 cpld_timestamp;
 	__u8 pll_map[PLL_MAP_LEN];
 	__le64 mme_binning_mask;
 	__u8 fw_os_version[VERSION_MAX_LEN];
-- 
cgit v1.2.3


From c4548eee537ed7671170eb370c6c8db12dcfd3e8 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Tue, 16 Apr 2024 19:01:42 +0300
Subject: accel/habanalabs: dump the EQ entries headers on EQ heartbeat failure

Add a dump of the EQ entries headers upon a EQ heartbeat failure.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>
---
 include/linux/habanalabs/cpucp_if.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h
index 1ed17887f1a8..7ed3fdd55dda 100644
--- a/include/linux/habanalabs/cpucp_if.h
+++ b/include/linux/habanalabs/cpucp_if.h
@@ -397,6 +397,9 @@ struct hl_eq_entry {
 #define EQ_CTL_READY_SHIFT		31
 #define EQ_CTL_READY_MASK		0x80000000
 
+#define EQ_CTL_EVENT_MODE_SHIFT		28
+#define EQ_CTL_EVENT_MODE_MASK		0x70000000
+
 #define EQ_CTL_EVENT_TYPE_SHIFT		16
 #define EQ_CTL_EVENT_TYPE_MASK		0x0FFF0000
 
-- 
cgit v1.2.3


From b7f5e0636f1d450c5df00fea194a8efd39f87a1b Mon Sep 17 00:00:00 2001
From: Alex Bee <knaerzche@gmail.com>
Date: Thu, 6 Jun 2024 16:33:59 +0200
Subject: dt-bindings: clock: rk3128: Drop CLK_NR_CLKS

CLK_NR_CLKS should not be part of the binding. Let's drop it, since
the kernel code no longer uses it either.

Signed-off-by: Alex Bee <knaerzche@gmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20240606143401.32454-4-knaerzche@gmail.com
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 include/dt-bindings/clock/rk3128-cru.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/rk3128-cru.h b/include/dt-bindings/clock/rk3128-cru.h
index 1be455ba4985..2616a8791c14 100644
--- a/include/dt-bindings/clock/rk3128-cru.h
+++ b/include/dt-bindings/clock/rk3128-cru.h
@@ -145,8 +145,6 @@
 #define HCLK_CRYPTO		476
 #define HCLK_PERI		478
 
-#define CLK_NR_CLKS		(HCLK_PERI + 1)
-
 /* soft-reset indices */
 #define SRST_CORE0_PO		0
 #define SRST_CORE1_PO		1
-- 
cgit v1.2.3


From f9da49c3c4c43278d8d8fafb3df7f5514478eb28 Mon Sep 17 00:00:00 2001
From: Alex Bee <knaerzche@gmail.com>
Date: Thu, 6 Jun 2024 16:34:00 +0200
Subject: dt-bindings: clock: rk3128: Add HCLK_SFC

Add a clock id for SFC's AHB clock.

Signed-off-by: Alex Bee <knaerzche@gmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20240606143401.32454-5-knaerzche@gmail.com
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 include/dt-bindings/clock/rk3128-cru.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/rk3128-cru.h b/include/dt-bindings/clock/rk3128-cru.h
index 2616a8791c14..b609fcf96508 100644
--- a/include/dt-bindings/clock/rk3128-cru.h
+++ b/include/dt-bindings/clock/rk3128-cru.h
@@ -144,6 +144,7 @@
 #define HCLK_TSP		475
 #define HCLK_CRYPTO		476
 #define HCLK_PERI		478
+#define HCLK_SFC		479
 
 /* soft-reset indices */
 #define SRST_CORE0_PO		0
-- 
cgit v1.2.3


From 84f5a7b67b61bfeb0a939ddc5eca8586cae101de Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Mon, 27 May 2024 14:54:52 +0200
Subject: firmware: qcom: add a dedicated TrustZone buffer allocator

We have several SCM calls that require passing buffers to the TrustZone
on top of the SMC core which allocates memory for calls that require
more than 4 arguments.

Currently every user does their own thing which leads to code
duplication. Many users call dma_alloc_coherent() for every call which
is terribly unperformant (speed- and size-wise).

Provide a set of library functions for creating and managing pools of
memory which is suitable for sharing with the TrustZone, that is:
page-aligned, contiguous and non-cachable as well as provides a way of
mapping of kernel virtual addresses to physical space.

Make the allocator ready for extending with additional modes of operation
which will allow us to support the SHM bridge safety mechanism once all
users convert.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Andrew Halaney <ahalaney@redhat.com>
Tested-by: Andrew Halaney <ahalaney@redhat.com> # sc8280xp-lenovo-thinkpad-x13s
Tested-by: Deepti Jaggi <quic_djaggi@quicinc.com> #sa8775p-ride
Reviewed-by: Elliot Berman <quic_eberman@quicinc.com>
Link: https://lore.kernel.org/r/20240527-shm-bridge-v10-2-ce7afaa58d3a@linaro.org
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/firmware/qcom/qcom_tzmem.h | 56 ++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 include/linux/firmware/qcom/qcom_tzmem.h

(limited to 'include')

diff --git a/include/linux/firmware/qcom/qcom_tzmem.h b/include/linux/firmware/qcom/qcom_tzmem.h
new file mode 100644
index 000000000000..b83b63a0c049
--- /dev/null
+++ b/include/linux/firmware/qcom/qcom_tzmem.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023-2024 Linaro Ltd.
+ */
+
+#ifndef __QCOM_TZMEM_H
+#define __QCOM_TZMEM_H
+
+#include <linux/cleanup.h>
+#include <linux/gfp.h>
+#include <linux/types.h>
+
+struct device;
+struct qcom_tzmem_pool;
+
+/**
+ * enum qcom_tzmem_policy - Policy for pool growth.
+ */
+enum qcom_tzmem_policy {
+	/**< Static pool, never grow above initial size. */
+	QCOM_TZMEM_POLICY_STATIC = 1,
+	/**< When out of memory, add increment * current size of memory. */
+	QCOM_TZMEM_POLICY_MULTIPLIER,
+	/**< When out of memory add as much as is needed until max_size. */
+	QCOM_TZMEM_POLICY_ON_DEMAND,
+};
+
+/**
+ * struct qcom_tzmem_pool_config - TZ memory pool configuration.
+ * @initial_size: Number of bytes to allocate for the pool during its creation.
+ * @policy: Pool size growth policy.
+ * @increment: Used with policies that allow pool growth.
+ * @max_size: Size above which the pool will never grow.
+ */
+struct qcom_tzmem_pool_config {
+	size_t initial_size;
+	enum qcom_tzmem_policy policy;
+	size_t increment;
+	size_t max_size;
+};
+
+struct qcom_tzmem_pool *
+qcom_tzmem_pool_new(const struct qcom_tzmem_pool_config *config);
+void qcom_tzmem_pool_free(struct qcom_tzmem_pool *pool);
+struct qcom_tzmem_pool *
+devm_qcom_tzmem_pool_new(struct device *dev,
+			 const struct qcom_tzmem_pool_config *config);
+
+void *qcom_tzmem_alloc(struct qcom_tzmem_pool *pool, size_t size, gfp_t gfp);
+void qcom_tzmem_free(void *ptr);
+
+DEFINE_FREE(qcom_tzmem, void *, if (_T) qcom_tzmem_free(_T))
+
+phys_addr_t qcom_tzmem_to_phys(void *ptr);
+
+#endif /* __QCOM_TZMEM */
-- 
cgit v1.2.3


From 6612103ec35af6058bb85ab24dae28e119b3c055 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Mon, 27 May 2024 14:54:59 +0200
Subject: firmware: qcom: qseecom: convert to using the TZ allocator

Drop the DMA mapping operations from qcom_scm_qseecom_app_send() and
convert all users of it in the qseecom module to using the TZ allocator
for creating SCM call buffers. As this is largely a module separate from
the SCM driver, let's use a separate memory pool. Set the initial size to
4K and - if we run out - add twice the current amount to the pool.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Elliot Berman <quic_eberman@quicinc.com>
Reviewed-by: Amirreza Zarrabi <quic_azarrabi@quicinc.com>
Link: https://lore.kernel.org/r/20240527-shm-bridge-v10-9-ce7afaa58d3a@linaro.org
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/firmware/qcom/qcom_qseecom.h | 8 ++++----
 include/linux/firmware/qcom/qcom_scm.h     | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h
index 366243ee9609..1dc5b3b50aa9 100644
--- a/include/linux/firmware/qcom/qcom_qseecom.h
+++ b/include/linux/firmware/qcom/qcom_qseecom.h
@@ -73,9 +73,9 @@ static inline void qseecom_dma_free(struct qseecom_client *client, size_t size,
 /**
  * qcom_qseecom_app_send() - Send to and receive data from a given QSEE app.
  * @client:   The QSEECOM client associated with the target app.
- * @req:      DMA address of the request buffer sent to the app.
+ * @req:      Request buffer sent to the app (must be TZ memory).
  * @req_size: Size of the request buffer.
- * @rsp:      DMA address of the response buffer, written to by the app.
+ * @rsp:      Response buffer, written to by the app (must be TZ memory).
  * @rsp_size: Size of the response buffer.
  *
  * Sends a request to the QSEE app associated with the given client and read
@@ -90,8 +90,8 @@ static inline void qseecom_dma_free(struct qseecom_client *client, size_t size,
  * Return: Zero on success, nonzero on failure.
  */
 static inline int qcom_qseecom_app_send(struct qseecom_client *client,
-					dma_addr_t req, size_t req_size,
-					dma_addr_t rsp, size_t rsp_size)
+					void *req, size_t req_size,
+					void *rsp, size_t rsp_size)
 {
 	return qcom_scm_qseecom_app_send(client->app_id, req, req_size, rsp, rsp_size);
 }
diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h
index a221a643dc12..77be72d40200 100644
--- a/include/linux/firmware/qcom/qcom_scm.h
+++ b/include/linux/firmware/qcom/qcom_scm.h
@@ -141,8 +141,8 @@ int qcom_scm_gpu_init_regs(u32 gpu_req);
 #ifdef CONFIG_QCOM_QSEECOM
 
 int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id);
-int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size,
-			      dma_addr_t rsp, size_t rsp_size);
+int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size,
+			      void *rsp, size_t rsp_size);
 
 #else /* CONFIG_QCOM_QSEECOM */
 
@@ -152,8 +152,8 @@ static inline int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id)
 }
 
 static inline int qcom_scm_qseecom_app_send(u32 app_id,
-					    dma_addr_t req, size_t req_size,
-					    dma_addr_t rsp, size_t rsp_size)
+					    void *req, size_t req_size,
+					    void *rsp, size_t rsp_size)
 {
 	return -EINVAL;
 }
-- 
cgit v1.2.3


From 178e19c0df1b1b27668fc6ca43b25a03eda01dad Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Mon, 27 May 2024 14:55:00 +0200
Subject: firmware: qcom: scm: add support for SHM bridge operations

SHM Bridge is a safety mechanism allowing to limit the amount of memory
shared between the kernel and the TrustZone to regions explicitly marked
as such.

Add low-level primitives for enabling SHM bridge support as well as
creating and destroying SHM bridges to qcom-scm.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Acked-by: Andrew Halaney <ahalaney@redhat.com>
Tested-by: Andrew Halaney <ahalaney@redhat.com> # sc8280xp-lenovo-thinkpad-x13s
Tested-by: Deepti Jaggi <quic_djaggi@quicinc.com> #sa8775p-ride
Reviewed-by: Elliot Berman <quic_eberman@quicinc.com>
Link: https://lore.kernel.org/r/20240527-shm-bridge-v10-10-ce7afaa58d3a@linaro.org
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/firmware/qcom/qcom_scm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h
index 77be72d40200..9f14976399ab 100644
--- a/include/linux/firmware/qcom/qcom_scm.h
+++ b/include/linux/firmware/qcom/qcom_scm.h
@@ -138,6 +138,12 @@ bool qcom_scm_lmh_dcvsh_available(void);
 
 int qcom_scm_gpu_init_regs(u32 gpu_req);
 
+int qcom_scm_shm_bridge_enable(void);
+int qcom_scm_shm_bridge_create(struct device *dev, u64 pfn_and_ns_perm_flags,
+			       u64 ipfn_and_s_perm_flags, u64 size_and_flags,
+			       u64 ns_vmids, u64 *handle);
+int qcom_scm_shm_bridge_delete(struct device *dev, u64 handle);
+
 #ifdef CONFIG_QCOM_QSEECOM
 
 int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id);
-- 
cgit v1.2.3


From ae24ba769b5c108b00cdf49ed39b189eb012fbb8 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Sun, 23 Jun 2024 17:39:00 +0900
Subject: firewire: core: undefine macros after use in tracepoints events

Some macros are defined in tracepoints events. They should be back to
undefined state after use.

Link: https://lore.kernel.org/r/20240623083900.777897-1-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index d237a30d197b..25289a063deb 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -428,6 +428,14 @@ TRACE_EVENT(self_id_sequence,
 	)
 );
 
+#undef PHY_PACKET_SELF_ID_GET_PHY_ID
+#undef PHY_PACKET_SELF_ID_GET_LINK_ACTIVE
+#undef PHY_PACKET_SELF_ID_GET_GAP_COUNT
+#undef PHY_PACKET_SELF_ID_GET_SCODE
+#undef PHY_PACKET_SELF_ID_GET_CONTENDER
+#undef PHY_PACKET_SELF_ID_GET_POWER_CLASS
+#undef PHY_PACKET_SELF_ID_GET_INITIATED_RESET
+
 #undef QUADLET_SIZE
 
 #endif // _FIREWIRE_TRACE_EVENT_H
-- 
cgit v1.2.3


From b5bad839c01e3a2a41056ac0bc07e7b1ea0ba412 Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht@ti.com>
Date: Wed, 19 Jun 2024 01:06:48 +0530
Subject: media: v4l2-jpeg: Export reference quantization and huffman tables

Export reference quantization and huffman tables as provided in ITU-T.81 so
that they can be re-used by other JPEG drivers.

These are example tables provided in ITU-T.81 as reference tables and the
JPEG encoders are free to use either these or their own proprietary tables.

Also add necessary prefixes to be used for huffman tables in global header
file.

Signed-off-by: Devarsh Thakkar <devarsht@ti.com>
Reviewed-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Sebastian Fricke <sebastian.fricke@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/media/v4l2-jpeg.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include')

diff --git a/include/media/v4l2-jpeg.h b/include/media/v4l2-jpeg.h
index 2dba843ce3bd..b65658a02e3c 100644
--- a/include/media/v4l2-jpeg.h
+++ b/include/media/v4l2-jpeg.h
@@ -14,6 +14,30 @@
 
 #define V4L2_JPEG_MAX_COMPONENTS	4
 #define V4L2_JPEG_MAX_TABLES		4
+/*
+ * Prefixes used to generate huffman table class and destination identifiers as
+ * described below:
+ *
+ * V4L2_JPEG_LUM_HT | V4L2_JPEG_DC_HT : Prefix for Luma DC coefficients
+ *					huffman table
+ * V4L2_JPEG_LUM_HT | V4L2_JPEG_AC_HT : Prefix for Luma AC coefficients
+ *					huffman table
+ * V4L2_JPEG_CHR_HT | V4L2_JPEG_DC_HT : Prefix for Chroma DC coefficients
+ *					huffman table
+ * V4L2_JPEG_CHR_HT | V4L2_JPEG_AC_HT : Prefix for Chroma AC coefficients
+ *					huffman table
+ */
+#define V4L2_JPEG_LUM_HT		0x00
+#define V4L2_JPEG_CHR_HT		0x01
+#define V4L2_JPEG_DC_HT			0x00
+#define V4L2_JPEG_AC_HT			0x10
+
+/* Length of reference huffman tables as provided in Table K.3 of ITU-T.81 */
+#define V4L2_JPEG_REF_HT_AC_LEN		178
+#define V4L2_JPEG_REF_HT_DC_LEN		28
+
+/* Array size for 8x8 block of samples or DCT coefficient */
+#define V4L2_JPEG_PIXELS_IN_BLOCK	64
 
 /**
  * struct v4l2_jpeg_reference - reference into the JPEG buffer
@@ -154,4 +178,12 @@ int v4l2_jpeg_parse_quantization_tables(void *buf, size_t len, u8 precision,
 int v4l2_jpeg_parse_huffman_tables(void *buf, size_t len,
 				   struct v4l2_jpeg_reference *huffman_tables);
 
+extern const u8 v4l2_jpeg_zigzag_scan_index[V4L2_JPEG_PIXELS_IN_BLOCK];
+extern const u8 v4l2_jpeg_ref_table_luma_qt[V4L2_JPEG_PIXELS_IN_BLOCK];
+extern const u8 v4l2_jpeg_ref_table_chroma_qt[V4L2_JPEG_PIXELS_IN_BLOCK];
+extern const u8 v4l2_jpeg_ref_table_luma_dc_ht[V4L2_JPEG_REF_HT_DC_LEN];
+extern const u8 v4l2_jpeg_ref_table_luma_ac_ht[V4L2_JPEG_REF_HT_AC_LEN];
+extern const u8 v4l2_jpeg_ref_table_chroma_dc_ht[V4L2_JPEG_REF_HT_DC_LEN];
+extern const u8 v4l2_jpeg_ref_table_chroma_ac_ht[V4L2_JPEG_REF_HT_AC_LEN];
+
 #endif
-- 
cgit v1.2.3


From 00dd4d86ed908e70d912a96ad91d1248ff055b62 Mon Sep 17 00:00:00 2001
From: Shenghao Ding <shenghao-ding@ti.com>
Date: Fri, 21 Jun 2024 21:23:07 +0800
Subject: ASoc: tas2781: Add name_prefix as the prefix name of firmwares and
 kcontrol to support corresponding TAS2563/TAS2781s

Add name_prefix as the prefix name of firmwares and
kcontrol to support corresponding TAS2563/TAS2781s.
name_prefix is not mandatory.

Signed-off-by: Shenghao Ding <shenghao-ding@ti.com>
Link: https://patch.msgid.link/20240621132309.564-1-shenghao-ding@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/tas2781.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h
index 99ca3e401fd1..cd8ce522b78e 100644
--- a/include/sound/tas2781.h
+++ b/include/sound/tas2781.h
@@ -108,6 +108,7 @@ struct tasdevice_priv {
 	unsigned char coef_binaryname[64];
 	unsigned char rca_binaryname[64];
 	unsigned char dev_name[32];
+	const char *name_prefix;
 	unsigned char ndev;
 	unsigned int magic_num;
 	unsigned int chip_id;
-- 
cgit v1.2.3


From c5d1e53040efd3c630df3e2dba3df76372174aa6 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut+renesas@mailbox.org>
Date: Sun, 16 Jun 2024 18:00:20 +0200
Subject: dt-bindings: clock: r8a7779: Remove duplicate newline

Drop duplicate newline. No functional change.

Signed-off-by: Marek Vasut <marek.vasut+renesas@mailbox.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20240616160038.45937-1-marek.vasut+renesas@mailbox.org
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 include/dt-bindings/clock/r8a7779-clock.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/r8a7779-clock.h b/include/dt-bindings/clock/r8a7779-clock.h
index 342a60b11934..e39acdc6499c 100644
--- a/include/dt-bindings/clock/r8a7779-clock.h
+++ b/include/dt-bindings/clock/r8a7779-clock.h
@@ -57,5 +57,4 @@
 #define R8A7779_CLK_MMC1	30
 #define R8A7779_CLK_MMC0	31
 
-
 #endif /* __DT_BINDINGS_CLOCK_R8A7779_H__ */
-- 
cgit v1.2.3


From d6c5fc9add9eb7a0d5bd179ab07cbf43e32b28d8 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 19 Jun 2024 13:22:46 +0200
Subject: dt-bindings: clock: rcar-gen2: Remove obsolete header files

The clock definitions in <dt-bindings/clock/r8a779?-clock.h> were
superseded by those in <dt-bindings/clock/r8a779?-cpg-mssr.h> a long
time ago.

The last DTS user of these files was removed in commit 362b334b17943d84
("ARM: dts: r8a7791: Convert to new CPG/MSSR bindings") in v4.15.
Driver support for the old bindings was removed in commit
58256143cff7c2e0 ("clk: renesas: Remove R-Car Gen2 legacy DT clock
support") in v5.5, so there is no point to keep on carrying these.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/d4abb688d666be35e99577a25b16958cbb4c3c98.1718796005.git.geert+renesas@glider.be
---
 include/dt-bindings/clock/r8a7790-clock.h | 158 -----------------------------
 include/dt-bindings/clock/r8a7791-clock.h | 161 ------------------------------
 include/dt-bindings/clock/r8a7792-clock.h |  98 ------------------
 include/dt-bindings/clock/r8a7793-clock.h | 159 -----------------------------
 include/dt-bindings/clock/r8a7794-clock.h | 137 -------------------------
 5 files changed, 713 deletions(-)
 delete mode 100644 include/dt-bindings/clock/r8a7790-clock.h
 delete mode 100644 include/dt-bindings/clock/r8a7791-clock.h
 delete mode 100644 include/dt-bindings/clock/r8a7792-clock.h
 delete mode 100644 include/dt-bindings/clock/r8a7793-clock.h
 delete mode 100644 include/dt-bindings/clock/r8a7794-clock.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/r8a7790-clock.h b/include/dt-bindings/clock/r8a7790-clock.h
deleted file mode 100644
index c92ff1e60223..000000000000
--- a/include/dt-bindings/clock/r8a7790-clock.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright 2013 Ideas On Board SPRL
- */
-
-#ifndef __DT_BINDINGS_CLOCK_R8A7790_H__
-#define __DT_BINDINGS_CLOCK_R8A7790_H__
-
-/* CPG */
-#define R8A7790_CLK_MAIN		0
-#define R8A7790_CLK_PLL0		1
-#define R8A7790_CLK_PLL1		2
-#define R8A7790_CLK_PLL3		3
-#define R8A7790_CLK_LB			4
-#define R8A7790_CLK_QSPI		5
-#define R8A7790_CLK_SDH			6
-#define R8A7790_CLK_SD0			7
-#define R8A7790_CLK_SD1			8
-#define R8A7790_CLK_Z			9
-#define R8A7790_CLK_RCAN		10
-#define R8A7790_CLK_ADSP		11
-
-/* MSTP0 */
-#define R8A7790_CLK_MSIOF0		0
-
-/* MSTP1 */
-#define R8A7790_CLK_VCP1		0
-#define R8A7790_CLK_VCP0		1
-#define R8A7790_CLK_VPC1		2
-#define R8A7790_CLK_VPC0		3
-#define R8A7790_CLK_JPU			6
-#define R8A7790_CLK_SSP1		9
-#define R8A7790_CLK_TMU1		11
-#define R8A7790_CLK_3DG			12
-#define R8A7790_CLK_2DDMAC		15
-#define R8A7790_CLK_FDP1_2		17
-#define R8A7790_CLK_FDP1_1		18
-#define R8A7790_CLK_FDP1_0		19
-#define R8A7790_CLK_TMU3		21
-#define R8A7790_CLK_TMU2		22
-#define R8A7790_CLK_CMT0		24
-#define R8A7790_CLK_TMU0		25
-#define R8A7790_CLK_VSP1_DU1		27
-#define R8A7790_CLK_VSP1_DU0		28
-#define R8A7790_CLK_VSP1_R		30
-#define R8A7790_CLK_VSP1_S		31
-
-/* MSTP2 */
-#define R8A7790_CLK_SCIFA2		2
-#define R8A7790_CLK_SCIFA1		3
-#define R8A7790_CLK_SCIFA0		4
-#define R8A7790_CLK_MSIOF2		5
-#define R8A7790_CLK_SCIFB0		6
-#define R8A7790_CLK_SCIFB1		7
-#define R8A7790_CLK_MSIOF1		8
-#define R8A7790_CLK_MSIOF3		15
-#define R8A7790_CLK_SCIFB2		16
-#define R8A7790_CLK_SYS_DMAC1		18
-#define R8A7790_CLK_SYS_DMAC0		19
-
-/* MSTP3 */
-#define R8A7790_CLK_IIC2		0
-#define R8A7790_CLK_TPU0		4
-#define R8A7790_CLK_MMCIF1		5
-#define R8A7790_CLK_SCIF2		10
-#define R8A7790_CLK_SDHI3		11
-#define R8A7790_CLK_SDHI2		12
-#define R8A7790_CLK_SDHI1		13
-#define R8A7790_CLK_SDHI0		14
-#define R8A7790_CLK_MMCIF0		15
-#define R8A7790_CLK_IIC0		18
-#define R8A7790_CLK_PCIEC		19
-#define R8A7790_CLK_IIC1		23
-#define R8A7790_CLK_SSUSB		28
-#define R8A7790_CLK_CMT1		29
-#define R8A7790_CLK_USBDMAC0		30
-#define R8A7790_CLK_USBDMAC1		31
-
-/* MSTP4 */
-#define R8A7790_CLK_IRQC		7
-#define R8A7790_CLK_INTC_SYS		8
-
-/* MSTP5 */
-#define R8A7790_CLK_AUDIO_DMAC1		1
-#define R8A7790_CLK_AUDIO_DMAC0		2
-#define R8A7790_CLK_ADSP_MOD		6
-#define R8A7790_CLK_THERMAL		22
-#define R8A7790_CLK_PWM			23
-
-/* MSTP7 */
-#define R8A7790_CLK_EHCI		3
-#define R8A7790_CLK_HSUSB		4
-#define R8A7790_CLK_HSCIF1		16
-#define R8A7790_CLK_HSCIF0		17
-#define R8A7790_CLK_SCIF1		20
-#define R8A7790_CLK_SCIF0		21
-#define R8A7790_CLK_DU2			22
-#define R8A7790_CLK_DU1			23
-#define R8A7790_CLK_DU0			24
-#define R8A7790_CLK_LVDS1		25
-#define R8A7790_CLK_LVDS0		26
-
-/* MSTP8 */
-#define R8A7790_CLK_MLB			2
-#define R8A7790_CLK_VIN3		8
-#define R8A7790_CLK_VIN2		9
-#define R8A7790_CLK_VIN1		10
-#define R8A7790_CLK_VIN0		11
-#define R8A7790_CLK_ETHERAVB		12
-#define R8A7790_CLK_ETHER		13
-#define R8A7790_CLK_SATA1		14
-#define R8A7790_CLK_SATA0		15
-
-/* MSTP9 */
-#define R8A7790_CLK_GPIO5		7
-#define R8A7790_CLK_GPIO4		8
-#define R8A7790_CLK_GPIO3		9
-#define R8A7790_CLK_GPIO2		10
-#define R8A7790_CLK_GPIO1		11
-#define R8A7790_CLK_GPIO0		12
-#define R8A7790_CLK_RCAN1		15
-#define R8A7790_CLK_RCAN0		16
-#define R8A7790_CLK_QSPI_MOD		17
-#define R8A7790_CLK_IICDVFS		26
-#define R8A7790_CLK_I2C3		28
-#define R8A7790_CLK_I2C2		29
-#define R8A7790_CLK_I2C1		30
-#define R8A7790_CLK_I2C0		31
-
-/* MSTP10 */
-#define R8A7790_CLK_SSI_ALL		5
-#define R8A7790_CLK_SSI9		6
-#define R8A7790_CLK_SSI8		7
-#define R8A7790_CLK_SSI7		8
-#define R8A7790_CLK_SSI6		9
-#define R8A7790_CLK_SSI5		10
-#define R8A7790_CLK_SSI4		11
-#define R8A7790_CLK_SSI3		12
-#define R8A7790_CLK_SSI2		13
-#define R8A7790_CLK_SSI1		14
-#define R8A7790_CLK_SSI0		15
-#define R8A7790_CLK_SCU_ALL		17
-#define R8A7790_CLK_SCU_DVC1		18
-#define R8A7790_CLK_SCU_DVC0		19
-#define R8A7790_CLK_SCU_CTU1_MIX1	20
-#define R8A7790_CLK_SCU_CTU0_MIX0	21
-#define R8A7790_CLK_SCU_SRC9		22
-#define R8A7790_CLK_SCU_SRC8		23
-#define R8A7790_CLK_SCU_SRC7		24
-#define R8A7790_CLK_SCU_SRC6		25
-#define R8A7790_CLK_SCU_SRC5		26
-#define R8A7790_CLK_SCU_SRC4		27
-#define R8A7790_CLK_SCU_SRC3		28
-#define R8A7790_CLK_SCU_SRC2		29
-#define R8A7790_CLK_SCU_SRC1		30
-#define R8A7790_CLK_SCU_SRC0		31
-
-#endif /* __DT_BINDINGS_CLOCK_R8A7790_H__ */
diff --git a/include/dt-bindings/clock/r8a7791-clock.h b/include/dt-bindings/clock/r8a7791-clock.h
deleted file mode 100644
index bb4f18b1b3d5..000000000000
--- a/include/dt-bindings/clock/r8a7791-clock.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright 2013 Ideas On Board SPRL
- */
-
-#ifndef __DT_BINDINGS_CLOCK_R8A7791_H__
-#define __DT_BINDINGS_CLOCK_R8A7791_H__
-
-/* CPG */
-#define R8A7791_CLK_MAIN		0
-#define R8A7791_CLK_PLL0		1
-#define R8A7791_CLK_PLL1		2
-#define R8A7791_CLK_PLL3		3
-#define R8A7791_CLK_LB			4
-#define R8A7791_CLK_QSPI		5
-#define R8A7791_CLK_SDH			6
-#define R8A7791_CLK_SD0			7
-#define R8A7791_CLK_Z			8
-#define R8A7791_CLK_RCAN		9
-#define R8A7791_CLK_ADSP		10
-
-/* MSTP0 */
-#define R8A7791_CLK_MSIOF0		0
-
-/* MSTP1 */
-#define R8A7791_CLK_VCP0		1
-#define R8A7791_CLK_VPC0		3
-#define R8A7791_CLK_JPU			6
-#define R8A7791_CLK_SSP1		9
-#define R8A7791_CLK_TMU1		11
-#define R8A7791_CLK_3DG			12
-#define R8A7791_CLK_2DDMAC		15
-#define R8A7791_CLK_FDP1_1		18
-#define R8A7791_CLK_FDP1_0		19
-#define R8A7791_CLK_TMU3		21
-#define R8A7791_CLK_TMU2		22
-#define R8A7791_CLK_CMT0		24
-#define R8A7791_CLK_TMU0		25
-#define R8A7791_CLK_VSP1_DU1		27
-#define R8A7791_CLK_VSP1_DU0		28
-#define R8A7791_CLK_VSP1_S		31
-
-/* MSTP2 */
-#define R8A7791_CLK_SCIFA2		2
-#define R8A7791_CLK_SCIFA1		3
-#define R8A7791_CLK_SCIFA0		4
-#define R8A7791_CLK_MSIOF2		5
-#define R8A7791_CLK_SCIFB0		6
-#define R8A7791_CLK_SCIFB1		7
-#define R8A7791_CLK_MSIOF1		8
-#define R8A7791_CLK_SCIFB2		16
-#define R8A7791_CLK_SYS_DMAC1		18
-#define R8A7791_CLK_SYS_DMAC0		19
-
-/* MSTP3 */
-#define R8A7791_CLK_TPU0		4
-#define R8A7791_CLK_SDHI2		11
-#define R8A7791_CLK_SDHI1		12
-#define R8A7791_CLK_SDHI0		14
-#define R8A7791_CLK_MMCIF0		15
-#define R8A7791_CLK_IIC0		18
-#define R8A7791_CLK_PCIEC		19
-#define R8A7791_CLK_IIC1		23
-#define R8A7791_CLK_SSUSB		28
-#define R8A7791_CLK_CMT1		29
-#define R8A7791_CLK_USBDMAC0		30
-#define R8A7791_CLK_USBDMAC1		31
-
-/* MSTP4 */
-#define R8A7791_CLK_IRQC		7
-#define R8A7791_CLK_INTC_SYS		8
-
-/* MSTP5 */
-#define R8A7791_CLK_AUDIO_DMAC1		1
-#define R8A7791_CLK_AUDIO_DMAC0		2
-#define R8A7791_CLK_ADSP_MOD		6
-#define R8A7791_CLK_THERMAL		22
-#define R8A7791_CLK_PWM			23
-
-/* MSTP7 */
-#define R8A7791_CLK_EHCI		3
-#define R8A7791_CLK_HSUSB		4
-#define R8A7791_CLK_HSCIF2		13
-#define R8A7791_CLK_SCIF5		14
-#define R8A7791_CLK_SCIF4		15
-#define R8A7791_CLK_HSCIF1		16
-#define R8A7791_CLK_HSCIF0		17
-#define R8A7791_CLK_SCIF3		18
-#define R8A7791_CLK_SCIF2		19
-#define R8A7791_CLK_SCIF1		20
-#define R8A7791_CLK_SCIF0		21
-#define R8A7791_CLK_DU1			23
-#define R8A7791_CLK_DU0			24
-#define R8A7791_CLK_LVDS0		26
-
-/* MSTP8 */
-#define R8A7791_CLK_IPMMU_SGX		0
-#define R8A7791_CLK_MLB			2
-#define R8A7791_CLK_VIN2		9
-#define R8A7791_CLK_VIN1		10
-#define R8A7791_CLK_VIN0		11
-#define R8A7791_CLK_ETHERAVB		12
-#define R8A7791_CLK_ETHER		13
-#define R8A7791_CLK_SATA1		14
-#define R8A7791_CLK_SATA0		15
-
-/* MSTP9 */
-#define R8A7791_CLK_GYROADC		1
-#define R8A7791_CLK_GPIO7		4
-#define R8A7791_CLK_GPIO6		5
-#define R8A7791_CLK_GPIO5		7
-#define R8A7791_CLK_GPIO4		8
-#define R8A7791_CLK_GPIO3		9
-#define R8A7791_CLK_GPIO2		10
-#define R8A7791_CLK_GPIO1		11
-#define R8A7791_CLK_GPIO0		12
-#define R8A7791_CLK_RCAN1		15
-#define R8A7791_CLK_RCAN0		16
-#define R8A7791_CLK_QSPI_MOD		17
-#define R8A7791_CLK_I2C5		25
-#define R8A7791_CLK_IICDVFS		26
-#define R8A7791_CLK_I2C4		27
-#define R8A7791_CLK_I2C3		28
-#define R8A7791_CLK_I2C2		29
-#define R8A7791_CLK_I2C1		30
-#define R8A7791_CLK_I2C0		31
-
-/* MSTP10 */
-#define R8A7791_CLK_SSI_ALL		5
-#define R8A7791_CLK_SSI9		6
-#define R8A7791_CLK_SSI8		7
-#define R8A7791_CLK_SSI7		8
-#define R8A7791_CLK_SSI6		9
-#define R8A7791_CLK_SSI5		10
-#define R8A7791_CLK_SSI4		11
-#define R8A7791_CLK_SSI3		12
-#define R8A7791_CLK_SSI2		13
-#define R8A7791_CLK_SSI1		14
-#define R8A7791_CLK_SSI0		15
-#define R8A7791_CLK_SCU_ALL		17
-#define R8A7791_CLK_SCU_DVC1		18
-#define R8A7791_CLK_SCU_DVC0		19
-#define R8A7791_CLK_SCU_CTU1_MIX1	20
-#define R8A7791_CLK_SCU_CTU0_MIX0	21
-#define R8A7791_CLK_SCU_SRC9		22
-#define R8A7791_CLK_SCU_SRC8		23
-#define R8A7791_CLK_SCU_SRC7		24
-#define R8A7791_CLK_SCU_SRC6		25
-#define R8A7791_CLK_SCU_SRC5		26
-#define R8A7791_CLK_SCU_SRC4		27
-#define R8A7791_CLK_SCU_SRC3		28
-#define R8A7791_CLK_SCU_SRC2		29
-#define R8A7791_CLK_SCU_SRC1		30
-#define R8A7791_CLK_SCU_SRC0		31
-
-/* MSTP11 */
-#define R8A7791_CLK_SCIFA3		6
-#define R8A7791_CLK_SCIFA4		7
-#define R8A7791_CLK_SCIFA5		8
-
-#endif /* __DT_BINDINGS_CLOCK_R8A7791_H__ */
diff --git a/include/dt-bindings/clock/r8a7792-clock.h b/include/dt-bindings/clock/r8a7792-clock.h
deleted file mode 100644
index 2948d9ce3a14..000000000000
--- a/include/dt-bindings/clock/r8a7792-clock.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2016 Cogent Embedded, Inc.
- */
-
-#ifndef __DT_BINDINGS_CLOCK_R8A7792_H__
-#define __DT_BINDINGS_CLOCK_R8A7792_H__
-
-/* CPG */
-#define R8A7792_CLK_MAIN		0
-#define R8A7792_CLK_PLL0		1
-#define R8A7792_CLK_PLL1		2
-#define R8A7792_CLK_PLL3		3
-#define R8A7792_CLK_LB			4
-#define R8A7792_CLK_QSPI		5
-
-/* MSTP0 */
-#define R8A7792_CLK_MSIOF0		0
-
-/* MSTP1 */
-#define R8A7792_CLK_JPU			6
-#define R8A7792_CLK_TMU1		11
-#define R8A7792_CLK_TMU3		21
-#define R8A7792_CLK_TMU2		22
-#define R8A7792_CLK_CMT0		24
-#define R8A7792_CLK_TMU0		25
-#define R8A7792_CLK_VSP1DU1		27
-#define R8A7792_CLK_VSP1DU0		28
-#define R8A7792_CLK_VSP1_SY		31
-
-/* MSTP2 */
-#define R8A7792_CLK_MSIOF1		8
-#define R8A7792_CLK_SYS_DMAC1		18
-#define R8A7792_CLK_SYS_DMAC0		19
-
-/* MSTP3 */
-#define R8A7792_CLK_TPU0		4
-#define R8A7792_CLK_SDHI0		14
-#define R8A7792_CLK_CMT1		29
-
-/* MSTP4 */
-#define R8A7792_CLK_IRQC		7
-#define R8A7792_CLK_INTC_SYS		8
-
-/* MSTP5 */
-#define R8A7792_CLK_AUDIO_DMAC0		2
-#define R8A7792_CLK_THERMAL		22
-#define R8A7792_CLK_PWM			23
-
-/* MSTP7 */
-#define R8A7792_CLK_HSCIF1		16
-#define R8A7792_CLK_HSCIF0		17
-#define R8A7792_CLK_SCIF3		18
-#define R8A7792_CLK_SCIF2		19
-#define R8A7792_CLK_SCIF1		20
-#define R8A7792_CLK_SCIF0		21
-#define R8A7792_CLK_DU1			23
-#define R8A7792_CLK_DU0			24
-
-/* MSTP8 */
-#define R8A7792_CLK_VIN5		4
-#define R8A7792_CLK_VIN4		5
-#define R8A7792_CLK_VIN3		8
-#define R8A7792_CLK_VIN2		9
-#define R8A7792_CLK_VIN1		10
-#define R8A7792_CLK_VIN0		11
-#define R8A7792_CLK_ETHERAVB		12
-
-/* MSTP9 */
-#define R8A7792_CLK_GPIO7		4
-#define R8A7792_CLK_GPIO6		5
-#define R8A7792_CLK_GPIO5		7
-#define R8A7792_CLK_GPIO4		8
-#define R8A7792_CLK_GPIO3		9
-#define R8A7792_CLK_GPIO2		10
-#define R8A7792_CLK_GPIO1		11
-#define R8A7792_CLK_GPIO0		12
-#define R8A7792_CLK_GPIO11		13
-#define R8A7792_CLK_GPIO10		14
-#define R8A7792_CLK_CAN1		15
-#define R8A7792_CLK_CAN0		16
-#define R8A7792_CLK_QSPI_MOD		17
-#define R8A7792_CLK_GPIO9		19
-#define R8A7792_CLK_GPIO8		21
-#define R8A7792_CLK_I2C5		25
-#define R8A7792_CLK_IICDVFS		26
-#define R8A7792_CLK_I2C4		27
-#define R8A7792_CLK_I2C3		28
-#define R8A7792_CLK_I2C2		29
-#define R8A7792_CLK_I2C1		30
-#define R8A7792_CLK_I2C0		31
-
-/* MSTP10 */
-#define R8A7792_CLK_SSI_ALL		5
-#define R8A7792_CLK_SSI4		11
-#define R8A7792_CLK_SSI3		12
-
-#endif /* __DT_BINDINGS_CLOCK_R8A7792_H__ */
diff --git a/include/dt-bindings/clock/r8a7793-clock.h b/include/dt-bindings/clock/r8a7793-clock.h
deleted file mode 100644
index 49c66d8ed178..000000000000
--- a/include/dt-bindings/clock/r8a7793-clock.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * r8a7793 clock definition
- *
- * Copyright (C) 2014  Renesas Electronics Corporation
- */
-
-#ifndef __DT_BINDINGS_CLOCK_R8A7793_H__
-#define __DT_BINDINGS_CLOCK_R8A7793_H__
-
-/* CPG */
-#define R8A7793_CLK_MAIN		0
-#define R8A7793_CLK_PLL0		1
-#define R8A7793_CLK_PLL1		2
-#define R8A7793_CLK_PLL3		3
-#define R8A7793_CLK_LB			4
-#define R8A7793_CLK_QSPI		5
-#define R8A7793_CLK_SDH			6
-#define R8A7793_CLK_SD0			7
-#define R8A7793_CLK_Z			8
-#define R8A7793_CLK_RCAN		9
-#define R8A7793_CLK_ADSP		10
-
-/* MSTP0 */
-#define R8A7793_CLK_MSIOF0		0
-
-/* MSTP1 */
-#define R8A7793_CLK_VCP0		1
-#define R8A7793_CLK_VPC0		3
-#define R8A7793_CLK_SSP1		9
-#define R8A7793_CLK_TMU1		11
-#define R8A7793_CLK_3DG			12
-#define R8A7793_CLK_2DDMAC		15
-#define R8A7793_CLK_FDP1_1		18
-#define R8A7793_CLK_FDP1_0		19
-#define R8A7793_CLK_TMU3		21
-#define R8A7793_CLK_TMU2		22
-#define R8A7793_CLK_CMT0		24
-#define R8A7793_CLK_TMU0		25
-#define R8A7793_CLK_VSP1_DU1		27
-#define R8A7793_CLK_VSP1_DU0		28
-#define R8A7793_CLK_VSP1_S		31
-
-/* MSTP2 */
-#define R8A7793_CLK_SCIFA2		2
-#define R8A7793_CLK_SCIFA1		3
-#define R8A7793_CLK_SCIFA0		4
-#define R8A7793_CLK_MSIOF2		5
-#define R8A7793_CLK_SCIFB0		6
-#define R8A7793_CLK_SCIFB1		7
-#define R8A7793_CLK_MSIOF1		8
-#define R8A7793_CLK_SCIFB2		16
-#define R8A7793_CLK_SYS_DMAC1		18
-#define R8A7793_CLK_SYS_DMAC0		19
-
-/* MSTP3 */
-#define R8A7793_CLK_TPU0		4
-#define R8A7793_CLK_SDHI2		11
-#define R8A7793_CLK_SDHI1		12
-#define R8A7793_CLK_SDHI0		14
-#define R8A7793_CLK_MMCIF0		15
-#define R8A7793_CLK_IIC0		18
-#define R8A7793_CLK_PCIEC		19
-#define R8A7793_CLK_IIC1		23
-#define R8A7793_CLK_SSUSB		28
-#define R8A7793_CLK_CMT1		29
-#define R8A7793_CLK_USBDMAC0		30
-#define R8A7793_CLK_USBDMAC1		31
-
-/* MSTP4 */
-#define R8A7793_CLK_IRQC		7
-#define R8A7793_CLK_INTC_SYS		8
-
-/* MSTP5 */
-#define R8A7793_CLK_AUDIO_DMAC1		1
-#define R8A7793_CLK_AUDIO_DMAC0		2
-#define R8A7793_CLK_ADSP_MOD		6
-#define R8A7793_CLK_THERMAL		22
-#define R8A7793_CLK_PWM			23
-
-/* MSTP7 */
-#define R8A7793_CLK_EHCI		3
-#define R8A7793_CLK_HSUSB		4
-#define R8A7793_CLK_HSCIF2		13
-#define R8A7793_CLK_SCIF5		14
-#define R8A7793_CLK_SCIF4		15
-#define R8A7793_CLK_HSCIF1		16
-#define R8A7793_CLK_HSCIF0		17
-#define R8A7793_CLK_SCIF3		18
-#define R8A7793_CLK_SCIF2		19
-#define R8A7793_CLK_SCIF1		20
-#define R8A7793_CLK_SCIF0		21
-#define R8A7793_CLK_DU1			23
-#define R8A7793_CLK_DU0			24
-#define R8A7793_CLK_LVDS0		26
-
-/* MSTP8 */
-#define R8A7793_CLK_IPMMU_SGX		0
-#define R8A7793_CLK_VIN2		9
-#define R8A7793_CLK_VIN1		10
-#define R8A7793_CLK_VIN0		11
-#define R8A7793_CLK_ETHER		13
-#define R8A7793_CLK_SATA1		14
-#define R8A7793_CLK_SATA0		15
-
-/* MSTP9 */
-#define R8A7793_CLK_GPIO7		4
-#define R8A7793_CLK_GPIO6		5
-#define R8A7793_CLK_GPIO5		7
-#define R8A7793_CLK_GPIO4		8
-#define R8A7793_CLK_GPIO3		9
-#define R8A7793_CLK_GPIO2		10
-#define R8A7793_CLK_GPIO1		11
-#define R8A7793_CLK_GPIO0		12
-#define R8A7793_CLK_RCAN1		15
-#define R8A7793_CLK_RCAN0		16
-#define R8A7793_CLK_QSPI_MOD		17
-#define R8A7793_CLK_I2C5		25
-#define R8A7793_CLK_IICDVFS		26
-#define R8A7793_CLK_I2C4		27
-#define R8A7793_CLK_I2C3		28
-#define R8A7793_CLK_I2C2		29
-#define R8A7793_CLK_I2C1		30
-#define R8A7793_CLK_I2C0		31
-
-/* MSTP10 */
-#define R8A7793_CLK_SSI_ALL		5
-#define R8A7793_CLK_SSI9		6
-#define R8A7793_CLK_SSI8		7
-#define R8A7793_CLK_SSI7		8
-#define R8A7793_CLK_SSI6		9
-#define R8A7793_CLK_SSI5		10
-#define R8A7793_CLK_SSI4		11
-#define R8A7793_CLK_SSI3		12
-#define R8A7793_CLK_SSI2		13
-#define R8A7793_CLK_SSI1		14
-#define R8A7793_CLK_SSI0		15
-#define R8A7793_CLK_SCU_ALL		17
-#define R8A7793_CLK_SCU_DVC1		18
-#define R8A7793_CLK_SCU_DVC0		19
-#define R8A7793_CLK_SCU_CTU1_MIX1	20
-#define R8A7793_CLK_SCU_CTU0_MIX0	21
-#define R8A7793_CLK_SCU_SRC9		22
-#define R8A7793_CLK_SCU_SRC8		23
-#define R8A7793_CLK_SCU_SRC7		24
-#define R8A7793_CLK_SCU_SRC6		25
-#define R8A7793_CLK_SCU_SRC5		26
-#define R8A7793_CLK_SCU_SRC4		27
-#define R8A7793_CLK_SCU_SRC3		28
-#define R8A7793_CLK_SCU_SRC2		29
-#define R8A7793_CLK_SCU_SRC1		30
-#define R8A7793_CLK_SCU_SRC0		31
-
-/* MSTP11 */
-#define R8A7793_CLK_SCIFA3		6
-#define R8A7793_CLK_SCIFA4		7
-#define R8A7793_CLK_SCIFA5		8
-
-#endif /* __DT_BINDINGS_CLOCK_R8A7793_H__ */
diff --git a/include/dt-bindings/clock/r8a7794-clock.h b/include/dt-bindings/clock/r8a7794-clock.h
deleted file mode 100644
index 649f005782d0..000000000000
--- a/include/dt-bindings/clock/r8a7794-clock.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+
- *
- * Copyright (C) 2014 Renesas Electronics Corporation
- * Copyright 2013 Ideas On Board SPRL
- */
-
-#ifndef __DT_BINDINGS_CLOCK_R8A7794_H__
-#define __DT_BINDINGS_CLOCK_R8A7794_H__
-
-/* CPG */
-#define R8A7794_CLK_MAIN		0
-#define R8A7794_CLK_PLL0		1
-#define R8A7794_CLK_PLL1		2
-#define R8A7794_CLK_PLL3		3
-#define R8A7794_CLK_LB			4
-#define R8A7794_CLK_QSPI		5
-#define R8A7794_CLK_SDH			6
-#define R8A7794_CLK_SD0			7
-#define R8A7794_CLK_RCAN		8
-
-/* MSTP0 */
-#define R8A7794_CLK_MSIOF0		0
-
-/* MSTP1 */
-#define R8A7794_CLK_VCP0		1
-#define R8A7794_CLK_VPC0		3
-#define R8A7794_CLK_TMU1		11
-#define R8A7794_CLK_3DG			12
-#define R8A7794_CLK_2DDMAC		15
-#define R8A7794_CLK_FDP1_0		19
-#define R8A7794_CLK_TMU3		21
-#define R8A7794_CLK_TMU2		22
-#define R8A7794_CLK_CMT0		24
-#define R8A7794_CLK_TMU0		25
-#define R8A7794_CLK_VSP1_DU0		28
-#define R8A7794_CLK_VSP1_S		31
-
-/* MSTP2 */
-#define R8A7794_CLK_SCIFA2		2
-#define R8A7794_CLK_SCIFA1		3
-#define R8A7794_CLK_SCIFA0		4
-#define R8A7794_CLK_MSIOF2		5
-#define R8A7794_CLK_SCIFB0		6
-#define R8A7794_CLK_SCIFB1		7
-#define R8A7794_CLK_MSIOF1		8
-#define R8A7794_CLK_SCIFB2		16
-#define R8A7794_CLK_SYS_DMAC1		18
-#define R8A7794_CLK_SYS_DMAC0		19
-
-/* MSTP3 */
-#define R8A7794_CLK_SDHI2		11
-#define R8A7794_CLK_SDHI1		12
-#define R8A7794_CLK_SDHI0		14
-#define R8A7794_CLK_MMCIF0		15
-#define R8A7794_CLK_IIC0		18
-#define R8A7794_CLK_IIC1		23
-#define R8A7794_CLK_CMT1		29
-#define R8A7794_CLK_USBDMAC0		30
-#define R8A7794_CLK_USBDMAC1		31
-
-/* MSTP4 */
-#define R8A7794_CLK_IRQC		7
-#define R8A7794_CLK_INTC_SYS		8
-
-/* MSTP5 */
-#define R8A7794_CLK_AUDIO_DMAC0		2
-#define R8A7794_CLK_PWM			23
-
-/* MSTP7 */
-#define R8A7794_CLK_EHCI		3
-#define R8A7794_CLK_HSUSB		4
-#define R8A7794_CLK_HSCIF2		13
-#define R8A7794_CLK_SCIF5		14
-#define R8A7794_CLK_SCIF4		15
-#define R8A7794_CLK_HSCIF1		16
-#define R8A7794_CLK_HSCIF0		17
-#define R8A7794_CLK_SCIF3		18
-#define R8A7794_CLK_SCIF2		19
-#define R8A7794_CLK_SCIF1		20
-#define R8A7794_CLK_SCIF0		21
-#define R8A7794_CLK_DU1			23
-#define R8A7794_CLK_DU0			24
-
-/* MSTP8 */
-#define R8A7794_CLK_VIN1		10
-#define R8A7794_CLK_VIN0		11
-#define R8A7794_CLK_ETHERAVB		12
-#define R8A7794_CLK_ETHER		13
-
-/* MSTP9 */
-#define R8A7794_CLK_GPIO6		5
-#define R8A7794_CLK_GPIO5		7
-#define R8A7794_CLK_GPIO4		8
-#define R8A7794_CLK_GPIO3		9
-#define R8A7794_CLK_GPIO2		10
-#define R8A7794_CLK_GPIO1		11
-#define R8A7794_CLK_GPIO0		12
-#define R8A7794_CLK_RCAN1		15
-#define R8A7794_CLK_RCAN0		16
-#define R8A7794_CLK_QSPI_MOD		17
-#define R8A7794_CLK_I2C5		25
-#define R8A7794_CLK_I2C4		27
-#define R8A7794_CLK_I2C3		28
-#define R8A7794_CLK_I2C2		29
-#define R8A7794_CLK_I2C1		30
-#define R8A7794_CLK_I2C0		31
-
-/* MSTP10 */
-#define R8A7794_CLK_SSI_ALL		5
-#define R8A7794_CLK_SSI9		6
-#define R8A7794_CLK_SSI8		7
-#define R8A7794_CLK_SSI7		8
-#define R8A7794_CLK_SSI6		9
-#define R8A7794_CLK_SSI5		10
-#define R8A7794_CLK_SSI4		11
-#define R8A7794_CLK_SSI3		12
-#define R8A7794_CLK_SSI2		13
-#define R8A7794_CLK_SSI1		14
-#define R8A7794_CLK_SSI0		15
-#define R8A7794_CLK_SCU_ALL		17
-#define R8A7794_CLK_SCU_DVC1		18
-#define R8A7794_CLK_SCU_DVC0		19
-#define R8A7794_CLK_SCU_CTU1_MIX1	20
-#define R8A7794_CLK_SCU_CTU0_MIX0	21
-#define R8A7794_CLK_SCU_SRC6		25
-#define R8A7794_CLK_SCU_SRC5		26
-#define R8A7794_CLK_SCU_SRC4		27
-#define R8A7794_CLK_SCU_SRC3		28
-#define R8A7794_CLK_SCU_SRC2		29
-#define R8A7794_CLK_SCU_SRC1		30
-
-/* MSTP11 */
-#define R8A7794_CLK_SCIFA3		6
-#define R8A7794_CLK_SCIFA4		7
-#define R8A7794_CLK_SCIFA5		8
-
-#endif /* __DT_BINDINGS_CLOCK_R8A7794_H__ */
-- 
cgit v1.2.3


From 2f50304e9efb69604040feadc13f9590be8cd391 Mon Sep 17 00:00:00 2001
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Date: Tue, 4 Jun 2024 18:05:13 +0100
Subject: serial: sh-sci: Add support for RZ/V2H(P) SoC

Add serial support for RZ/V2H(P) SoC with earlycon.

The SCIF interface in the Renesas RZ/V2H(P) is similar to that available
in the RZ/G2L (R9A07G044) SoC, with the following differences:

- RZ/V2H(P) SoC has three additional interrupts: one for Tx end/Rx ready
  and two for Rx and Tx buffer full, all of which are edge-triggered.
- RZ/V2H(P) supports asynchronous mode, whereas RZ/G2L supports both
  synchronous and asynchronous modes.
- There are differences in the configuration of certain registers such
  as SCSMR, SCFCR, and SCSPTR between the two SoCs.

To handle these differences on RZ/V2H(P) SoC SCIx_RZV2H_SCIF_REGTYPE
is added.

Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20240604170513.522631-6-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_sci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 1c89611e0e06..0f2f50b8d28e 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -37,6 +37,7 @@ enum {
 	SCIx_SH7705_SCIF_REGTYPE,
 	SCIx_HSCIF_REGTYPE,
 	SCIx_RZ_SCIFA_REGTYPE,
+	SCIx_RZV2H_SCIF_REGTYPE,
 
 	SCIx_NR_REGTYPES,
 };
-- 
cgit v1.2.3


From c5603e2a621dac10c5e21cc430848ebcfa6c7e01 Mon Sep 17 00:00:00 2001
From: Doug Brown <doug@schmorgal.com>
Date: Thu, 6 Jun 2024 12:56:31 -0700
Subject: Revert "serial: core: only stop transmit when HW fifo is empty"

This reverts commit 7bfb915a597a301abb892f620fe5c283a9fdbd77.

This commit broke pxa and omap-serial, because it inhibited them from
calling stop_tx() if their TX FIFOs weren't completely empty. This
resulted in these two drivers hanging during transmits because the TX
interrupt would stay enabled, and a new TX interrupt would never fire.

Cc: stable@vger.kernel.org
Fixes: 7bfb915a597a ("serial: core: only stop transmit when HW fifo is empty")
Signed-off-by: Doug Brown <doug@schmorgal.com>
Link: https://lore.kernel.org/r/20240606195632.173255-2-doug@schmorgal.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_core.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 8cb65f50e830..3fb9a29e025f 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -811,8 +811,7 @@ enum UART_TX_FLAGS {
 	if (pending < WAKEUP_CHARS) {					      \
 		uart_write_wakeup(__port);				      \
 									      \
-		if (!((flags) & UART_TX_NOSTOP) && pending == 0 &&	      \
-		    __port->ops->tx_empty(__port))			      \
+		if (!((flags) & UART_TX_NOSTOP) && pending == 0)	      \
 			__port->ops->stop_tx(__port);			      \
 	}								      \
 									      \
-- 
cgit v1.2.3


From 9bb43b9e8d9a288a214e9b17acc9e46fda3977cf Mon Sep 17 00:00:00 2001
From: Jonas Gorski <jonas.gorski@gmail.com>
Date: Thu, 6 Jun 2024 12:56:32 -0700
Subject: serial: core: introduce uart_port_tx_limited_flags()

Analogue to uart_port_tx_flags() introduced in commit 3ee07964d407
("serial: core: introduce uart_port_tx_flags()"), add a _flags variant
for uart_port_tx_limited().

Fixes: d11cc8c3c4b6 ("tty: serial: use uart_port_tx_limited()")
Cc: stable@vger.kernel.org
Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
Signed-off-by: Doug Brown <doug@schmorgal.com>
Link: https://lore.kernel.org/r/20240606195632.173255-3-doug@schmorgal.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_core.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 3fb9a29e025f..aea25eef9a1a 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -850,6 +850,24 @@ enum UART_TX_FLAGS {
 			__count--);					      \
 })
 
+/**
+ * uart_port_tx_limited_flags -- transmit helper for uart_port with count limiting with flags
+ * @port: uart port
+ * @ch: variable to store a character to be written to the HW
+ * @flags: %UART_TX_NOSTOP or similar
+ * @count: a limit of characters to send
+ * @tx_ready: can HW accept more data function
+ * @put_char: function to write a character
+ * @tx_done: function to call after the loop is done
+ *
+ * See uart_port_tx_limited() for more details.
+ */
+#define uart_port_tx_limited_flags(port, ch, flags, count, tx_ready, put_char, tx_done) ({ \
+	unsigned int __count = (count);							   \
+	__uart_port_tx(port, ch, flags, tx_ready, put_char, tx_done, __count,		   \
+			__count--);							   \
+})
+
 /**
  * uart_port_tx -- transmit helper for uart_port
  * @port: uart port
-- 
cgit v1.2.3


From 50cf5f3842af3135b88b041890e7e12a74425fcb Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 6 Jun 2024 12:25:01 -0600
Subject: io_uring/msg_ring: add an alloc cache for io_kiocb entries

With slab accounting, allocating and freeing memory has considerable
overhead. Add a basic alloc cache for the io_kiocb allocations that
msg_ring needs to do. Unlike other caches, this one is used by the
sender, grabbing it from the remote ring. When the remote ring gets
the posted completion, it'll free it locally. Hence it is separately
locked, using ctx->msg_lock.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 1052a68fd68d..ede42dce1506 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -397,6 +397,9 @@ struct io_ring_ctx {
 	struct callback_head		poll_wq_task_work;
 	struct list_head		defer_list;
 
+	struct io_alloc_cache		msg_cache;
+	spinlock_t			msg_lock;
+
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	struct list_head	napi_list;	/* track busy poll napi_id */
 	spinlock_t		napi_lock;	/* napi_list lock */
-- 
cgit v1.2.3


From 89f415b99050fbf7bcf3fca0cde30c09b62265eb Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 20 Jun 2024 17:39:45 +0200
Subject: mfd: tmio: Remove obsolete .set_clk_div() callback

Commit bef64d2908e825c5 ("mmc: remove tmio_mmc driver") removed the last
user of the .set_clk_div() callback in the tmio_mmc_data structure.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Acked-by: Lee Jones <lee@kernel.org>
Link: https://lore.kernel.org/r/e0fa98f138a7b2836128178f8b3a757978517307.1718897545.git.geert+renesas@glider.be
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mfd/tmio.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index eace8ea6cda0..aca74ac1ff69 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -101,7 +101,6 @@ struct tmio_mmc_data {
 	unsigned int			max_blk_count;
 	unsigned short			max_segs;
 	void (*set_pwr)(struct platform_device *host, int state);
-	void (*set_clk_div)(struct platform_device *host, int state);
 };
 
 /*
-- 
cgit v1.2.3


From f86937afb446d056e9e385da05536eb26483874c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 20 Jun 2024 17:39:46 +0200
Subject: mmc: tmio: Remove obsolete .set_pwr() callback()

Commit ca78476e4888f1f1 ("mfd: Remove toshiba tmio drivers") removed the
last users of the .set_pwr() callback in the tmio_mmc_data structure.
Remove the callback, and all related infrastructure.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Lee Jones <lee@kernel.org>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Link: https://lore.kernel.org/r/fbbc13ddd19df2c40933ffa3b82fb14841bf1d4c.1718897545.git.geert+renesas@glider.be
---
 include/linux/mfd/tmio.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index aca74ac1ff69..8c09d14a3a28 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -100,7 +100,6 @@ struct tmio_mmc_data {
 	dma_addr_t			dma_rx_offset;
 	unsigned int			max_blk_count;
 	unsigned short			max_segs;
-	void (*set_pwr)(struct platform_device *host, int state);
 };
 
 /*
-- 
cgit v1.2.3


From 4b8e88e563b5f666446d002ad0dc1e6e8e7102b0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Jun 2024 11:34:09 +0200
Subject: ftruncate: pass a signed offset

The old ftruncate() syscall, using the 32-bit off_t misses a sign
extension when called in compat mode on 64-bit architectures.  As a
result, passing a negative length accidentally succeeds in truncating
to file size between 2GiB and 4GiB.

Changing the type of the compat syscall to the signed compat_off_t
changes the behavior so it instead returns -EINVAL.

The native entry point, the truncate() syscall and the corresponding
loff_t based variants are all correct already and do not suffer
from this mistake.

Fixes: 3f6d078d4acc ("fix compat truncate/ftruncate")
Reviewed-by: Christian Brauner <brauner@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/compat.h   | 2 +-
 include/linux/syscalls.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 233f61ec8afc..56cebaff0c91 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -608,7 +608,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd,
 asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz,
 				     struct compat_statfs64 __user *buf);
 asmlinkage long compat_sys_truncate(const char __user *, compat_off_t);
-asmlinkage long compat_sys_ftruncate(unsigned int, compat_ulong_t);
+asmlinkage long compat_sys_ftruncate(unsigned int, compat_off_t);
 /* No generic prototype for truncate64, ftruncate64, fallocate */
 asmlinkage long compat_sys_openat(int dfd, const char __user *filename,
 				  int flags, umode_t mode);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 9104952d323d..ba9337709878 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -418,7 +418,7 @@ asmlinkage long sys_listmount(const struct mnt_id_req __user *req,
 			      u64 __user *mnt_ids, size_t nr_mnt_ids,
 			      unsigned int flags);
 asmlinkage long sys_truncate(const char __user *path, long length);
-asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length);
+asmlinkage long sys_ftruncate(unsigned int fd, off_t length);
 #if BITS_PER_LONG == 32
 asmlinkage long sys_truncate64(const char __user *path, loff_t length);
 asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length);
-- 
cgit v1.2.3


From 118d777c4cb40f44e7b675955fd2da8b22f83913 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 17 Jun 2024 12:18:37 +0100
Subject: wordpart.h: Add REPEAT_BYTE_U32()

In some cases it's necessary to replicate a byte across a u32 value, for
which REPEAT_BYTE() would be helpful. Currently this requires explicit
masking of the result to avoid sparse warnings, as e.g.

    (u32)REPEAT_BYTE(0xa0))

... will result in a warning:

    cast truncates bits from constant value (a0a0a0a0a0a0a0a0 becomes a0a0a0a0)

Add a new REPEAT_BYTE_U32() which does the necessary masking internally,
so that we don't need to duplicate this for every usage.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alexandru Elisei <alexandru.elisei@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20240617111841.2529370-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/wordpart.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/wordpart.h b/include/linux/wordpart.h
index 4ca1ba66d2f0..5a7b97bb7c95 100644
--- a/include/linux/wordpart.h
+++ b/include/linux/wordpart.h
@@ -39,6 +39,14 @@
  */
 #define REPEAT_BYTE(x)	((~0ul / 0xff) * (x))
 
+/**
+ * REPEAT_BYTE_U32 - repeat the value @x multiple times as a u32 value
+ * @x: value to repeat
+ *
+ * NOTE: @x is not checked for > 0xff; larger values produce odd results.
+ */
+#define REPEAT_BYTE_U32(x)	lower_32_bits(REPEAT_BYTE(x))
+
 /* Set bits in the first 'n' bytes when loaded from memory */
 #ifdef __LITTLE_ENDIAN
 #  define aligned_byte_mask(n) ((1UL << 8*(n))-1)
-- 
cgit v1.2.3


From a6156e70316b322b61739468e465bfb1345b7ae2 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 17 Jun 2024 12:18:39 +0100
Subject: irqchip/gic-v3: Make distributor priorities variables

In subsequent patches the GICv3 driver will choose the regular interrupt
priority at boot time.

In preparation for using dynamic priorities, place the priorities in
variables and update the code to pass these as parameters. Users of
GICD_INT_DEF_PRI_X4 are modified to replicate the priority byte using
REPEAT_BYTE_U32().

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alexandru Elisei <alexandru.elisei@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Tested-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20240617111841.2529370-4-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqchip/arm-gic-common.h | 4 ----
 include/linux/irqchip/arm-gic-v3.h     | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h
index 1177f3a1aed5..fc0246cc05ac 100644
--- a/include/linux/irqchip/arm-gic-common.h
+++ b/include/linux/irqchip/arm-gic-common.h
@@ -10,10 +10,6 @@
 #include <linux/irqchip/arm-vgic-info.h>
 
 #define GICD_INT_DEF_PRI		0xa0
-#define GICD_INT_DEF_PRI_X4		((GICD_INT_DEF_PRI << 24) |\
-					(GICD_INT_DEF_PRI << 16) |\
-					(GICD_INT_DEF_PRI << 8) |\
-					GICD_INT_DEF_PRI)
 
 struct irq_domain;
 struct fwnode_handle;
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 728691365464..70c0948f978e 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -638,7 +638,7 @@ struct fwnode_handle;
 int __init its_lpi_memreserve_init(void);
 int its_cpu_init(void);
 int its_init(struct fwnode_handle *handle, struct rdists *rdists,
-	     struct irq_domain *domain);
+	     struct irq_domain *domain, u8 irq_prio);
 int mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent);
 
 static inline bool gic_enable_sre(void)
-- 
cgit v1.2.3


From 18fdb6348c480fb646799f621204f674815f05e7 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 17 Jun 2024 12:18:41 +0100
Subject: arm64: irqchip/gic-v3: Select priorities at boot time

The distributor and PMR/RPR can present different views of the interrupt
priority space dependent upon the values of GICD_CTLR.DS and
SCR_EL3.FIQ. Currently we treat the distributor's view of the priority
space as canonical, and when the two differ we change the way we handle
values in the PMR/RPR, using the `gic_nonsecure_priorities` static key
to decide what to do.

This approach works, but it's sub-optimal. When using pseudo-NMI we
manipulate the distributor rarely, and we manipulate the PMR/RPR
registers very frequently in code spread out throughout the kernel (e.g.
local_irq_{save,restore}()). It would be nicer if we could use fixed
values for the PMR/RPR, and dynamically choose the values programmed
into the distributor.

This patch changes the GICv3 driver and arm64 code accordingly. PMR
values are chosen at compile time, and the GICv3 driver determines the
appropriate values to program into the distributor at boot time. This
removes the need for the `gic_nonsecure_priorities` static key and
results in smaller and better generated code for saving/restoring the
irqflags.

Before this patch, local_irq_disable() compiles to:

| 0000000000000000 <outlined_local_irq_disable>:
|    0:   d503201f        nop
|    4:   d50343df        msr     daifset, #0x3
|    8:   d65f03c0        ret
|    c:   d503201f        nop
|   10:   d2800c00        mov     x0, #0x60                       // #96
|   14:   d5184600        msr     icc_pmr_el1, x0
|   18:   d65f03c0        ret
|   1c:   d2801400        mov     x0, #0xa0                       // #160
|   20:   17fffffd        b       14 <outlined_local_irq_disable+0x14>

After this patch, local_irq_disable() compiles to:

| 0000000000000000 <outlined_local_irq_disable>:
|    0:   d503201f        nop
|    4:   d50343df        msr     daifset, #0x3
|    8:   d65f03c0        ret
|    c:   d2801800        mov     x0, #0xc0                       // #192
|   10:   d5184600        msr     icc_pmr_el1, x0
|   14:   d65f03c0        ret

... with 3 fewer instructions per call.

For defconfig + CONFIG_PSEUDO_NMI=y, this results in a minor saving of
~4K of text, and will make it easier to make further improvements to the
way we manipulate irqflags and DAIF bits.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alexandru Elisei <alexandru.elisei@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Tested-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20240617111841.2529370-6-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqchip/arm-gic-v3-prio.h | 52 +++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 include/linux/irqchip/arm-gic-v3-prio.h

(limited to 'include')

diff --git a/include/linux/irqchip/arm-gic-v3-prio.h b/include/linux/irqchip/arm-gic-v3-prio.h
new file mode 100644
index 000000000000..44157c9abb78
--- /dev/null
+++ b/include/linux/irqchip/arm-gic-v3-prio.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __LINUX_IRQCHIP_ARM_GIC_V3_PRIO_H
+#define __LINUX_IRQCHIP_ARM_GIC_V3_PRIO_H
+
+/*
+ * GIC priorities from the view of the PMR/RPR.
+ *
+ * These values are chosen to be valid in either the absolute priority space or
+ * the NS view of the priority space. The value programmed into the distributor
+ * and ITS will be chosen at boot time such that these values appear in the
+ * PMR/RPR.
+ *
+ * GICV3_PRIO_UNMASKED is the PMR view of the priority to use to permit both
+ * IRQs and pseudo-NMIs.
+ *
+ * GICV3_PRIO_IRQ is the PMR view of the priority of regular interrupts. This
+ * can be written to the PMR to mask regular IRQs.
+ *
+ * GICV3_PRIO_NMI is the PMR view of the priority of pseudo-NMIs. This can be
+ * written to the PMR to mask pseudo-NMIs.
+ *
+ * On arm64 some code sections either automatically switch back to PSR.I or
+ * explicitly require to not use priority masking. If bit GICV3_PRIO_PSR_I_SET
+ * is included in the priority mask, it indicates that PSR.I should be set and
+ * interrupt disabling temporarily does not rely on IRQ priorities.
+ */
+#define GICV3_PRIO_UNMASKED	0xe0
+#define GICV3_PRIO_IRQ		0xc0
+#define GICV3_PRIO_NMI		0x80
+
+#define GICV3_PRIO_PSR_I_SET	(1 << 4)
+
+#ifndef __ASSEMBLER__
+
+#define __gicv3_prio_to_ns(p)	(0xff & ((p) << 1))
+#define __gicv3_ns_to_prio(ns)	(0x80 | ((ns) >> 1))
+
+#define __gicv3_prio_valid_ns(p) \
+	(__gicv3_ns_to_prio(__gicv3_prio_to_ns(p)) == (p))
+
+static_assert(__gicv3_prio_valid_ns(GICV3_PRIO_NMI));
+static_assert(__gicv3_prio_valid_ns(GICV3_PRIO_IRQ));
+
+static_assert(GICV3_PRIO_NMI < GICV3_PRIO_IRQ);
+static_assert(GICV3_PRIO_IRQ < GICV3_PRIO_UNMASKED);
+
+static_assert(GICV3_PRIO_IRQ < (GICV3_PRIO_IRQ | GICV3_PRIO_PSR_I_SET));
+
+#endif /* __ASSEMBLER */
+
+#endif /* __LINUX_IRQCHIP_ARM_GIC_V3_PRIO_H */
-- 
cgit v1.2.3


From 12046f8c77e0ed6d41beabde0edbb729499c970b Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Mon, 24 Jun 2024 19:31:15 +0200
Subject: platform/x86: wmi: Add driver_override support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for forcing the WMI driver core to bind
a certain WMI driver to a WMI device. This will be
necessary to support generic WMI drivers without an
ID table

Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20240624173116.31314-2-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/wmi.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index 63cca3b58d6d..3275470b5531 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -16,12 +16,16 @@
  * struct wmi_device - WMI device structure
  * @dev: Device associated with this WMI device
  * @setable: True for devices implementing the Set Control Method
+ * @driver_override: Driver name to force a match; do not set directly,
+ *		     because core frees it; use driver_set_override() to
+ *		     set or clear it.
  *
  * This represents WMI devices discovered by the WMI driver core.
  */
 struct wmi_device {
 	struct device dev;
 	bool setable;
+	const char *driver_override;
 };
 
 /**
-- 
cgit v1.2.3


From 44348870de4b8f292f97b84583a298d66fbaf738 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Jun 2024 19:38:35 +0200
Subject: block: fix the blk_queue_nonrot polarity

Take care of the inverse polarity of the BLK_FEAT_ROTATIONAL flag
vs the old nonrot helper.

Fixes: bd4a633b6f7c ("block: move the nonrot flag to queue_limits")
Reported-by: kernel test robot <oliver.sang@intel.com>
Reported-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20240624173835.76753-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e89003360c17..b2f1362c4681 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -617,7 +617,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_noxmerges(q)	\
 	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
-#define blk_queue_nonrot(q)	((q)->limits.features & BLK_FEAT_ROTATIONAL)
+#define blk_queue_nonrot(q)	(!((q)->limits.features & BLK_FEAT_ROTATIONAL))
 #define blk_queue_io_stat(q)	((q)->limits.features & BLK_FEAT_IO_STAT)
 #define blk_queue_zone_resetall(q)	\
 	((q)->limits.features & BLK_FEAT_ZONE_RESETALL)
-- 
cgit v1.2.3


From 4a24bbabc826eaba6f94a9741712117b8e2b0587 Mon Sep 17 00:00:00 2001
From: Chengming Zhou <chengming.zhou@linux.dev>
Date: Fri, 7 Jun 2024 16:40:14 +0800
Subject: slab: delete useless RED_INACTIVE and RED_ACTIVE

These seem useless since we use the SLUB_RED_INACTIVE and SLUB_RED_ACTIVE,
so just delete them, no functional change.

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Chengming Zhou <chengming.zhou@linux.dev>
Reviewed-by: Christoph Lameter (Ampere) <cl@linux.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/poison.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/poison.h b/include/linux/poison.h
index 1f0ee2459f2a..9c1a035af97c 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -38,11 +38,8 @@
  * Magic nums for obj red zoning.
  * Placed in the first word before and the first word after an obj.
  */
-#define	RED_INACTIVE	0x09F911029D74E35BULL	/* when obj is inactive */
-#define	RED_ACTIVE	0xD84156C5635688C0ULL	/* when obj is active */
-
-#define SLUB_RED_INACTIVE	0xbb
-#define SLUB_RED_ACTIVE		0xcc
+#define SLUB_RED_INACTIVE	0xbb	/* when obj is inactive */
+#define SLUB_RED_ACTIVE		0xcc	/* when obj is active */
 
 /* ...and for poisoning */
 #define	POISON_INUSE	0x5a	/* for use-uninitialised poisoning */
-- 
cgit v1.2.3


From d89a5c6705998ddc42b104f8eabd3c4b9e8fde08 Mon Sep 17 00:00:00 2001
From: Weiwen Hu <huweiwen@linux.alibaba.com>
Date: Mon, 3 Jun 2024 20:57:00 +0800
Subject: nvme: fix status magic numbers

Replaced some magic numbers about SC and SCT with enum and macro.

Signed-off-by: Weiwen Hu <huweiwen@linux.alibaba.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 425573202295..c8d7fdd095a1 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1846,6 +1846,7 @@ enum {
 	/*
 	 * Generic Command Status:
 	 */
+	NVME_SCT_GENERIC		= 0x0,
 	NVME_SC_SUCCESS			= 0x0,
 	NVME_SC_INVALID_OPCODE		= 0x1,
 	NVME_SC_INVALID_FIELD		= 0x2,
@@ -1893,6 +1894,7 @@ enum {
 	/*
 	 * Command Specific Status:
 	 */
+	NVME_SCT_COMMAND_SPECIFIC	= 0x100,
 	NVME_SC_CQ_INVALID		= 0x100,
 	NVME_SC_QID_INVALID		= 0x101,
 	NVME_SC_QUEUE_SIZE		= 0x102,
@@ -1966,6 +1968,7 @@ enum {
 	/*
 	 * Media and Data Integrity Errors:
 	 */
+	NVME_SCT_MEDIA_ERROR		= 0x200,
 	NVME_SC_WRITE_FAULT		= 0x280,
 	NVME_SC_READ_ERROR		= 0x281,
 	NVME_SC_GUARD_CHECK		= 0x282,
@@ -1978,6 +1981,7 @@ enum {
 	/*
 	 * Path-related Errors:
 	 */
+	NVME_SCT_PATH			= 0x300,
 	NVME_SC_INTERNAL_PATH_ERROR	= 0x300,
 	NVME_SC_ANA_PERSISTENT_LOSS	= 0x301,
 	NVME_SC_ANA_INACCESSIBLE	= 0x302,
@@ -1986,11 +1990,17 @@ enum {
 	NVME_SC_HOST_PATH_ERROR		= 0x370,
 	NVME_SC_HOST_ABORTED_CMD	= 0x371,
 
-	NVME_SC_CRD			= 0x1800,
+	NVME_SC_MASK			= 0x00ff, /* Status Code */
+	NVME_SCT_MASK			= 0x0700, /* Status Code Type */
+	NVME_SCT_SC_MASK		= NVME_SCT_MASK | NVME_SC_MASK,
+
+	NVME_SC_CRD			= 0x1800, /* Command Retry Delayed */
 	NVME_SC_MORE			= 0x2000,
-	NVME_SC_DNR			= 0x4000,
+	NVME_SC_DNR			= 0x4000, /* Do Not Retry */
 };
 
+#define NVME_SCT(status) ((status) >> 8 & 7)
+
 struct nvme_completion {
 	/*
 	 * Used by Admin and Fabrics commands to return data:
-- 
cgit v1.2.3


From dd0b0a4a2c5d7209457dc172997d1243ad269cfa Mon Sep 17 00:00:00 2001
From: Weiwen Hu <huweiwen@linux.alibaba.com>
Date: Mon, 3 Jun 2024 20:57:01 +0800
Subject: nvme: rename CDR/MORE/DNR to NVME_STATUS_*

CDR/MORE/DNR fields are not belonging to SC in the NVMe spec, rename
them to NVME_STATUS_* to avoid confusion.

Signed-off-by: Weiwen Hu <huweiwen@linux.alibaba.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index c8d7fdd095a1..27faae34245d 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1994,9 +1994,9 @@ enum {
 	NVME_SCT_MASK			= 0x0700, /* Status Code Type */
 	NVME_SCT_SC_MASK		= NVME_SCT_MASK | NVME_SC_MASK,
 
-	NVME_SC_CRD			= 0x1800, /* Command Retry Delayed */
-	NVME_SC_MORE			= 0x2000,
-	NVME_SC_DNR			= 0x4000, /* Do Not Retry */
+	NVME_STATUS_CRD			= 0x1800, /* Command Retry Delayed */
+	NVME_STATUS_MORE		= 0x2000,
+	NVME_STATUS_DNR			= 0x4000, /* Do Not Retry */
 };
 
 #define NVME_SCT(status) ((status) >> 8 & 7)
-- 
cgit v1.2.3


From 99032e9dbabcfc64bf71e94a1b662c4bfe534ed1 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@kernel.org>
Date: Mon, 27 May 2024 07:15:23 +0200
Subject: nvmet-fc: implement host_traddr()

Implement callback to display the host transport address by
adding a callback 'host_traddr' for nvmet_fc_target_template.

Signed-off-by: Hannes Reinecke <hare@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme-fc-driver.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 4109f1bd6128..89ea1ebd975a 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -920,6 +920,9 @@ struct nvmet_fc_target_port {
  *       further references to hosthandle.
  *       Entrypoint is Mandatory if the lldd calls nvmet_fc_invalidate_host().
  *
+ * @host_traddr: called by the transport to retrieve the node name and
+ *       port name of the host port address.
+ *
  * @max_hw_queues:  indicates the maximum number of hw queues the LLDD
  *       supports for cpu affinitization.
  *       Value is Mandatory. Must be at least 1.
@@ -975,6 +978,7 @@ struct nvmet_fc_target_template {
 	void (*ls_abort)(struct nvmet_fc_target_port *targetport,
 				void *hosthandle, struct nvmefc_ls_req *lsreq);
 	void (*host_release)(void *hosthandle);
+	int (*host_traddr)(void *hosthandle, u64 *wwnn, u64 *wwpn);
 
 	u32	max_hw_queues;
 	u16	max_sgl_segments;
-- 
cgit v1.2.3


From 25e6e00d3f78d73c30ee78fee8060cf68908be50 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Mon, 24 Jun 2024 07:08:52 +0900
Subject: firewire: core: add tracepoints events for allocation/deallocation of
 isochronous context

It is helpful to trace the allocation and dealocation of isochronous
when the core function is requested them by both in-kernel unit drivers
and userspace applications.

This commit adds some tracepoints events for the aim.

Link: https://lore.kernel.org/r/20240623220859.851685-2-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 105 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index 25289a063deb..0cca1ef841f3 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -436,6 +436,111 @@ TRACE_EVENT(self_id_sequence,
 #undef PHY_PACKET_SELF_ID_GET_POWER_CLASS
 #undef PHY_PACKET_SELF_ID_GET_INITIATED_RESET
 
+TRACE_EVENT_CONDITION(isoc_outbound_allocate,
+	TP_PROTO(const struct fw_iso_context *ctx, unsigned int channel, unsigned int scode),
+	TP_ARGS(ctx, channel, scode),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT),
+	TP_STRUCT__entry(
+		__field(u64, context)
+		__field(u8, card_index)
+		__field(u8, channel)
+		__field(u8, scode)
+	),
+	TP_fast_assign(
+		__entry->context = (uintptr_t)ctx;
+		__entry->card_index = ctx->card->index;
+		__entry->channel = channel;
+		__entry->scode = scode;
+	),
+	TP_printk(
+		"context=0x%llx card_index=%u channel=%u scode=%u",
+		__entry->context,
+		__entry->card_index,
+		__entry->channel,
+		__entry->scode
+	)
+);
+
+TRACE_EVENT_CONDITION(isoc_inbound_single_allocate,
+	TP_PROTO(const struct fw_iso_context *ctx, unsigned int channel, unsigned int header_size),
+	TP_ARGS(ctx, channel, header_size),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE),
+	TP_STRUCT__entry(
+		__field(u64, context)
+		__field(u8, card_index)
+		__field(u8, channel)
+		__field(u8, header_size)
+	),
+	TP_fast_assign(
+		__entry->context = (uintptr_t)ctx;
+		__entry->card_index = ctx->card->index;
+		__entry->channel = channel;
+		__entry->header_size = header_size;
+	),
+	TP_printk(
+		"context=0x%llx card_index=%u channel=%u header_size=%u",
+		__entry->context,
+		__entry->card_index,
+		__entry->channel,
+		__entry->header_size
+	)
+);
+
+TRACE_EVENT_CONDITION(isoc_inbound_multiple_allocate,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL),
+	TP_STRUCT__entry(
+		__field(u64, context)
+		__field(u8, card_index)
+	),
+	TP_fast_assign(
+		__entry->context = (uintptr_t)ctx;
+		__entry->card_index = ctx->card->index;
+	),
+	TP_printk(
+		"context=0x%llx card_index=%u",
+		__entry->context,
+		__entry->card_index
+	)
+);
+
+DECLARE_EVENT_CLASS(isoc_destroy_template,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_STRUCT__entry(
+		__field(u64, context)
+		__field(u8, card_index)
+	),
+	TP_fast_assign(
+		__entry->context = (uintptr_t)ctx;
+		__entry->card_index = ctx->card->index;
+	),
+	TP_printk(
+		"context=0x%llx card_index=%u",
+		__entry->context,
+		__entry->card_index
+	)
+)
+
+DEFINE_EVENT_CONDITION(isoc_destroy_template, isoc_outbound_destroy,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT)
+);
+
+DEFINE_EVENT_CONDITION(isoc_destroy_template, isoc_inbound_single_destroy,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE)
+);
+
+DEFINE_EVENT_CONDITION(isoc_destroy_template, isoc_inbound_multiple_destroy,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
+);
+
 #undef QUADLET_SIZE
 
 #endif // _FIREWIRE_TRACE_EVENT_H
-- 
cgit v1.2.3


From 9f16ac725b233c53e2a043aa8d1b6f0453e94556 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Mon, 24 Jun 2024 07:08:53 +0900
Subject: firewire: core: add tracepoints events for setting channels of
 multichannel context

It is helpful to trace the channel setting for the multichannel isochronous
context when the core function is requested it by both in-kernel unit
drivers and userspace applications.

This commit adds some tracepoints events for the aim.

Link: https://lore.kernel.org/r/20240623220859.851685-3-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index 0cca1ef841f3..0334e02f9d94 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -541,6 +541,27 @@ DEFINE_EVENT_CONDITION(isoc_destroy_template, isoc_inbound_multiple_destroy,
 	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
 );
 
+TRACE_EVENT(isoc_inbound_multiple_channels,
+	TP_PROTO(const struct fw_iso_context *ctx, u64 channels),
+	TP_ARGS(ctx, channels),
+	TP_STRUCT__entry(
+		__field(u64, context)
+		__field(u8, card_index)
+		__field(u64, channels)
+	),
+	TP_fast_assign(
+		__entry->context = (uintptr_t)ctx;
+		__entry->card_index = ctx->card->index;
+		__entry->channels = channels;
+	),
+	TP_printk(
+		"context=0x%llx card_index=%u channels=0x%016llx",
+		__entry->context,
+		__entry->card_index,
+		__entry->channels
+	)
+);
+
 #undef QUADLET_SIZE
 
 #endif // _FIREWIRE_TRACE_EVENT_H
-- 
cgit v1.2.3


From 4e64210f67126e77d54ffcd3297a87539b858322 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Mon, 24 Jun 2024 07:08:54 +0900
Subject: firewire: core: add tracepoints events for starting/stopping of
 isochronous context

It is helpful to trace the starting and stopping of isochronous context
when the core function is requested them by both in-kernel unit drivers
and userspace applications.

This commit adds some tracepoints events for the aim.

Link: https://lore.kernel.org/r/20240623220859.851685-4-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 108 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index 0334e02f9d94..c984b7d6380f 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -562,6 +562,114 @@ TRACE_EVENT(isoc_inbound_multiple_channels,
 	)
 );
 
+TRACE_EVENT_CONDITION(isoc_outbound_start,
+	TP_PROTO(const struct fw_iso_context *ctx, int cycle_match),
+	TP_ARGS(ctx, cycle_match),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT),
+	TP_STRUCT__entry(
+		__field(u64, context)
+		__field(u8, card_index)
+		__field(bool, cycle_match)
+		__field(u16, cycle)
+	),
+	TP_fast_assign(
+		__entry->context = (uintptr_t)ctx;
+		__entry->card_index = ctx->card->index;
+		__entry->cycle_match = cycle_match < 0 ? false : true;
+		__entry->cycle = __entry->cycle_match ? (u16)cycle_match : 0;
+	),
+	TP_printk(
+		"context=0x%llx card_index=%u cycle_match=%s cycle=0x%04x",
+		__entry->context,
+		__entry->card_index,
+		__entry->cycle_match ? "true" : "false",
+		__entry->cycle
+	)
+);
+
+DECLARE_EVENT_CLASS(isoc_inbound_start_template,
+	TP_PROTO(const struct fw_iso_context *ctx, int cycle_match, unsigned int sync, unsigned int tags),
+	TP_ARGS(ctx, cycle_match, sync, tags),
+	TP_STRUCT__entry(
+		__field(u64, context)
+		__field(u8, card_index)
+		__field(bool, cycle_match)
+		__field(u16, cycle)
+		__field(u8, sync)
+		__field(u8, tags)
+	),
+	TP_fast_assign(
+		__entry->context = (uintptr_t)ctx;
+		__entry->card_index = ctx->card->index;
+		__entry->cycle_match = cycle_match < 0 ? false : true;
+		__entry->cycle = __entry->cycle_match ? (u16)cycle_match : 0;
+		__entry->sync = sync;
+		__entry->tags = tags;
+	),
+	TP_printk(
+		"context=0x%llx card_index=%u cycle_match=%s cycle=0x%04x sync=%u tags=%s",
+		__entry->context,
+		__entry->card_index,
+		__entry->cycle_match ? "true" : "false",
+		__entry->cycle,
+		__entry->sync,
+		__print_flags(__entry->tags, "|",
+			{ FW_ISO_CONTEXT_MATCH_TAG0, "0" },
+			{ FW_ISO_CONTEXT_MATCH_TAG1, "1" },
+			{ FW_ISO_CONTEXT_MATCH_TAG2, "2" },
+			{ FW_ISO_CONTEXT_MATCH_TAG3, "3" }
+		)
+	)
+);
+
+DEFINE_EVENT_CONDITION(isoc_inbound_start_template, isoc_inbound_single_start,
+	TP_PROTO(const struct fw_iso_context *ctx, int cycle_match, unsigned int sync, unsigned int tags),
+	TP_ARGS(ctx, cycle_match, sync, tags),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE)
+);
+
+DEFINE_EVENT_CONDITION(isoc_inbound_start_template, isoc_inbound_multiple_start,
+	TP_PROTO(const struct fw_iso_context *ctx, int cycle_match, unsigned int sync, unsigned int tags),
+	TP_ARGS(ctx, cycle_match, sync, tags),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
+);
+
+DECLARE_EVENT_CLASS(isoc_stop_template,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_STRUCT__entry(
+		__field(u64, context)
+		__field(u8, card_index)
+	),
+	TP_fast_assign(
+		__entry->context = (uintptr_t)ctx;
+		__entry->card_index = ctx->card->index;
+	),
+	TP_printk(
+		"context=0x%llx card_index=%u",
+		__entry->context,
+		__entry->card_index
+	)
+)
+
+DEFINE_EVENT_CONDITION(isoc_stop_template, isoc_outbound_stop,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT)
+);
+
+DEFINE_EVENT_CONDITION(isoc_stop_template, isoc_inbound_single_stop,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE)
+);
+
+DEFINE_EVENT_CONDITION(isoc_stop_template, isoc_inbound_multiple_stop,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
+);
+
 #undef QUADLET_SIZE
 
 #endif // _FIREWIRE_TRACE_EVENT_H
-- 
cgit v1.2.3


From c0b0ce6c47903ed2b9028933793801b3b4e72ca1 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Mon, 24 Jun 2024 07:08:55 +0900
Subject: firewire: core: add tracepoints events for flushing of isochronous
 context

It is helpful to trace the flushing of isochronous context when the core
function is requested them by both in-kernel unit drivers and userspace
applications.

This commit adds some tracepoints events for the aim.

Link: https://lore.kernel.org/r/20240623220859.851685-5-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index c984b7d6380f..b404e6324671 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -670,6 +670,42 @@ DEFINE_EVENT_CONDITION(isoc_stop_template, isoc_inbound_multiple_stop,
 	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
 );
 
+DECLARE_EVENT_CLASS(isoc_flush_template,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_STRUCT__entry(
+		__field(u64, context)
+		__field(u8, card_index)
+	),
+	TP_fast_assign(
+		__entry->context = (uintptr_t)ctx;
+		__entry->card_index = ctx->card->index;
+	),
+	TP_printk(
+		"context=0x%llx card_index=%u",
+		__entry->context,
+		__entry->card_index
+	)
+);
+
+DEFINE_EVENT_CONDITION(isoc_flush_template, isoc_outbound_flush,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT)
+);
+
+DEFINE_EVENT_CONDITION(isoc_flush_template, isoc_inbound_single_flush,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE)
+);
+
+DEFINE_EVENT_CONDITION(isoc_flush_template, isoc_inbound_multiple_flush,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
+);
+
 #undef QUADLET_SIZE
 
 #endif // _FIREWIRE_TRACE_EVENT_H
-- 
cgit v1.2.3


From 8320b63e028f62542942941b51951006403d8b4f Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Mon, 24 Jun 2024 07:08:56 +0900
Subject: firewire: core: add tracepoints events for flushing completions of
 isochronous context

It is helpful to trace the flushing completions of isochronous context when
the core function is requested them by both in-kernel unit drivers and
userspace applications.

This commit adds some tracepoints events for the aim.

Link: https://lore.kernel.org/r/20240623220859.851685-6-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index b404e6324671..ad1546120aa3 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -706,6 +706,42 @@ DEFINE_EVENT_CONDITION(isoc_flush_template, isoc_inbound_multiple_flush,
 	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
 );
 
+DECLARE_EVENT_CLASS(isoc_flush_completions_template,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_STRUCT__entry(
+		__field(u64, context)
+		__field(u8, card_index)
+	),
+	TP_fast_assign(
+		__entry->context = (uintptr_t)ctx;
+		__entry->card_index = ctx->card->index;
+	),
+	TP_printk(
+		"context=0x%llx card_index=%u",
+		__entry->context,
+		__entry->card_index
+	)
+);
+
+DEFINE_EVENT_CONDITION(isoc_flush_completions_template, isoc_outbound_flush_completions,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT)
+);
+
+DEFINE_EVENT_CONDITION(isoc_flush_completions_template, isoc_inbound_single_flush_completions,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE)
+);
+
+DEFINE_EVENT_CONDITION(isoc_flush_completions_template, isoc_inbound_multiple_flush_completions,
+	TP_PROTO(const struct fw_iso_context *ctx),
+	TP_ARGS(ctx),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
+);
+
 #undef QUADLET_SIZE
 
 #endif // _FIREWIRE_TRACE_EVENT_H
-- 
cgit v1.2.3


From 1f3c0d794df6d85834a0664db3658c422ec5c8d8 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Mon, 24 Jun 2024 07:08:57 +0900
Subject: firewire: core: add tracepoints events for queueing packets of
 isochronous context

It is helpful to trace the queueing packets of isochronous context when
the core function is requested them by both in-kernel unit drivers and
userspace applications.

This commit adds some tracepoints events for the aim.

Link: https://lore.kernel.org/r/20240623220859.851685-7-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 79 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index ad1546120aa3..0381b3ca4d0e 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -742,6 +742,85 @@ DEFINE_EVENT_CONDITION(isoc_flush_completions_template, isoc_inbound_multiple_fl
 	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
 );
 
+#define TP_STRUCT__entry_iso_packet(ctx, buffer_offset, packet)				\
+	TP_STRUCT__entry(								\
+		__field(u64, context)							\
+		__field(u8, card_index)							\
+		__field(u32, buffer_offset)						\
+		__field(bool, interrupt)						\
+		__field(bool, skip)							\
+		__field(u8, sy)								\
+		__field(u8, tag)							\
+		__dynamic_array(u32, header, packet->header_length / QUADLET_SIZE)	\
+	)
+
+#define TP_fast_assign_iso_packet(ctx, buffer_offset, packet)		\
+	TP_fast_assign(							\
+		__entry->context = (uintptr_t)ctx;			\
+		__entry->card_index = ctx->card->index;			\
+		__entry->buffer_offset = buffer_offset;			\
+		__entry->interrupt = packet->interrupt;			\
+		__entry->skip = packet->skip;				\
+		__entry->sy = packet->sy;				\
+		__entry->tag = packet->tag;				\
+		memcpy(__get_dynamic_array(header), packet->header,	\
+		       __get_dynamic_array_len(header));		\
+	)
+
+TRACE_EVENT_CONDITION(isoc_outbound_queue,
+	TP_PROTO(const struct fw_iso_context *ctx, unsigned long buffer_offset, const struct fw_iso_packet *packet),
+	TP_ARGS(ctx, buffer_offset, packet),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT),
+	TP_STRUCT__entry_iso_packet(ctx, buffer_offset, packet),
+	TP_fast_assign_iso_packet(ctx, buffer_offset, packet),
+	TP_printk(
+		"context=0x%llx card_index=%u buffer_offset=0x%x interrupt=%s skip=%s sy=%d tag=%u header=%s",
+		__entry->context,
+		__entry->card_index,
+		__entry->buffer_offset,
+		__entry->interrupt ? "true" : "false",
+		__entry->skip ? "true" : "false",
+		__entry->sy,
+		__entry->tag,
+		__print_array(__get_dynamic_array(header),
+			      __get_dynamic_array_len(header) / QUADLET_SIZE, QUADLET_SIZE)
+	)
+);
+
+TRACE_EVENT_CONDITION(isoc_inbound_single_queue,
+	TP_PROTO(const struct fw_iso_context *ctx, unsigned long buffer_offset, const struct fw_iso_packet *packet),
+	TP_ARGS(ctx, buffer_offset, packet),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE),
+	TP_STRUCT__entry_iso_packet(ctx, buffer_offset, packet),
+	TP_fast_assign_iso_packet(ctx, buffer_offset, packet),
+	TP_printk(
+		"context=0x%llx card_index=%u buffer_offset=0x%x interrupt=%s skip=%s",
+		__entry->context,
+		__entry->card_index,
+		__entry->buffer_offset,
+		__entry->interrupt ? "true" : "false",
+		__entry->skip ? "true" : "false"
+	)
+);
+
+TRACE_EVENT_CONDITION(isoc_inbound_multiple_queue,
+	TP_PROTO(const struct fw_iso_context *ctx, unsigned long buffer_offset, const struct fw_iso_packet *packet),
+	TP_ARGS(ctx, buffer_offset, packet),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL),
+	TP_STRUCT__entry_iso_packet(ctx, buffer_offset, packet),
+	TP_fast_assign_iso_packet(ctx, buffer_offset, packet),
+	TP_printk(
+		"context=0x%llx card_index=%u buffer_offset=0x%x interrupt=%s",
+		__entry->context,
+		__entry->card_index,
+		__entry->buffer_offset,
+		__entry->interrupt ? "true" : "false"
+	)
+);
+
+#undef TP_STRUCT__entry_iso_packet
+#undef TP_fast_assign_iso_packet
+
 #undef QUADLET_SIZE
 
 #endif // _FIREWIRE_TRACE_EVENT_H
-- 
cgit v1.2.3


From daf763c2d6d1dc50586e351aca868c7786d3c84e Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Mon, 24 Jun 2024 07:08:58 +0900
Subject: firewire: core: add tracepoints events for completions of packets in
 isochronous context

It is helpful to trace completion of packets in isochronous context when
the core function is requested them by both in-kernel units driver and
userspace applications.

This commit adds some tracepoints events for the aim.

Link: https://lore.kernel.org/r/20240623220859.851685-8-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 78 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index 0381b3ca4d0e..d9158a134beb 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -821,6 +821,84 @@ TRACE_EVENT_CONDITION(isoc_inbound_multiple_queue,
 #undef TP_STRUCT__entry_iso_packet
 #undef TP_fast_assign_iso_packet
 
+#ifndef show_cause
+enum fw_iso_context_completions_cause {
+	FW_ISO_CONTEXT_COMPLETIONS_CAUSE_FLUSH = 0,
+	FW_ISO_CONTEXT_COMPLETIONS_CAUSE_IRQ,
+	FW_ISO_CONTEXT_COMPLETIONS_CAUSE_HEADER_OVERFLOW,
+};
+#define show_cause(cause) 								\
+	__print_symbolic(cause,								\
+		{ FW_ISO_CONTEXT_COMPLETIONS_CAUSE_FLUSH, "FLUSH" },			\
+		{ FW_ISO_CONTEXT_COMPLETIONS_CAUSE_IRQ, "IRQ" },			\
+		{ FW_ISO_CONTEXT_COMPLETIONS_CAUSE_HEADER_OVERFLOW, "HEADER_OVERFLOW" }	\
+	)
+#endif
+
+DECLARE_EVENT_CLASS(isoc_single_completions_template,
+	TP_PROTO(const struct fw_iso_context *ctx, u16 timestamp, enum fw_iso_context_completions_cause cause, const u32 *header, unsigned int header_length),
+	TP_ARGS(ctx, timestamp, cause, header, header_length),
+	TP_STRUCT__entry(
+		__field(u64, context)
+		__field(u8, card_index)
+		__field(u16, timestamp)
+		__field(u8, cause)
+		__dynamic_array(u32, header, header_length / QUADLET_SIZE)
+	),
+	TP_fast_assign(
+		__entry->context = (uintptr_t)ctx;
+		__entry->card_index = ctx->card->index;
+		__entry->timestamp = timestamp;
+		__entry->cause = cause;
+		memcpy(__get_dynamic_array(header), header, __get_dynamic_array_len(header));
+	),
+	TP_printk(
+		"context=0x%llx card_index=%u timestap=0x%04x cause=%s header=%s",
+		__entry->context,
+		__entry->card_index,
+		__entry->timestamp,
+		show_cause(__entry->cause),
+		__print_array(__get_dynamic_array(header),
+			      __get_dynamic_array_len(header) / QUADLET_SIZE, QUADLET_SIZE)
+	)
+)
+
+DEFINE_EVENT_CONDITION(isoc_single_completions_template, isoc_outbound_completions,
+	TP_PROTO(const struct fw_iso_context *ctx, u16 timestamp, enum fw_iso_context_completions_cause cause, const u32 *header, unsigned int header_length),
+	TP_ARGS(ctx, timestamp, cause, header, header_length),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT)
+);
+
+DEFINE_EVENT_CONDITION(isoc_single_completions_template, isoc_inbound_single_completions,
+	TP_PROTO(const struct fw_iso_context *ctx, u16 timestamp, enum fw_iso_context_completions_cause cause, const u32 *header, unsigned int header_length),
+	TP_ARGS(ctx, timestamp, cause, header, header_length),
+	TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE)
+);
+
+TRACE_EVENT(isoc_inbound_multiple_completions,
+	TP_PROTO(const struct fw_iso_context *ctx, unsigned int completed, enum fw_iso_context_completions_cause cause),
+	TP_ARGS(ctx, completed, cause),
+	TP_STRUCT__entry(
+		__field(u64, context)
+		__field(u8, card_index)
+		__field(u16, completed)
+		__field(u8, cause)
+	),
+	TP_fast_assign(
+		__entry->context = (uintptr_t)ctx;
+		__entry->card_index = ctx->card->index;
+		__entry->completed = completed;
+		__entry->cause = cause;
+	),
+	TP_printk(
+		"context=0x%llx card_index=%u comleted=%u cause=%s",
+		__entry->context,
+		__entry->card_index,
+		__entry->completed,
+		show_cause(__entry->cause)
+	)
+);
+
 #undef QUADLET_SIZE
 
 #endif // _FIREWIRE_TRACE_EVENT_H
-- 
cgit v1.2.3


From 07e4fd4c0592ad57fe4c5033393d30362dbfaa5d Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 20 Jun 2024 15:21:51 +0200
Subject: locking/local_lock: Introduce guard definition for local_lock.

Introduce lock guard definition for local_lock_t. There are no users
yet.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20240620132727.660738-2-bigeasy@linutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/local_lock.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
index e55010fa7329..82366a37f447 100644
--- a/include/linux/local_lock.h
+++ b/include/linux/local_lock.h
@@ -51,4 +51,15 @@
 #define local_unlock_irqrestore(lock, flags)			\
 	__local_unlock_irqrestore(lock, flags)
 
+DEFINE_GUARD(local_lock, local_lock_t __percpu*,
+	     local_lock(_T),
+	     local_unlock(_T))
+DEFINE_GUARD(local_lock_irq, local_lock_t __percpu*,
+	     local_lock_irq(_T),
+	     local_unlock_irq(_T))
+DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu,
+		    local_lock_irqsave(_T->lock, _T->flags),
+		    local_unlock_irqrestore(_T->lock, _T->flags),
+		    unsigned long flags)
+
 #endif
-- 
cgit v1.2.3


From c5bcab7558220bbad8bd4863576afd1b340ce29f Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 20 Jun 2024 15:21:52 +0200
Subject: locking/local_lock: Add local nested BH locking infrastructure.

Add local_lock_nested_bh() locking. It is based on local_lock_t and the
naming follows the preempt_disable_nested() example.

For !PREEMPT_RT + !LOCKDEP it is a per-CPU annotation for locking
assumptions based on local_bh_disable(). The macro is optimized away
during compilation.
For !PREEMPT_RT + LOCKDEP the local_lock_nested_bh() is reduced to
the usual lock-acquire plus lockdep_assert_in_softirq() - ensuring that
BH is disabled.

For PREEMPT_RT local_lock_nested_bh() acquires the specified per-CPU
lock. It does not disable CPU migration because it relies on
local_bh_disable() disabling CPU migration.
With LOCKDEP it performans the usual lockdep checks as with !PREEMPT_RT.
Due to include hell the softirq check has been moved spinlock.c.

The intention is to use this locking in places where locking of a per-CPU
variable relies on BH being disabled. Instead of treating disabled
bottom halves as a big per-CPU lock, PREEMPT_RT can use this to reduce
the locking scope to what actually needs protecting.
A side effect is that it also documents the protection scope of the
per-CPU variables.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20240620132727.660738-3-bigeasy@linutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/local_lock.h          | 10 ++++++++++
 include/linux/local_lock_internal.h | 31 +++++++++++++++++++++++++++++++
 include/linux/lockdep.h             |  3 +++
 3 files changed, 44 insertions(+)

(limited to 'include')

diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
index 82366a37f447..091dc0b6bdfb 100644
--- a/include/linux/local_lock.h
+++ b/include/linux/local_lock.h
@@ -62,4 +62,14 @@ DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu,
 		    local_unlock_irqrestore(_T->lock, _T->flags),
 		    unsigned long flags)
 
+#define local_lock_nested_bh(_lock)				\
+	__local_lock_nested_bh(_lock)
+
+#define local_unlock_nested_bh(_lock)				\
+	__local_unlock_nested_bh(_lock)
+
+DEFINE_GUARD(local_lock_nested_bh, local_lock_t __percpu*,
+	     local_lock_nested_bh(_T),
+	     local_unlock_nested_bh(_T))
+
 #endif
diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h
index 975e33b793a7..8dd71fbbb6d2 100644
--- a/include/linux/local_lock_internal.h
+++ b/include/linux/local_lock_internal.h
@@ -62,6 +62,17 @@ do {								\
 	local_lock_debug_init(lock);				\
 } while (0)
 
+#define __spinlock_nested_bh_init(lock)				\
+do {								\
+	static struct lock_class_key __key;			\
+								\
+	debug_check_no_locks_freed((void *)lock, sizeof(*lock));\
+	lockdep_init_map_type(&(lock)->dep_map, #lock, &__key,  \
+			      0, LD_WAIT_CONFIG, LD_WAIT_INV,	\
+			      LD_LOCK_NORMAL);			\
+	local_lock_debug_init(lock);				\
+} while (0)
+
 #define __local_lock(lock)					\
 	do {							\
 		preempt_disable();				\
@@ -98,6 +109,15 @@ do {								\
 		local_irq_restore(flags);			\
 	} while (0)
 
+#define __local_lock_nested_bh(lock)				\
+	do {							\
+		lockdep_assert_in_softirq();			\
+		local_lock_acquire(this_cpu_ptr(lock));	\
+	} while (0)
+
+#define __local_unlock_nested_bh(lock)				\
+	local_lock_release(this_cpu_ptr(lock))
+
 #else /* !CONFIG_PREEMPT_RT */
 
 /*
@@ -138,4 +158,15 @@ typedef spinlock_t local_lock_t;
 
 #define __local_unlock_irqrestore(lock, flags)	__local_unlock(lock)
 
+#define __local_lock_nested_bh(lock)				\
+do {								\
+	lockdep_assert_in_softirq_func();			\
+	spin_lock(this_cpu_ptr(lock));				\
+} while (0)
+
+#define __local_unlock_nested_bh(lock)				\
+do {								\
+	spin_unlock(this_cpu_ptr((lock)));			\
+} while (0)
+
 #endif /* CONFIG_PREEMPT_RT */
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 08b0d1d9d78b..3f5a551579cc 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -600,6 +600,8 @@ do {									\
 		     (!in_softirq() || in_irq() || in_nmi()));		\
 } while (0)
 
+extern void lockdep_assert_in_softirq_func(void);
+
 #else
 # define might_lock(lock) do { } while (0)
 # define might_lock_read(lock) do { } while (0)
@@ -613,6 +615,7 @@ do {									\
 # define lockdep_assert_preemption_enabled() do { } while (0)
 # define lockdep_assert_preemption_disabled() do { } while (0)
 # define lockdep_assert_in_softirq() do { } while (0)
+# define lockdep_assert_in_softirq_func() do { } while (0)
 #endif
 
 #ifdef CONFIG_PROVE_RAW_LOCK_NESTING
-- 
cgit v1.2.3


From ebad6d0334793f16a16e5320182f665292a31e0c Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 20 Jun 2024 15:21:56 +0200
Subject: net/ipv4: Use nested-BH locking for ipv4_tcp_sk.

ipv4_tcp_sk is a per-CPU variable and relies on disabled BH for its
locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT
this data structure requires explicit locking.

Make a struct with a sock member (original ipv4_tcp_sk) and a
local_lock_t and use local_lock_nested_bh() for locking. This change
adds only lockdep coverage and does not alter the functional behaviour
for !PREEMPT_RT.

Cc: David Ahern <dsahern@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20240620132727.660738-7-bigeasy@linutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sock.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index b30ea0c342a6..cce23ac4d514 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -544,6 +544,11 @@ struct sock {
 	netns_tracker		ns_tracker;
 };
 
+struct sock_bh_locked {
+	struct sock *sock;
+	local_lock_t bh_lock;
+};
+
 enum sk_pacing {
 	SK_PACING_NONE		= 0,
 	SK_PACING_NEEDED	= 1,
-- 
cgit v1.2.3


From ecefbc09e8ee768ae85b7bb7a1de8c8287397d68 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 20 Jun 2024 15:21:58 +0200
Subject: net: softnet_data: Make xmit per task.

Softirq is preemptible on PREEMPT_RT. Without a per-CPU lock in
local_bh_disable() there is no guarantee that only one device is
transmitting at a time.
With preemption and multiple senders it is possible that the per-CPU
`recursion' counter gets incremented by different threads and exceeds
XMIT_RECURSION_LIMIT leading to a false positive recursion alert.
The `more' member is subject to similar problems if set by one thread
for one driver and wrongly used by another driver within another thread.

Instead of adding a lock to protect the per-CPU variable it is simpler
to make xmit per-task. Sending and receiving skbs happens always
in thread context anyway.

Having a lock to protected the per-CPU counter would block/ serialize two
sending threads needlessly. It would also require a recursive lock to
ensure that the owner can increment the counter further.

Make the softnet_data.xmit a task_struct member on PREEMPT_RT. Add
needed wrapper.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20240620132727.660738-9-bigeasy@linutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h      | 42 +++++++++++++++++++++++++++++++-----------
 include/linux/netdevice_xmit.h | 13 +++++++++++++
 include/linux/sched.h          |  5 ++++-
 3 files changed, 48 insertions(+), 12 deletions(-)
 create mode 100644 include/linux/netdevice_xmit.h

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c83b390191d4..f6fc9066147d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -43,6 +43,7 @@
 
 #include <linux/netdev_features.h>
 #include <linux/neighbour.h>
+#include <linux/netdevice_xmit.h>
 #include <uapi/linux/netdevice.h>
 #include <uapi/linux/if_bonding.h>
 #include <uapi/linux/pkt_cls.h>
@@ -3223,13 +3224,7 @@ struct softnet_data {
 	struct sk_buff_head	xfrm_backlog;
 #endif
 	/* written and read only by owning cpu: */
-	struct {
-		u16 recursion;
-		u8  more;
-#ifdef CONFIG_NET_EGRESS
-		u8  skip_txqueue;
-#endif
-	} xmit;
+	struct netdev_xmit xmit;
 #ifdef CONFIG_RPS
 	/* input_queue_head should be written by cpu owning this struct,
 	 * and only read by other cpus. Worth using a cache line.
@@ -3257,10 +3252,18 @@ struct softnet_data {
 
 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 
+#ifndef CONFIG_PREEMPT_RT
 static inline int dev_recursion_level(void)
 {
 	return this_cpu_read(softnet_data.xmit.recursion);
 }
+#else
+static inline int dev_recursion_level(void)
+{
+	return current->net_xmit.recursion;
+}
+
+#endif
 
 void __netif_schedule(struct Qdisc *q);
 void netif_schedule_queue(struct netdev_queue *txq);
@@ -4872,18 +4875,35 @@ static inline ktime_t netdev_get_tstamp(struct net_device *dev,
 	return hwtstamps->hwtstamp;
 }
 
-static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
-					      struct sk_buff *skb, struct net_device *dev,
-					      bool more)
+#ifndef CONFIG_PREEMPT_RT
+static inline void netdev_xmit_set_more(bool more)
 {
 	__this_cpu_write(softnet_data.xmit.more, more);
-	return ops->ndo_start_xmit(skb, dev);
 }
 
 static inline bool netdev_xmit_more(void)
 {
 	return __this_cpu_read(softnet_data.xmit.more);
 }
+#else
+static inline void netdev_xmit_set_more(bool more)
+{
+	current->net_xmit.more = more;
+}
+
+static inline bool netdev_xmit_more(void)
+{
+	return current->net_xmit.more;
+}
+#endif
+
+static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
+					      struct sk_buff *skb, struct net_device *dev,
+					      bool more)
+{
+	netdev_xmit_set_more(more);
+	return ops->ndo_start_xmit(skb, dev);
+}
 
 static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev,
 					    struct netdev_queue *txq, bool more)
diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h
new file mode 100644
index 000000000000..38325e070296
--- /dev/null
+++ b/include/linux/netdevice_xmit.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _LINUX_NETDEVICE_XMIT_H
+#define _LINUX_NETDEVICE_XMIT_H
+
+struct netdev_xmit {
+	u16 recursion;
+	u8  more;
+#ifdef CONFIG_NET_EGRESS
+	u8  skip_txqueue;
+#endif
+};
+
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 61591ac6eab6..5187486c2522 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -36,6 +36,7 @@
 #include <linux/signal_types.h>
 #include <linux/syscall_user_dispatch_types.h>
 #include <linux/mm_types_task.h>
+#include <linux/netdevice_xmit.h>
 #include <linux/task_io_accounting.h>
 #include <linux/posix-timers_types.h>
 #include <linux/restart_block.h>
@@ -975,7 +976,9 @@ struct task_struct {
 	/* delay due to memory thrashing */
 	unsigned                        in_thrashing:1;
 #endif
-
+#ifdef CONFIG_PREEMPT_RT
+	struct netdev_xmit		net_xmit;
+#endif
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
 	struct restart_block		restart_block;
-- 
cgit v1.2.3


From b22800f9d3b142bf2550dd47ff738b9feedc1093 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 20 Jun 2024 15:22:00 +0200
Subject: dev: Use nested-BH locking for softnet_data.process_queue.

softnet_data::process_queue is a per-CPU variable and relies on disabled
BH for its locking. Without per-CPU locking in local_bh_disable() on
PREEMPT_RT this data structure requires explicit locking.

softnet_data::input_queue_head can be updated lockless. This is fine
because this value is only update CPU local by the local backlog_napi
thread.

Add a local_lock_t to softnet_data and use local_lock_nested_bh() for locking
of process_queue. This change adds only lockdep coverage and does not
alter the functional behaviour for !PREEMPT_RT.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20240620132727.660738-11-bigeasy@linutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f6fc9066147d..4e81660b4462 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3202,6 +3202,7 @@ static inline bool dev_has_header(const struct net_device *dev)
 struct softnet_data {
 	struct list_head	poll_list;
 	struct sk_buff_head	process_queue;
+	local_lock_t		process_queue_bh_lock;
 
 	/* stats */
 	unsigned int		processed;
-- 
cgit v1.2.3


From d1542d4ae4dfdc47c9b3205ebe849ed23af213dd Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 20 Jun 2024 15:22:02 +0200
Subject: seg6: Use nested-BH locking for seg6_bpf_srh_states.

The access to seg6_bpf_srh_states is protected by disabling preemption.
Based on the code, the entry point is input_action_end_bpf() and
every other function (the bpf helper functions bpf_lwt_seg6_*()), that
is accessing seg6_bpf_srh_states, should be called from within
input_action_end_bpf().

input_action_end_bpf() accesses seg6_bpf_srh_states first at the top of
the function and then disables preemption. This looks wrong because if
preemption needs to be disabled as part of the locking mechanism then
the variable shouldn't be accessed beforehand.

Looking at how it is used via test_lwt_seg6local.sh then
input_action_end_bpf() is always invoked from softirq context. If this
is always the case then the preempt_disable() statement is superfluous.
If this is not always invoked from softirq then disabling only
preemption is not sufficient.

Replace the preempt_disable() statement with nested-BH locking. This is
not an equivalent replacement as it assumes that the invocation of
input_action_end_bpf() always occurs in softirq context and thus the
preempt_disable() is superfluous.
Add a local_lock_t the data structure and use local_lock_nested_bh() for
locking. Add lockdep_assert_held() to ensure the lock is held while the
per-CPU variable is referenced in the helper functions.

Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: David Ahern <dsahern@kernel.org>
Cc: Hao Luo <haoluo@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Song Liu <song@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Cc: Yonghong Song <yonghong.song@linux.dev>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20240620132727.660738-13-bigeasy@linutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/seg6_local.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h
index 3fab9dec2ec4..888c1ce6f527 100644
--- a/include/net/seg6_local.h
+++ b/include/net/seg6_local.h
@@ -19,6 +19,7 @@ extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
 extern bool seg6_bpf_has_valid_srh(struct sk_buff *skb);
 
 struct seg6_bpf_srh_state {
+	local_lock_t bh_lock;
 	struct ipv6_sr_hdr *srh;
 	u16 hdrlen;
 	bool valid;
-- 
cgit v1.2.3


From 401cb7dae8130fd34eb84648e02ab4c506df7d5e Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 20 Jun 2024 15:22:04 +0200
Subject: net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The XDP redirect process is two staged:
- bpf_prog_run_xdp() is invoked to run a eBPF program which inspects the
  packet and makes decisions. While doing that, the per-CPU variable
  bpf_redirect_info is used.

- Afterwards xdp_do_redirect() is invoked and accesses bpf_redirect_info
  and it may also access other per-CPU variables like xskmap_flush_list.

At the very end of the NAPI callback, xdp_do_flush() is invoked which
does not access bpf_redirect_info but will touch the individual per-CPU
lists.

The per-CPU variables are only used in the NAPI callback hence disabling
bottom halves is the only protection mechanism. Users from preemptible
context (like cpu_map_kthread_run()) explicitly disable bottom halves
for protections reasons.
Without locking in local_bh_disable() on PREEMPT_RT this data structure
requires explicit locking.

PREEMPT_RT has forced-threaded interrupts enabled and every
NAPI-callback runs in a thread. If each thread has its own data
structure then locking can be avoided.

Create a struct bpf_net_context which contains struct bpf_redirect_info.
Define the variable on stack, use bpf_net_ctx_set() to save a pointer to
it, bpf_net_ctx_clear() removes it again.
The bpf_net_ctx_set() may nest. For instance a function can be used from
within NET_RX_SOFTIRQ/ net_rx_action which uses bpf_net_ctx_set() and
NET_TX_SOFTIRQ which does not. Therefore only the first invocations
updates the pointer.
Use bpf_net_ctx_get_ri() as a wrapper to retrieve the current struct
bpf_redirect_info. The returned data structure is zero initialized to
ensure nothing is leaked from stack. This is done on first usage of the
struct. bpf_net_ctx_set() sets bpf_redirect_info::kern_flags to 0 to
note that initialisation is required. First invocation of
bpf_net_ctx_get_ri() will memset() the data structure and update
bpf_redirect_info::kern_flags.
bpf_redirect_info::nh is excluded from memset because it is only used
once BPF_F_NEIGH is set which also sets the nh member. The kern_flags is
moved past nh to exclude it from memset.

The pointer to bpf_net_context is saved task's task_struct. Using
always the bpf_net_context approach has the advantage that there is
almost zero differences between PREEMPT_RT and non-PREEMPT_RT builds.

Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Eduard Zingerman <eddyz87@gmail.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Song Liu <song@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Cc: Yonghong Song <yonghong.song@linux.dev>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20240620132727.660738-15-bigeasy@linutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/filter.h | 56 +++++++++++++++++++++++++++++++++++++++++---------
 include/linux/sched.h  |  3 +++
 2 files changed, 49 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index b02aea291b7e..0a7f6e4a00b6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -733,21 +733,59 @@ struct bpf_nh_params {
 	};
 };
 
+/* flags for bpf_redirect_info kern_flags */
+#define BPF_RI_F_RF_NO_DIRECT	BIT(0)	/* no napi_direct on return_frame */
+#define BPF_RI_F_RI_INIT	BIT(1)
+
 struct bpf_redirect_info {
 	u64 tgt_index;
 	void *tgt_value;
 	struct bpf_map *map;
 	u32 flags;
-	u32 kern_flags;
 	u32 map_id;
 	enum bpf_map_type map_type;
 	struct bpf_nh_params nh;
+	u32 kern_flags;
 };
 
-DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
+struct bpf_net_context {
+	struct bpf_redirect_info ri;
+};
 
-/* flags for bpf_redirect_info kern_flags */
-#define BPF_RI_F_RF_NO_DIRECT	BIT(0)	/* no napi_direct on return_frame */
+static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bpf_net_ctx)
+{
+	struct task_struct *tsk = current;
+
+	if (tsk->bpf_net_context != NULL)
+		return NULL;
+	bpf_net_ctx->ri.kern_flags = 0;
+
+	tsk->bpf_net_context = bpf_net_ctx;
+	return bpf_net_ctx;
+}
+
+static inline void bpf_net_ctx_clear(struct bpf_net_context *bpf_net_ctx)
+{
+	if (bpf_net_ctx)
+		current->bpf_net_context = NULL;
+}
+
+static inline struct bpf_net_context *bpf_net_ctx_get(void)
+{
+	return current->bpf_net_context;
+}
+
+static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void)
+{
+	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+	if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_RI_INIT)) {
+		memset(&bpf_net_ctx->ri, 0, offsetof(struct bpf_net_context, ri.nh));
+		bpf_net_ctx->ri.kern_flags |= BPF_RI_F_RI_INIT;
+	}
+
+	return &bpf_net_ctx->ri;
+}
 
 /* Compute the linear packet data range [data, data_end) which
  * will be accessed by various program types (cls_bpf, act_bpf,
@@ -1018,25 +1056,23 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt);
 
-void bpf_clear_redirect_map(struct bpf_map *map);
-
 static inline bool xdp_return_frame_no_direct(void)
 {
-	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
 
 	return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT;
 }
 
 static inline void xdp_set_return_frame_no_direct(void)
 {
-	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
 
 	ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT;
 }
 
 static inline void xdp_clear_return_frame_no_direct(void)
 {
-	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
 
 	ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT;
 }
@@ -1592,7 +1628,7 @@ static __always_inline long __bpf_xdp_redirect_map(struct bpf_map *map, u64 inde
 						   u64 flags, const u64 flag_mask,
 						   void *lookup_elem(struct bpf_map *map, u32 key))
 {
-	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	struct bpf_redirect_info *ri = bpf_net_ctx_get_ri();
 	const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX;
 
 	/* Lower bits of the flags are used as return code on lookup failure */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5187486c2522..5ff5e65a4627 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -54,6 +54,7 @@ struct bio_list;
 struct blk_plug;
 struct bpf_local_storage;
 struct bpf_run_ctx;
+struct bpf_net_context;
 struct capture_control;
 struct cfs_rq;
 struct fs_struct;
@@ -1509,6 +1510,8 @@ struct task_struct {
 	/* Used for BPF run context */
 	struct bpf_run_ctx		*bpf_ctx;
 #endif
+	/* Used by BPF for per-TASK xdp storage */
+	struct bpf_net_context		*bpf_net_context;
 
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 	unsigned long			lowest_stack;
-- 
cgit v1.2.3


From 3f9fe37d9e16a6cfd5f4d1f536686ea71db3196f Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 20 Jun 2024 15:22:05 +0200
Subject: net: Move per-CPU flush-lists to bpf_net_context on PREEMPT_RT.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The per-CPU flush lists, which are accessed from within the NAPI callback
(xdp_do_flush() for instance), are per-CPU. There are subject to the
same problem as struct bpf_redirect_info.

Add the per-CPU lists cpu_map_flush_list, dev_map_flush_list and
xskmap_map_flush_list to struct bpf_net_context. Add wrappers for the
access. The lists initialized on first usage (similar to
bpf_net_ctx_get_ri()).

Cc: "Björn Töpel" <bjorn@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Eduard Zingerman <eddyz87@gmail.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Cc: Magnus Karlsson <magnus.karlsson@intel.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Song Liu <song@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Cc: Yonghong Song <yonghong.song@linux.dev>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20240620132727.660738-16-bigeasy@linutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/filter.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0a7f6e4a00b6..c0349522de8f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -736,6 +736,9 @@ struct bpf_nh_params {
 /* flags for bpf_redirect_info kern_flags */
 #define BPF_RI_F_RF_NO_DIRECT	BIT(0)	/* no napi_direct on return_frame */
 #define BPF_RI_F_RI_INIT	BIT(1)
+#define BPF_RI_F_CPU_MAP_INIT	BIT(2)
+#define BPF_RI_F_DEV_MAP_INIT	BIT(3)
+#define BPF_RI_F_XSK_MAP_INIT	BIT(4)
 
 struct bpf_redirect_info {
 	u64 tgt_index;
@@ -750,6 +753,9 @@ struct bpf_redirect_info {
 
 struct bpf_net_context {
 	struct bpf_redirect_info ri;
+	struct list_head cpu_map_flush_list;
+	struct list_head dev_map_flush_list;
+	struct list_head xskmap_map_flush_list;
 };
 
 static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bpf_net_ctx)
@@ -787,6 +793,42 @@ static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void)
 	return &bpf_net_ctx->ri;
 }
 
+static inline struct list_head *bpf_net_ctx_get_cpu_map_flush_list(void)
+{
+	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+	if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_CPU_MAP_INIT)) {
+		INIT_LIST_HEAD(&bpf_net_ctx->cpu_map_flush_list);
+		bpf_net_ctx->ri.kern_flags |= BPF_RI_F_CPU_MAP_INIT;
+	}
+
+	return &bpf_net_ctx->cpu_map_flush_list;
+}
+
+static inline struct list_head *bpf_net_ctx_get_dev_flush_list(void)
+{
+	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+	if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_DEV_MAP_INIT)) {
+		INIT_LIST_HEAD(&bpf_net_ctx->dev_map_flush_list);
+		bpf_net_ctx->ri.kern_flags |= BPF_RI_F_DEV_MAP_INIT;
+	}
+
+	return &bpf_net_ctx->dev_map_flush_list;
+}
+
+static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void)
+{
+	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+
+	if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_XSK_MAP_INIT)) {
+		INIT_LIST_HEAD(&bpf_net_ctx->xskmap_map_flush_list);
+		bpf_net_ctx->ri.kern_flags |= BPF_RI_F_XSK_MAP_INIT;
+	}
+
+	return &bpf_net_ctx->xskmap_map_flush_list;
+}
+
 /* Compute the linear packet data range [data, data_end) which
  * will be accessed by various program types (cls_bpf, act_bpf,
  * lwt, ...). Subsystems allowing direct data access must (!)
-- 
cgit v1.2.3


From 26b97668e5339434e5df8ddc7b1898a37a350112 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 24 Jun 2024 13:47:22 -0600
Subject: io_uring: remove dead struct io_submit_state member

When the intermediate CQE aux cache got removed, any usage of the this
member went away. As it isn't used anymore, kill it.

Fixes: 902ce82c2aa1 ("io_uring: get rid of intermediate aux cqe caches")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index b48570eaa449..7abdc0927124 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -207,7 +207,6 @@ struct io_submit_state {
 	bool			need_plug;
 	bool			cq_flush;
 	unsigned short		submit_nr;
-	unsigned int		cqes_count;
 	struct blk_plug		plug;
 };
 
-- 
cgit v1.2.3


From 399ab86ea55039f9d0a5f621a68cb4631f796f37 Mon Sep 17 00:00:00 2001
From: Jeff Xu <jeffxu@chromium.org>
Date: Fri, 14 Jun 2024 23:20:14 +0000
Subject: /proc/pid/smaps: add mseal info for vma
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add sl in /proc/pid/smaps to indicate vma is sealed

Link: https://lkml.kernel.org/r/20240614232014.806352-2-jeffxu@google.com
Fixes: 8be7258aad44 ("mseal: add mseal syscall")
Signed-off-by: Jeff Xu <jeffxu@chromium.org>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Cc: Jann Horn <jannh@google.com>
Cc: Jorge Lucangeli Obes <jorgelo@chromium.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Stephen Röttger <sroettger@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9a5652c5fadd..eb7c96d24ac0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -406,6 +406,11 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_ALLOW_ANY_UNCACHED		VM_NONE
 #endif
 
+#ifdef CONFIG_64BIT
+/* VM is sealed, in vm_flags */
+#define VM_SEALED	_BITUL(63)
+#endif
+
 /* Bits set in the VMA until the stack is in its final location */
 #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
 
-- 
cgit v1.2.3


From ff202303c398ed56386ca4954154de9a96eb732a Mon Sep 17 00:00:00 2001
From: Stephen Brennan <stephen.s.brennan@oracle.com>
Date: Fri, 7 Jun 2024 13:29:53 -0700
Subject: mm: convert page type macros to enum

Changing PG_slab from a page flag to a page type in commit 46df8e73a4a3
("mm: free up PG_slab") in has the unintended consequence of removing the
PG_slab constant from kernel debuginfo.  The commit does add the value to
the vmcoreinfo note, which allows debuggers to find the value without
hardcoding it.  However it's most flexible to continue representing the
constant with an enum.  To that end, convert the page type fields into an
enum.  Debuggers will now be able to detect that PG_slab's type has
changed from enum pageflags to enum pagetype.

Link: https://lkml.kernel.org/r/20240607202954.1198180-1-stephen.s.brennan@oracle.com
Fixes: 46df8e73a4a3 ("mm: free up PG_slab")
Signed-off-by: Stephen Brennan <stephen.s.brennan@oracle.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hao Ge <gehao@kylinos.cn>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Omar Sandoval <osandov@osandov.com>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags.h | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 104078afe0b1..b9e914e1face 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -944,15 +944,18 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
  * mistaken for a page type value.
  */
 
-#define PAGE_TYPE_BASE	0xf0000000
-/* Reserve		0x0000007f to catch underflows of _mapcount */
-#define PAGE_MAPCOUNT_RESERVE	-128
-#define PG_buddy	0x00000080
-#define PG_offline	0x00000100
-#define PG_table	0x00000200
-#define PG_guard	0x00000400
-#define PG_hugetlb	0x00000800
-#define PG_slab		0x00001000
+enum pagetype {
+	PG_buddy	= 0x00000080,
+	PG_offline	= 0x00000100,
+	PG_table	= 0x00000200,
+	PG_guard	= 0x00000400,
+	PG_hugetlb	= 0x00000800,
+	PG_slab		= 0x00001000,
+
+	PAGE_TYPE_BASE	= 0xf0000000,
+	/* Reserve 0x0000007f to catch underflows of _mapcount */
+	PAGE_MAPCOUNT_RESERVE	= -128,
+};
 
 #define PageType(page, flag)						\
 	((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
-- 
cgit v1.2.3


From bf14ed81f571f8dba31cd72ab2e50fbcc877cc31 Mon Sep 17 00:00:00 2001
From: yangge <yangge1116@126.com>
Date: Thu, 20 Jun 2024 08:59:50 +0800
Subject: mm/page_alloc: Separate THP PCP into movable and non-movable
 categories

Since commit 5d0a661d808f ("mm/page_alloc: use only one PCP list for
THP-sized allocations") no longer differentiates the migration type of
pages in THP-sized PCP list, it's possible that non-movable allocation
requests may get a CMA page from the list, in some cases, it's not
acceptable.

If a large number of CMA memory are configured in system (for example, the
CMA memory accounts for 50% of the system memory), starting a virtual
machine with device passthrough will get stuck.  During starting the
virtual machine, it will call pin_user_pages_remote(..., FOLL_LONGTERM,
...) to pin memory.  Normally if a page is present and in CMA area,
pin_user_pages_remote() will migrate the page from CMA area to non-CMA
area because of FOLL_LONGTERM flag.  But if non-movable allocation
requests return CMA memory, migrate_longterm_unpinnable_pages() will
migrate a CMA page to another CMA page, which will fail to pass the check
in check_and_migrate_movable_pages() and cause migration endless.

Call trace:
pin_user_pages_remote
--__gup_longterm_locked // endless loops in this function
----_get_user_pages_locked
----check_and_migrate_movable_pages
------migrate_longterm_unpinnable_pages
--------alloc_migration_target

This problem will also have a negative impact on CMA itself.  For example,
when CMA is borrowed by THP, and we need to reclaim it through cma_alloc()
or dma_alloc_coherent(), we must move those pages out to ensure CMA's
users can retrieve that contigous memory.  Currently, CMA's memory is
occupied by non-movable pages, meaning we can't relocate them.  As a
result, cma_alloc() is more likely to fail.

To fix the problem above, we add one PCP list for THP, which will not
introduce a new cacheline for struct per_cpu_pages.  THP will have 2 PCP
lists, one PCP list is used by MOVABLE allocation, and the other PCP list
is used by UNMOVABLE allocation.  MOVABLE allocation contains GPF_MOVABLE,
and UNMOVABLE allocation contains GFP_UNMOVABLE and GFP_RECLAIMABLE.

Link: https://lkml.kernel.org/r/1718845190-4456-1-git-send-email-yangge1116@126.com
Fixes: 5d0a661d808f ("mm/page_alloc: use only one PCP list for THP-sized allocations")
Signed-off-by: yangge <yangge1116@126.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <21cnbao@gmail.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmzone.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8f9c9590a42c..586a8f0104d7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -654,13 +654,12 @@ enum zone_watermarks {
 };
 
 /*
- * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. One additional list
- * for THP which will usually be GFP_MOVABLE. Even if it is another type,
- * it should not contribute to serious fragmentation causing THP allocation
- * failures.
+ * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. Two additional lists
+ * are added for THP. One PCP list is used by GPF_MOVABLE, and the other PCP list
+ * is used by GFP_UNMOVABLE and GFP_RECLAIMABLE.
  */
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define NR_PCP_THP 1
+#define NR_PCP_THP 2
 #else
 #define NR_PCP_THP 0
 #endif
-- 
cgit v1.2.3


From cf28d7716e0c777f593948eea109dc273047dac7 Mon Sep 17 00:00:00 2001
From: Wei-Hsin Yeh <weihsinyeh168@gmail.com>
Date: Sun, 12 May 2024 13:42:11 +0800
Subject: include/linux/jhash.h: fix typos

Drop one '-' to adhere to coding style.
Replace 'arbitray' with 'arbitrary'.

Link: https://lkml.kernel.org/r/20240512054211.24726-1-weihsinyeh168@gmail.com
Signed-off-by: Wei-Hsin Yeh <weihsinyeh168@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/jhash.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/jhash.h b/include/linux/jhash.h
index ab7f8c152b89..fa26a2dd3b52 100644
--- a/include/linux/jhash.h
+++ b/include/linux/jhash.h
@@ -31,7 +31,7 @@
 /* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */
 #define jhash_mask(n)   (jhash_size(n)-1)
 
-/* __jhash_mix -- mix 3 32-bit values reversibly. */
+/* __jhash_mix - mix 3 32-bit values reversibly. */
 #define __jhash_mix(a, b, c)			\
 {						\
 	a -= c;  a ^= rol32(c, 4);  c += b;	\
@@ -60,7 +60,7 @@
 /* jhash - hash an arbitrary key
  * @k: sequence of bytes as key
  * @length: the length of the key
- * @initval: the previous hash, or an arbitray value
+ * @initval: the previous hash, or an arbitrary value
  *
  * The generic version, hashes an arbitrary sequence of bytes.
  * No alignment or length assumptions are made about the input key.
@@ -110,7 +110,7 @@ static inline u32 jhash(const void *key, u32 length, u32 initval)
 /* jhash2 - hash an array of u32's
  * @k: the key which must be an array of u32's
  * @length: the number of u32's in the key
- * @initval: the previous hash, or an arbitray value
+ * @initval: the previous hash, or an arbitrary value
  *
  * Returns the hash value of the key.
  */
-- 
cgit v1.2.3


From 873ce25766019dd017c1a3a10c19ac3fc4bf24aa Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Fri, 24 May 2024 23:29:46 +0800
Subject: lib min_heap: add type safe interface

Implement a type-safe interface for min_heap using strong type pointers
instead of void * in the data field.  This change includes adding small
macro wrappers around functions, enabling the use of __minheap_cast and
__minheap_obj_size macros for type casting and obtaining element size.
This implementation removes the necessity of passing element size in
min_heap_callbacks.  Additionally, introduce the MIN_HEAP_PREALLOCATED
macro for preallocating some elements.

Link: https://lkml.kernel.org/ioyfizrzq7w7mjrqcadtzsfgpuntowtjdw5pgn4qhvsdp4mqqg@nrlek5vmisbu
Link: https://lkml.kernel.org/r/20240524152958.919343-5-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: Brian Foster <bfoster@redhat.com>
Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
Cc: Coly Li <colyli@suse.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Sakai <msakai@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/min_heap.h | 79 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 51 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index d52daf45861b..92830f41642a 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -7,45 +7,53 @@
 #include <linux/types.h>
 
 /**
- * struct min_heap - Data structure to hold a min-heap.
- * @data: Start of array holding the heap elements.
+ * Data structure to hold a min-heap.
  * @nr: Number of elements currently in the heap.
  * @size: Maximum number of elements that can be held in current storage.
+ * @data: Pointer to the start of array holding the heap elements.
+ * @preallocated: Start of the static preallocated array holding the heap elements.
  */
-struct min_heap {
-	void *data;
-	int nr;
-	int size;
-};
+#define MIN_HEAP_PREALLOCATED(_type, _name, _nr)	\
+struct _name {	\
+	int nr;	\
+	int size;	\
+	_type *data;	\
+	_type preallocated[_nr];	\
+}
+
+#define DEFINE_MIN_HEAP(_type, _name) MIN_HEAP_PREALLOCATED(_type, _name, 0)
+
+typedef DEFINE_MIN_HEAP(char, min_heap_char) min_heap_char;
+
+#define __minheap_cast(_heap)		(typeof((_heap)->data[0]) *)
+#define __minheap_obj_size(_heap)	sizeof((_heap)->data[0])
 
 /**
  * struct min_heap_callbacks - Data/functions to customise the min_heap.
- * @elem_size: The nr of each element in bytes.
  * @less: Partial order function for this heap.
  * @swp: Swap elements function.
  */
 struct min_heap_callbacks {
-	int elem_size;
 	bool (*less)(const void *lhs, const void *rhs);
 	void (*swp)(void *lhs, void *rhs);
 };
 
 /* Sift the element at pos down the heap. */
 static __always_inline
-void min_heapify(struct min_heap *heap, int pos,
+void __min_heapify(min_heap_char *heap, int pos, size_t elem_size,
 		const struct min_heap_callbacks *func)
 {
 	void *left, *right;
 	void *data = heap->data;
-	void *root = data + pos * func->elem_size;
+	void *root = data + pos * elem_size;
 	int i = pos, j;
 
 	/* Find the sift-down path all the way to the leaves. */
 	for (;;) {
 		if (i * 2 + 2 >= heap->nr)
 			break;
-		left = data + (i * 2 + 1) * func->elem_size;
-		right = data + (i * 2 + 2) * func->elem_size;
+		left = data + (i * 2 + 1) * elem_size;
+		right = data + (i * 2 + 2) * elem_size;
 		i = func->less(left, right) ? i * 2 + 1 : i * 2 + 2;
 	}
 
@@ -54,31 +62,37 @@ void min_heapify(struct min_heap *heap, int pos,
 		i = i * 2 + 1;
 
 	/* Backtrack to the correct location. */
-	while (i != pos && func->less(root, data + i * func->elem_size))
+	while (i != pos && func->less(root, data + i * elem_size))
 		i = (i - 1) / 2;
 
 	/* Shift the element into its correct place. */
 	j = i;
 	while (i != pos) {
 		i = (i - 1) / 2;
-		func->swp(data + i * func->elem_size, data + j * func->elem_size);
+		func->swp(data + i * elem_size, data + j * elem_size);
 	}
 }
 
+#define min_heapify(_heap, _pos, _func)	\
+	__min_heapify((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func)
+
 /* Floyd's approach to heapification that is O(nr). */
 static __always_inline
-void min_heapify_all(struct min_heap *heap,
+void __min_heapify_all(min_heap_char *heap, size_t elem_size,
 		const struct min_heap_callbacks *func)
 {
 	int i;
 
 	for (i = heap->nr / 2 - 1; i >= 0; i--)
-		min_heapify(heap, i, func);
+		__min_heapify(heap, i, elem_size, func);
 }
 
+#define min_heapify_all(_heap, _func)	\
+	__min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func)
+
 /* Remove minimum element from the heap, O(log2(nr)). */
 static __always_inline
-void min_heap_pop(struct min_heap *heap,
+void __min_heap_pop(min_heap_char *heap, size_t elem_size,
 		const struct min_heap_callbacks *func)
 {
 	void *data = heap->data;
@@ -88,27 +102,33 @@ void min_heap_pop(struct min_heap *heap,
 
 	/* Place last element at the root (position 0) and then sift down. */
 	heap->nr--;
-	memcpy(data, data + (heap->nr * func->elem_size), func->elem_size);
-	min_heapify(heap, 0, func);
+	memcpy(data, data + (heap->nr * elem_size), elem_size);
+	__min_heapify(heap, 0, elem_size, func);
 }
 
+#define min_heap_pop(_heap, _func)	\
+	__min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func)
+
 /*
  * Remove the minimum element and then push the given element. The
  * implementation performs 1 sift (O(log2(nr))) and is therefore more
  * efficient than a pop followed by a push that does 2.
  */
 static __always_inline
-void min_heap_pop_push(struct min_heap *heap,
-		const void *element,
+void __min_heap_pop_push(min_heap_char *heap,
+		const void *element, size_t elem_size,
 		const struct min_heap_callbacks *func)
 {
-	memcpy(heap->data, element, func->elem_size);
-	min_heapify(heap, 0, func);
+	memcpy(heap->data, element, elem_size);
+	__min_heapify(heap, 0, elem_size, func);
 }
 
+#define min_heap_pop_push(_heap, _element, _func)	\
+	__min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func)
+
 /* Push an element on to the heap, O(log2(nr)). */
 static __always_inline
-void min_heap_push(struct min_heap *heap, const void *element,
+void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
 		const struct min_heap_callbacks *func)
 {
 	void *data = heap->data;
@@ -120,17 +140,20 @@ void min_heap_push(struct min_heap *heap, const void *element,
 
 	/* Place at the end of data. */
 	pos = heap->nr;
-	memcpy(data + (pos * func->elem_size), element, func->elem_size);
+	memcpy(data + (pos * elem_size), element, elem_size);
 	heap->nr++;
 
 	/* Sift child at pos up. */
 	for (; pos > 0; pos = (pos - 1) / 2) {
-		child = data + (pos * func->elem_size);
-		parent = data + ((pos - 1) / 2) * func->elem_size;
+		child = data + (pos * elem_size);
+		parent = data + ((pos - 1) / 2) * elem_size;
 		if (func->less(parent, child))
 			break;
 		func->swp(parent, child);
 	}
 }
 
+#define min_heap_push(_heap, _element, _func)	\
+	__min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func)
+
 #endif /* _LINUX_MIN_HEAP_H */
-- 
cgit v1.2.3


From e146683ccff9af2d26b70a9ea4c87f5a86eff0c4 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Fri, 24 May 2024 23:29:47 +0800
Subject: lib min_heap: add min_heap_init()

Add min_heap_init() for initializing heap with data, nr, and size.

Link: https://lkml.kernel.org/r/20240524152958.919343-6-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: Brian Foster <bfoster@redhat.com>
Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
Cc: Coly Li <colyli@suse.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Sakai <msakai@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/min_heap.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 92830f41642a..b384a4ea2afa 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -38,6 +38,21 @@ struct min_heap_callbacks {
 	void (*swp)(void *lhs, void *rhs);
 };
 
+/* Initialize a min-heap. */
+static __always_inline
+void __min_heap_init(min_heap_char *heap, void *data, int size)
+{
+	heap->nr = 0;
+	heap->size = size;
+	if (data)
+		heap->data = data;
+	else
+		heap->data = heap->preallocated;
+}
+
+#define min_heap_init(_heap, _data, _size)	\
+	__min_heap_init((min_heap_char *)_heap, _data, _size)
+
 /* Sift the element at pos down the heap. */
 static __always_inline
 void __min_heapify(min_heap_char *heap, int pos, size_t elem_size,
-- 
cgit v1.2.3


From 0562d54ddc3dc195f338bd8e816dd8ff154f44ad Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Fri, 24 May 2024 23:29:48 +0800
Subject: lib min_heap: add min_heap_peek()

Add min_heap_peek() to retrieve a pointer to the smallest element.  The
pointer is cast to the appropriate type of heap elements.

Link: https://lkml.kernel.org/r/20240524152958.919343-7-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: Brian Foster <bfoster@redhat.com>
Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
Cc: Coly Li <colyli@suse.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Sakai <msakai@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/min_heap.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index b384a4ea2afa..d9c4ae7ad0cc 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -53,6 +53,16 @@ void __min_heap_init(min_heap_char *heap, void *data, int size)
 #define min_heap_init(_heap, _data, _size)	\
 	__min_heap_init((min_heap_char *)_heap, _data, _size)
 
+/* Get the minimum element from the heap. */
+static __always_inline
+void *__min_heap_peek(struct min_heap_char *heap)
+{
+	return heap->nr ? heap->data : NULL;
+}
+
+#define min_heap_peek(_heap)	\
+	(__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap))
+
 /* Sift the element at pos down the heap. */
 static __always_inline
 void __min_heapify(min_heap_char *heap, int pos, size_t elem_size,
-- 
cgit v1.2.3


From b9d720e65a72c9754faf689e0859a5632853f4bc Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Fri, 24 May 2024 23:29:49 +0800
Subject: lib min_heap: add min_heap_full()

Add min_heap_full() which returns a boolean value indicating whether the
heap has reached its maximum capacity.

Link: https://lkml.kernel.org/r/20240524152958.919343-8-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: Brian Foster <bfoster@redhat.com>
Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
Cc: Coly Li <colyli@suse.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Sakai <msakai@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/min_heap.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index d9c4ae7ad0cc..f41898c05f5a 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -63,6 +63,16 @@ void *__min_heap_peek(struct min_heap_char *heap)
 #define min_heap_peek(_heap)	\
 	(__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap))
 
+/* Check if the heap is full. */
+static __always_inline
+bool __min_heap_full(min_heap_char *heap)
+{
+	return heap->nr == heap->size;
+}
+
+#define min_heap_full(_heap)	\
+	__min_heap_full((min_heap_char *)_heap)
+
 /* Sift the element at pos down the heap. */
 static __always_inline
 void __min_heapify(min_heap_char *heap, int pos, size_t elem_size,
-- 
cgit v1.2.3


From 267607e87599509a6a39a5f7dd3959365e58af27 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Fri, 24 May 2024 23:29:50 +0800
Subject: lib min_heap: add args for min_heap_callbacks

Add a third parameter 'args' for the 'less' and 'swp' functions in the
'struct min_heap_callbacks'.  This additional parameter allows these
comparison and swap functions to handle extra arguments when necessary.

Link: https://lkml.kernel.org/r/20240524152958.919343-9-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: Brian Foster <bfoster@redhat.com>
Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
Cc: Coly Li <colyli@suse.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Sakai <msakai@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/min_heap.h | 51 ++++++++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index f41898c05f5a..4acd0f4b3faf 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -34,8 +34,8 @@ typedef DEFINE_MIN_HEAP(char, min_heap_char) min_heap_char;
  * @swp: Swap elements function.
  */
 struct min_heap_callbacks {
-	bool (*less)(const void *lhs, const void *rhs);
-	void (*swp)(void *lhs, void *rhs);
+	bool (*less)(const void *lhs, const void *rhs, void *args);
+	void (*swp)(void *lhs, void *rhs, void *args);
 };
 
 /* Initialize a min-heap. */
@@ -76,7 +76,7 @@ bool __min_heap_full(min_heap_char *heap)
 /* Sift the element at pos down the heap. */
 static __always_inline
 void __min_heapify(min_heap_char *heap, int pos, size_t elem_size,
-		const struct min_heap_callbacks *func)
+		const struct min_heap_callbacks *func, void *args)
 {
 	void *left, *right;
 	void *data = heap->data;
@@ -89,7 +89,7 @@ void __min_heapify(min_heap_char *heap, int pos, size_t elem_size,
 			break;
 		left = data + (i * 2 + 1) * elem_size;
 		right = data + (i * 2 + 2) * elem_size;
-		i = func->less(left, right) ? i * 2 + 1 : i * 2 + 2;
+		i = func->less(left, right, args) ? i * 2 + 1 : i * 2 + 2;
 	}
 
 	/* Special case for the last leaf with no sibling. */
@@ -97,38 +97,38 @@ void __min_heapify(min_heap_char *heap, int pos, size_t elem_size,
 		i = i * 2 + 1;
 
 	/* Backtrack to the correct location. */
-	while (i != pos && func->less(root, data + i * elem_size))
+	while (i != pos && func->less(root, data + i * elem_size, args))
 		i = (i - 1) / 2;
 
 	/* Shift the element into its correct place. */
 	j = i;
 	while (i != pos) {
 		i = (i - 1) / 2;
-		func->swp(data + i * elem_size, data + j * elem_size);
+		func->swp(data + i * elem_size, data + j * elem_size, args);
 	}
 }
 
-#define min_heapify(_heap, _pos, _func)	\
-	__min_heapify((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func)
+#define min_heapify(_heap, _pos, _func, _args)	\
+	__min_heapify((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args)
 
 /* Floyd's approach to heapification that is O(nr). */
 static __always_inline
 void __min_heapify_all(min_heap_char *heap, size_t elem_size,
-		const struct min_heap_callbacks *func)
+		const struct min_heap_callbacks *func, void *args)
 {
 	int i;
 
 	for (i = heap->nr / 2 - 1; i >= 0; i--)
-		__min_heapify(heap, i, elem_size, func);
+		__min_heapify(heap, i, elem_size, func, args);
 }
 
-#define min_heapify_all(_heap, _func)	\
-	__min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func)
+#define min_heapify_all(_heap, _func, _args)	\
+	__min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
 
 /* Remove minimum element from the heap, O(log2(nr)). */
 static __always_inline
 void __min_heap_pop(min_heap_char *heap, size_t elem_size,
-		const struct min_heap_callbacks *func)
+		const struct min_heap_callbacks *func, void *args)
 {
 	void *data = heap->data;
 
@@ -138,11 +138,11 @@ void __min_heap_pop(min_heap_char *heap, size_t elem_size,
 	/* Place last element at the root (position 0) and then sift down. */
 	heap->nr--;
 	memcpy(data, data + (heap->nr * elem_size), elem_size);
-	__min_heapify(heap, 0, elem_size, func);
+	__min_heapify(heap, 0, elem_size, func, args);
 }
 
-#define min_heap_pop(_heap, _func)	\
-	__min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func)
+#define min_heap_pop(_heap, _func, _args)	\
+	__min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args)
 
 /*
  * Remove the minimum element and then push the given element. The
@@ -152,19 +152,20 @@ void __min_heap_pop(min_heap_char *heap, size_t elem_size,
 static __always_inline
 void __min_heap_pop_push(min_heap_char *heap,
 		const void *element, size_t elem_size,
-		const struct min_heap_callbacks *func)
+		const struct min_heap_callbacks *func,
+		void *args)
 {
 	memcpy(heap->data, element, elem_size);
-	__min_heapify(heap, 0, elem_size, func);
+	__min_heapify(heap, 0, elem_size, func, args);
 }
 
-#define min_heap_pop_push(_heap, _element, _func)	\
-	__min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func)
+#define min_heap_pop_push(_heap, _element, _func, _args)	\
+	__min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
 
 /* Push an element on to the heap, O(log2(nr)). */
 static __always_inline
 void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
-		const struct min_heap_callbacks *func)
+		const struct min_heap_callbacks *func, void *args)
 {
 	void *data = heap->data;
 	void *child, *parent;
@@ -182,13 +183,13 @@ void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
 	for (; pos > 0; pos = (pos - 1) / 2) {
 		child = data + (pos * elem_size);
 		parent = data + ((pos - 1) / 2) * elem_size;
-		if (func->less(parent, child))
+		if (func->less(parent, child, args))
 			break;
-		func->swp(parent, child);
+		func->swp(parent, child, args);
 	}
 }
 
-#define min_heap_push(_heap, _element, _func)	\
-	__min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func)
+#define min_heap_push(_heap, _element, _func, _args)	\
+	__min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
 
 #endif /* _LINUX_MIN_HEAP_H */
-- 
cgit v1.2.3


From eaa0bc7119247942200b9209d40ffd86ace347d4 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Fri, 24 May 2024 23:29:51 +0800
Subject: lib min_heap: add min_heap_sift_up()

Add min_heap_sift_up() to sift up the element at index 'idx' in the
heap.

Link: https://lkml.kernel.org/r/20240524152958.919343-10-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: Brian Foster <bfoster@redhat.com>
Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
Cc: Coly Li <colyli@suse.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Sakai <msakai@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/min_heap.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 4acd0f4b3faf..ddd0186afb77 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -111,6 +111,26 @@ void __min_heapify(min_heap_char *heap, int pos, size_t elem_size,
 #define min_heapify(_heap, _pos, _func, _args)	\
 	__min_heapify((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args)
 
+/* Sift up ith element from the heap, O(log2(nr)). */
+static __always_inline
+void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
+		const struct min_heap_callbacks *func, void *args)
+{
+	void *data = heap->data;
+	size_t parent;
+
+	while (idx) {
+		parent = (idx - 1) / 2;
+		if (func->less(data + parent * elem_size, data + idx * elem_size, args))
+			break;
+		func->swp(data + parent * elem_size, data + idx * elem_size, args);
+		idx = parent;
+	}
+}
+
+#define min_heap_sift_up(_heap, _idx, _func, _args)	\
+	__min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
+
 /* Floyd's approach to heapification that is O(nr). */
 static __always_inline
 void __min_heapify_all(min_heap_char *heap, size_t elem_size,
-- 
cgit v1.2.3


From 420f171031207a13c83560b2ebb32ccc3c8ed6df Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Fri, 24 May 2024 23:29:52 +0800
Subject: lib min_heap: add min_heap_del()

Add min_heap_del() to delete the element at index 'idx' in the heap.

Link: https://lkml.kernel.org/r/20240524152958.919343-11-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: Brian Foster <bfoster@redhat.com>
Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
Cc: Coly Li <colyli@suse.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Sakai <msakai@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/min_heap.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index ddd0186afb77..98e838195e7e 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -212,4 +212,28 @@ void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
 #define min_heap_push(_heap, _element, _func, _args)	\
 	__min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args)
 
+/* Remove ith element from the heap, O(log2(nr)). */
+static __always_inline
+bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
+		const struct min_heap_callbacks *func, void *args)
+{
+	void *data = heap->data;
+
+	if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap"))
+		return false;
+
+	/* Place last element at the root (position 0) and then sift down. */
+	heap->nr--;
+	if (idx == heap->nr)
+		return true;
+	func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args);
+	__min_heap_sift_up(heap, elem_size, idx, func, args);
+	__min_heapify(heap, idx, elem_size, func, args);
+
+	return true;
+}
+
+#define min_heap_del(_heap, _idx, _func, _args)	\
+	__min_heap_del((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args)
+
 #endif /* _LINUX_MIN_HEAP_H */
-- 
cgit v1.2.3


From 2eb637c649a3d89c5483dc851ce98d56717a36d2 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Fri, 24 May 2024 23:29:53 +0800
Subject: lib min_heap: update min_heap_push() and min_heap_pop() to return
 bool values

Modify the min_heap_push() and min_heap_pop() to return a boolean value.
They now return false when the operation fails and true when it succeeds.

Link: https://lkml.kernel.org/r/20240524152958.919343-12-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: Brian Foster <bfoster@redhat.com>
Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
Cc: Coly Li <colyli@suse.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Sakai <msakai@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/min_heap.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 98e838195e7e..3410a5f907ad 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -147,18 +147,20 @@ void __min_heapify_all(min_heap_char *heap, size_t elem_size,
 
 /* Remove minimum element from the heap, O(log2(nr)). */
 static __always_inline
-void __min_heap_pop(min_heap_char *heap, size_t elem_size,
+bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
 		const struct min_heap_callbacks *func, void *args)
 {
 	void *data = heap->data;
 
 	if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap"))
-		return;
+		return false;
 
 	/* Place last element at the root (position 0) and then sift down. */
 	heap->nr--;
 	memcpy(data, data + (heap->nr * elem_size), elem_size);
 	__min_heapify(heap, 0, elem_size, func, args);
+
+	return true;
 }
 
 #define min_heap_pop(_heap, _func, _args)	\
@@ -184,7 +186,7 @@ void __min_heap_pop_push(min_heap_char *heap,
 
 /* Push an element on to the heap, O(log2(nr)). */
 static __always_inline
-void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
+bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
 		const struct min_heap_callbacks *func, void *args)
 {
 	void *data = heap->data;
@@ -192,7 +194,7 @@ void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
 	int pos;
 
 	if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap"))
-		return;
+		return false;
 
 	/* Place at the end of data. */
 	pos = heap->nr;
@@ -207,6 +209,8 @@ void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
 			break;
 		func->swp(parent, child, args);
 	}
+
+	return true;
 }
 
 #define min_heap_push(_heap, _element, _func, _args)	\
-- 
cgit v1.2.3


From bfe3127180418f1b569999954cb653b9926f75ae Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Fri, 24 May 2024 23:29:54 +0800
Subject: lib min_heap: rename min_heapify() to min_heap_sift_down()

After adding min_heap_sift_up(), the naming convention has been adjusted
to maintain consistency with the min_heap_sift_up().  Consequently,
min_heapify() has been renamed to min_heap_sift_down().

Link: https://lkml.kernel.org/CAP-5=fVcBAxt8Mw72=NCJPRJfjDaJcqk4rjbadgouAEAHz_q1A@mail.gmail.com
Link: https://lkml.kernel.org/r/20240524152958.919343-13-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: Brian Foster <bfoster@redhat.com>
Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
Cc: Coly Li <colyli@suse.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Sakai <msakai@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/min_heap.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 3410a5f907ad..0baee5787247 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -75,7 +75,7 @@ bool __min_heap_full(min_heap_char *heap)
 
 /* Sift the element at pos down the heap. */
 static __always_inline
-void __min_heapify(min_heap_char *heap, int pos, size_t elem_size,
+void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
 		const struct min_heap_callbacks *func, void *args)
 {
 	void *left, *right;
@@ -108,8 +108,8 @@ void __min_heapify(min_heap_char *heap, int pos, size_t elem_size,
 	}
 }
 
-#define min_heapify(_heap, _pos, _func, _args)	\
-	__min_heapify((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args)
+#define min_heap_sift_down(_heap, _pos, _func, _args)	\
+	__min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args)
 
 /* Sift up ith element from the heap, O(log2(nr)). */
 static __always_inline
@@ -139,7 +139,7 @@ void __min_heapify_all(min_heap_char *heap, size_t elem_size,
 	int i;
 
 	for (i = heap->nr / 2 - 1; i >= 0; i--)
-		__min_heapify(heap, i, elem_size, func, args);
+		__min_heap_sift_down(heap, i, elem_size, func, args);
 }
 
 #define min_heapify_all(_heap, _func, _args)	\
@@ -158,7 +158,7 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
 	/* Place last element at the root (position 0) and then sift down. */
 	heap->nr--;
 	memcpy(data, data + (heap->nr * elem_size), elem_size);
-	__min_heapify(heap, 0, elem_size, func, args);
+	__min_heap_sift_down(heap, 0, elem_size, func, args);
 
 	return true;
 }
@@ -178,7 +178,7 @@ void __min_heap_pop_push(min_heap_char *heap,
 		void *args)
 {
 	memcpy(heap->data, element, elem_size);
-	__min_heapify(heap, 0, elem_size, func, args);
+	__min_heap_sift_down(heap, 0, elem_size, func, args);
 }
 
 #define min_heap_pop_push(_heap, _element, _func, _args)	\
@@ -232,7 +232,7 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
 		return true;
 	func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args);
 	__min_heap_sift_up(heap, elem_size, idx, func, args);
-	__min_heapify(heap, idx, elem_size, func, args);
+	__min_heap_sift_down(heap, idx, elem_size, func, args);
 
 	return true;
 }
-- 
cgit v1.2.3


From e596930fc78b97b71df80df0fb1cbf2efb9fc6e4 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Fri, 24 May 2024 23:29:55 +0800
Subject: lib min_heap: update min_heap_push() to use min_heap_sift_up()

Update min_heap_push() to use min_heap_sift_up() rather than its origin
inline version.

Link: https://lkml.kernel.org/r/20240524152958.919343-14-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: Brian Foster <bfoster@redhat.com>
Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
Cc: Coly Li <colyli@suse.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Sakai <msakai@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/min_heap.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 0baee5787247..43a7b9dcf15e 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -190,7 +190,6 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
 		const struct min_heap_callbacks *func, void *args)
 {
 	void *data = heap->data;
-	void *child, *parent;
 	int pos;
 
 	if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap"))
@@ -202,13 +201,7 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
 	heap->nr++;
 
 	/* Sift child at pos up. */
-	for (; pos > 0; pos = (pos - 1) / 2) {
-		child = data + (pos * elem_size);
-		parent = data + ((pos - 1) / 2) * elem_size;
-		if (func->less(parent, child, args))
-			break;
-		func->swp(parent, child, args);
-	}
+	__min_heap_sift_up(heap, elem_size, pos, func, args);
 
 	return true;
 }
-- 
cgit v1.2.3


From 7c45d8282660cf4ce011d96bf918df50b7481b3c Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Mon, 27 May 2024 17:56:44 -0700
Subject: sched: avoid using ilog2() in sched.h

<linux/sched.h> indirectly via cpumask.h path includes the ilog2.h header
to calculate ilog2(TASK_REPORT_MAX).  The following patches drops sched.h
dependency on cpumask.h, and to have a successful build, the header has to
be included explicitly.

sched.h is a frequently included header, and it's better to keep the
dependency list as small as possible.  So, instead of including ilog2.h
for a single BUILD_BUG_ON() check, the same check may be implemented by
taking exponent of the other part of equation.

Link: https://lkml.kernel.org/r/20240528005648.182376-3-yury.norov@gmail.com
Signed-off-by: Yury Norov <yury.norov@gmail.com>
Cc: Amit Daniel Kachhap <amit.kachhap@gmail.com>
Cc: Anna-Maria Behnsen <anna-maria@linutronix.de>
Cc: Christoph Lameter <cl@linux.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Yury Norov <yury.norov@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 61591ac6eab6..98abb07de149 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1604,7 +1604,7 @@ static inline char task_index_to_char(unsigned int state)
 {
 	static const char state_char[] = "RSDTtXZPI";
 
-	BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1);
+	BUILD_BUG_ON(TASK_REPORT_MAX * 2 != 1 << (sizeof(state_char) - 1));
 
 	return state_char[state];
 }
-- 
cgit v1.2.3


From eb4faa36d674eed60a522a72a3a40384f4b285d4 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@nvidia.com>
Date: Mon, 27 May 2024 17:56:45 -0700
Subject: cpumask: split out include/linux/cpumask_types.h

Many core headers, like sched.h, include cpumask.h mostly for struct
cpumask and cpumask_var_t.  Those are frequently used headers and
shouldn't pull more than the bare minimum.

Link: https://lkml.kernel.org/r/20240528005648.182376-4-yury.norov@gmail.com
Signed-off-by: Yury Norov <yury.norov@gmail.com>
Cc: Amit Daniel Kachhap <amit.kachhap@gmail.com>
Cc: Anna-Maria Behnsen <anna-maria@linutronix.de>
Cc: Christoph Lameter <cl@linux.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/cpumask.h       | 56 +-----------------------------------
 include/linux/cpumask_types.h | 66 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 55 deletions(-)
 create mode 100644 include/linux/cpumask_types.h

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 23686bed441d..76dca7b86189 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -9,25 +9,13 @@
  */
 #include <linux/cleanup.h>
 #include <linux/kernel.h>
-#include <linux/threads.h>
 #include <linux/bitmap.h>
+#include <linux/cpumask_types.h>
 #include <linux/atomic.h>
 #include <linux/bug.h>
 #include <linux/gfp_types.h>
 #include <linux/numa.h>
 
-/* Don't assign or return these: may not be this big! */
-typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
-
-/**
- * cpumask_bits - get the bits in a cpumask
- * @maskp: the struct cpumask *
- *
- * You should only assume nr_cpu_ids bits of this mask are valid.  This is
- * a macro so it's const-correct.
- */
-#define cpumask_bits(maskp) ((maskp)->bits)
-
 /**
  * cpumask_pr_args - printf args to output a cpumask
  * @maskp: cpumask to be printed
@@ -922,48 +910,7 @@ static inline unsigned int cpumask_size(void)
 	return bitmap_size(large_cpumask_bits);
 }
 
-/*
- * cpumask_var_t: struct cpumask for stack usage.
- *
- * Oh, the wicked games we play!  In order to make kernel coding a
- * little more difficult, we typedef cpumask_var_t to an array or a
- * pointer: doing &mask on an array is a noop, so it still works.
- *
- * i.e.
- *	cpumask_var_t tmpmask;
- *	if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
- *		return -ENOMEM;
- *
- *	  ... use 'tmpmask' like a normal struct cpumask * ...
- *
- *	free_cpumask_var(tmpmask);
- *
- *
- * However, one notable exception is there. alloc_cpumask_var() allocates
- * only nr_cpumask_bits bits (in the other hand, real cpumask_t always has
- * NR_CPUS bits). Therefore you don't have to dereference cpumask_var_t.
- *
- *	cpumask_var_t tmpmask;
- *	if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
- *		return -ENOMEM;
- *
- *	var = *tmpmask;
- *
- * This code makes NR_CPUS length memcopy and brings to a memory corruption.
- * cpumask_copy() provide safe copy functionality.
- *
- * Note that there is another evil here: If you define a cpumask_var_t
- * as a percpu variable then the way to obtain the address of the cpumask
- * structure differently influences what this_cpu_* operation needs to be
- * used. Please use this_cpu_cpumask_var_t in those cases. The direct use
- * of this_cpu_ptr() or this_cpu_read() will lead to failures when the
- * other type of cpumask_var_t implementation is configured.
- *
- * Please also note that __cpumask_var_read_mostly can be used to declare
- * a cpumask_var_t variable itself (not its content) as read mostly.
- */
 #ifdef CONFIG_CPUMASK_OFFSTACK
-typedef struct cpumask *cpumask_var_t;
 
 #define this_cpu_cpumask_var_ptr(x)	this_cpu_read(x)
 #define __cpumask_var_read_mostly	__read_mostly
@@ -1010,7 +957,6 @@ static inline bool cpumask_available(cpumask_var_t mask)
 }
 
 #else
-typedef struct cpumask cpumask_var_t[1];
 
 #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x)
 #define __cpumask_var_read_mostly
diff --git a/include/linux/cpumask_types.h b/include/linux/cpumask_types.h
new file mode 100644
index 000000000000..461ed1b6bcdb
--- /dev/null
+++ b/include/linux/cpumask_types.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_CPUMASK_TYPES_H
+#define __LINUX_CPUMASK_TYPES_H
+
+#include <linux/bitops.h>
+#include <linux/threads.h>
+
+/* Don't assign or return these: may not be this big! */
+typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
+
+/**
+ * cpumask_bits - get the bits in a cpumask
+ * @maskp: the struct cpumask *
+ *
+ * You should only assume nr_cpu_ids bits of this mask are valid.  This is
+ * a macro so it's const-correct.
+ */
+#define cpumask_bits(maskp) ((maskp)->bits)
+
+/*
+ * cpumask_var_t: struct cpumask for stack usage.
+ *
+ * Oh, the wicked games we play!  In order to make kernel coding a
+ * little more difficult, we typedef cpumask_var_t to an array or a
+ * pointer: doing &mask on an array is a noop, so it still works.
+ *
+ * i.e.
+ *	cpumask_var_t tmpmask;
+ *	if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ *		return -ENOMEM;
+ *
+ *	  ... use 'tmpmask' like a normal struct cpumask * ...
+ *
+ *	free_cpumask_var(tmpmask);
+ *
+ *
+ * However, one notable exception is there. alloc_cpumask_var() allocates
+ * only nr_cpumask_bits bits (in the other hand, real cpumask_t always has
+ * NR_CPUS bits). Therefore you don't have to dereference cpumask_var_t.
+ *
+ *	cpumask_var_t tmpmask;
+ *	if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
+ *		return -ENOMEM;
+ *
+ *	var = *tmpmask;
+ *
+ * This code makes NR_CPUS length memcopy and brings to a memory corruption.
+ * cpumask_copy() provide safe copy functionality.
+ *
+ * Note that there is another evil here: If you define a cpumask_var_t
+ * as a percpu variable then the way to obtain the address of the cpumask
+ * structure differently influences what this_cpu_* operation needs to be
+ * used. Please use this_cpu_cpumask_var_t in those cases. The direct use
+ * of this_cpu_ptr() or this_cpu_read() will lead to failures when the
+ * other type of cpumask_var_t implementation is configured.
+ *
+ * Please also note that __cpumask_var_read_mostly can be used to declare
+ * a cpumask_var_t variable itself (not its content) as read mostly.
+ */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+typedef struct cpumask *cpumask_var_t;
+#else
+typedef struct cpumask cpumask_var_t[1];
+#endif /* CONFIG_CPUMASK_OFFSTACK */
+
+#endif /* __LINUX_CPUMASK_TYPES_H */
-- 
cgit v1.2.3


From 361c1f04f3b43b00997b2845c7a9e1d0a9b4c38a Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Mon, 27 May 2024 17:56:46 -0700
Subject: sched: drop sched.h dependency on cpumask

sched.h needs cpumask.h mostly for types declaration.  Now that we have
cpumask_types.h, which is a significantly smaller header, we can rely on
it.

The only exception is UP stub for set_cpus_allowed_ptr().  The function
needs to test bit #0 in a @new_mask, which can be trivially opencoded.

Link: https://lkml.kernel.org/r/20240528005648.182376-5-yury.norov@gmail.com
Signed-off-by: Yury Norov <yury.norov@gmail.com>
Cc: Amit Daniel Kachhap <amit.kachhap@gmail.com>
Cc: Anna-Maria Behnsen <anna-maria@linutronix.de>
Cc: Christoph Lameter <cl@linux.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Yury Norov <yury.norov@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/sched.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 98abb07de149..f2f907ef1389 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -13,7 +13,7 @@
 #include <asm/processor.h>
 #include <linux/thread_info.h>
 #include <linux/preempt.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
 
 #include <linux/cache.h>
 #include <linux/irqflags_types.h>
@@ -1778,7 +1778,8 @@ static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpuma
 }
 static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 {
-	if (!cpumask_test_cpu(0, new_mask))
+	/* Opencoded cpumask_test_cpu(0, new_mask) to avoid dependency on cpumask.h */
+	if ((*cpumask_bits(new_mask) & 1) == 0)
 		return -EINVAL;
 	return 0;
 }
-- 
cgit v1.2.3


From 7f36688f126ba4a4ec510fa81466b1dacdec97ee Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Mon, 27 May 2024 17:56:47 -0700
Subject: cpumask: cleanup core headers inclusion

Many core headers include cpumask.h for nothing. Drop it.

Link: https://lkml.kernel.org/r/20240528005648.182376-6-yury.norov@gmail.com
Signed-off-by: Yury Norov <yury.norov@gmail.com>
Cc: Amit Daniel Kachhap <amit.kachhap@gmail.com>
Cc: Anna-Maria Behnsen <anna-maria@linutronix.de>
Cc: Christoph Lameter <cl@linux.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Yury Norov <yury.norov@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/cgroup.h      | 1 -
 include/linux/cpu.h         | 1 -
 include/linux/cpu_cooling.h | 1 -
 include/linux/kernel_stat.h | 1 -
 include/linux/node.h        | 1 -
 include/linux/percpu.h      | 1 -
 include/linux/profile.h     | 1 -
 include/linux/rcupdate.h    | 1 -
 include/linux/seq_file.h    | 1 -
 include/linux/tracepoint.h  | 1 -
 10 files changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 2150ca60394b..c60ba0ab1462 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -10,7 +10,6 @@
  */
 
 #include <linux/sched.h>
-#include <linux/cpumask.h>
 #include <linux/nodemask.h>
 #include <linux/rculist.h>
 #include <linux/cgroupstats.h>
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 861c3bfc5f17..ea6ac8f98e4a 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -16,7 +16,6 @@
 
 #include <linux/node.h>
 #include <linux/compiler.h>
-#include <linux/cpumask.h>
 #include <linux/cpuhotplug.h>
 #include <linux/cpu_smt.h>
 
diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index a3bdc8a98f2c..2c774fb3c091 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h
@@ -15,7 +15,6 @@
 
 #include <linux/of.h>
 #include <linux/thermal.h>
-#include <linux/cpumask.h>
 
 struct cpufreq_policy;
 
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 9c042c6384bb..b97ce2df376f 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -5,7 +5,6 @@
 #include <linux/smp.h>
 #include <linux/threads.h>
 #include <linux/percpu.h>
-#include <linux/cpumask.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/vtime.h>
diff --git a/include/linux/node.h b/include/linux/node.h
index dfc004e4bee7..9a881c2208b3 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -16,7 +16,6 @@
 #define _LINUX_NODE_H_
 
 #include <linux/device.h>
-#include <linux/cpumask.h>
 #include <linux/list.h>
 
 /**
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 03053de557cf..4b2047b78b67 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -6,7 +6,6 @@
 #include <linux/mmdebug.h>
 #include <linux/preempt.h>
 #include <linux/smp.h>
-#include <linux/cpumask.h>
 #include <linux/pfn.h>
 #include <linux/init.h>
 #include <linux/cleanup.h>
diff --git a/include/linux/profile.h b/include/linux/profile.h
index 04ae5ebcb637..2fb487f61d12 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -4,7 +4,6 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/cpumask.h>
 #include <linux/cache.h>
 
 #include <asm/errno.h>
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index dfd2399f2cde..fb8ab4618d63 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -29,7 +29,6 @@
 #include <linux/lockdep.h>
 #include <linux/cleanup.h>
 #include <asm/processor.h>
-#include <linux/cpumask.h>
 #include <linux/context_tracking_irq.h>
 
 #define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 8bd4fda6e027..2fb266ea69fa 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -7,7 +7,6 @@
 #include <linux/string_helpers.h>
 #include <linux/bug.h>
 #include <linux/mutex.h>
-#include <linux/cpumask.h>
 #include <linux/nodemask.h>
 #include <linux/fs.h>
 #include <linux/cred.h>
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 689b6d71590e..6be396bb4297 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -16,7 +16,6 @@
 #include <linux/srcu.h>
 #include <linux/errno.h>
 #include <linux/types.h>
-#include <linux/cpumask.h>
 #include <linux/rcupdate.h>
 #include <linux/tracepoint-defs.h>
 #include <linux/static_call.h>
-- 
cgit v1.2.3


From e1b6705bcfb2797ea182e313d5ec4f57fa8571f2 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Mon, 27 May 2024 17:56:48 -0700
Subject: cpumask: make core headers including cpumask_types.h where possible

Now that cpumask types are split out to a separate smaller header, many
frequently included core headers may switch to using it.

Link: https://lkml.kernel.org/r/20240528005648.182376-7-yury.norov@gmail.com
Signed-off-by: Yury Norov <yury.norov@gmail.com>
Cc: Amit Daniel Kachhap <amit.kachhap@gmail.com>
Cc: Anna-Maria Behnsen <anna-maria@linutronix.de>
Cc: Christoph Lameter <cl@linux.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Yury Norov <yury.norov@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/cacheinfo.h                    | 2 +-
 include/linux/clockchips.h                   | 2 +-
 include/linux/cpu_rmap.h                     | 2 +-
 include/linux/interrupt.h                    | 2 +-
 include/linux/irqchip/irq-partition-percpu.h | 2 +-
 include/linux/msi.h                          | 2 +-
 include/linux/pm_domain.h                    | 2 +-
 include/linux/stop_machine.h                 | 2 +-
 include/linux/torture.h                      | 2 +-
 include/linux/workqueue.h                    | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 2cb15fe4fe12..286db104e054 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -3,7 +3,7 @@
 #define _LINUX_CACHEINFO_H
 
 #include <linux/bitops.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
 #include <linux/smp.h>
 
 struct device_node;
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 9aac31d856f3..b0df28ddd394 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -12,7 +12,7 @@
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
 # include <linux/clocksource.h>
-# include <linux/cpumask.h>
+# include <linux/cpumask_types.h>
 # include <linux/ktime.h>
 # include <linux/notifier.h>
 
diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h
index cae324d10965..20b5729903d7 100644
--- a/include/linux/cpu_rmap.h
+++ b/include/linux/cpu_rmap.h
@@ -7,7 +7,7 @@
  * Copyright 2011 Solarflare Communications Inc.
  */
 
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/kref.h>
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5c9bdd3ffccc..136a55455529 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -5,13 +5,13 @@
 
 #include <linux/kernel.h>
 #include <linux/bitops.h>
-#include <linux/cpumask.h>
 #include <linux/irqreturn.h>
 #include <linux/irqnr.h>
 #include <linux/hardirq.h>
 #include <linux/irqflags.h>
 #include <linux/hrtimer.h>
 #include <linux/kref.h>
+#include <linux/cpumask_types.h>
 #include <linux/workqueue.h>
 #include <linux/jump_label.h>
 
diff --git a/include/linux/irqchip/irq-partition-percpu.h b/include/linux/irqchip/irq-partition-percpu.h
index 2f6ae7551748..b35ee22c278f 100644
--- a/include/linux/irqchip/irq-partition-percpu.h
+++ b/include/linux/irqchip/irq-partition-percpu.h
@@ -8,7 +8,7 @@
 #define __LINUX_IRQCHIP_IRQ_PARTITION_PERCPU_H
 
 #include <linux/fwnode.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
 #include <linux/irqdomain.h>
 
 struct partition_affinity {
diff --git a/include/linux/msi.h b/include/linux/msi.h
index dc27cf3903d5..26588da88bdd 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -19,7 +19,7 @@
  */
 
 #include <linux/irqdomain_defs.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
 #include <linux/msi_api.h>
 #include <linux/xarray.h>
 #include <linux/mutex.h>
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index f24546a3d3db..71e4f0fb8867 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -16,7 +16,7 @@
 #include <linux/of.h>
 #include <linux/notifier.h>
 #include <linux/spinlock.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
 #include <linux/time64.h>
 
 /*
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index ea7a74ea7389..3132262a404d 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -3,7 +3,7 @@
 #define _LINUX_STOP_MACHINE
 
 #include <linux/cpu.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
 #include <linux/smp.h>
 #include <linux/list.h>
 
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 1541454da03e..c2e979f82f8d 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -14,7 +14,7 @@
 #include <linux/cache.h>
 #include <linux/spinlock.h>
 #include <linux/threads.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
 #include <linux/seqlock.h>
 #include <linux/lockdep.h>
 #include <linux/completion.h>
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index fb3993894536..52496f07fba5 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -12,7 +12,7 @@
 #include <linux/lockdep.h>
 #include <linux/threads.h>
 #include <linux/atomic.h>
-#include <linux/cpumask.h>
+#include <linux/cpumask_types.h>
 #include <linux/rcupdate.h>
 #include <linux/workqueue_types.h>
 
-- 
cgit v1.2.3


From 2f183c68345a26213e5e7f798399bee68d1c4a97 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Fri, 31 May 2024 12:04:57 +0300
Subject: kernel/panic: add verbose logging of kernel taints in backtraces

With nearly 20 taint flags and respective characters, it's getting a bit
difficult to remember what each taint flag character means.  Add verbose
logging of the set taints in the format:

Tainted: [P]=PROPRIETARY_MODULE, [W]=WARN

in dump_stack_print_info() when there are taints.

Note that the "negative flag" G is not included.

Link: https://lkml.kernel.org/r/7321e306166cb2ca2807ab8639e665baa2462e9c.1717146197.git.jani.nikula@intel.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/panic.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/panic.h b/include/linux/panic.h
index 6717b15e798c..3130e0b5116b 100644
--- a/include/linux/panic.h
+++ b/include/linux/panic.h
@@ -77,9 +77,10 @@ static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout)
 #define TAINT_FLAGS_MAX			((1UL << TAINT_FLAGS_COUNT) - 1)
 
 struct taint_flag {
-	char c_true;	/* character printed when tainted */
-	char c_false;	/* character printed when not tainted */
-	bool module;	/* also show as a per-module taint flag */
+	char c_true;		/* character printed when tainted */
+	char c_false;		/* character printed when not tainted */
+	bool module;		/* also show as a per-module taint flag */
+	const char *desc;	/* verbose description of the set taint flag */
 };
 
 extern const struct taint_flag taint_flags[TAINT_FLAGS_COUNT];
@@ -90,6 +91,7 @@ enum lockdep_ok {
 };
 
 extern const char *print_tainted(void);
+extern const char *print_tainted_verbose(void);
 extern void add_taint(unsigned flag, enum lockdep_ok);
 extern int test_taint(unsigned flag);
 extern unsigned long get_taint(void);
-- 
cgit v1.2.3


From cc8d5a2f09a54405321769abfd6ec3395482336a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 25 Jun 2024 07:58:10 +0200
Subject: Revert "printk: Save console options for
 add_preferred_console_match()"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit f03e8c1060f86c23eb49bafee99d9fcbd1c1bd77.

Let's roll back all of the serial core and printk console changes that
went into 6.10-rc1 as there still are problems with them that need to be
sorted out.

Link: https://lore.kernel.org/r/ZnpRozsdw6zbjqze@tlindgre-MOBL1
Reported-by: Petr Mladek <pmladek@suse.com>
Reported-by: Tony Lindgren <tony@atomide.com>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/printk.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 40afab23881a..65c5184470f1 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -60,9 +60,6 @@ static inline const char *printk_skip_headers(const char *buffer)
 #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT
 #define CONSOLE_LOGLEVEL_QUIET	 CONFIG_CONSOLE_LOGLEVEL_QUIET
 
-int add_preferred_console_match(const char *match, const char *name,
-				const short idx);
-
 extern int console_printk[];
 
 #define console_loglevel (console_printk[0])
-- 
cgit v1.2.3


From 07b87f9eea0c30675084d50c82532d20168da009 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Thu, 20 Jun 2024 08:47:24 +0200
Subject: xfrm: Fix unregister netdevice hang on hardware offload.

When offloading xfrm states to hardware, the offloading
device is attached to the skbs secpath. If a skb is free
is deferred, an unregister netdevice hangs because the
netdevice is still refcounted.

Fix this by removing the netdevice from the xfrm states
when the netdevice is unregistered. To find all xfrm states
that need to be cleared we add another list where skbs
linked to that are unlinked from the lists (deleted)
but not yet freed.

Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API")
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 36 ++++++++++--------------------------
 1 file changed, 10 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 77ebf5bcf0b9..7d4c2235252c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -178,7 +178,10 @@ struct xfrm_state {
 		struct hlist_node	gclist;
 		struct hlist_node	bydst;
 	};
-	struct hlist_node	bysrc;
+	union {
+		struct hlist_node	dev_gclist;
+		struct hlist_node	bysrc;
+	};
 	struct hlist_node	byspi;
 	struct hlist_node	byseq;
 
@@ -1588,7 +1591,7 @@ void xfrm_state_update_stats(struct net *net);
 static inline void xfrm_dev_state_update_stats(struct xfrm_state *x)
 {
 	struct xfrm_dev_offload *xdo = &x->xso;
-	struct net_device *dev = xdo->dev;
+	struct net_device *dev = READ_ONCE(xdo->dev);
 
 	if (dev && dev->xfrmdev_ops &&
 	    dev->xfrmdev_ops->xdo_dev_state_update_stats)
@@ -1946,13 +1949,16 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
 			struct xfrm_user_offload *xuo, u8 dir,
 			struct netlink_ext_ack *extack);
 bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
+void xfrm_dev_state_delete(struct xfrm_state *x);
+void xfrm_dev_state_free(struct xfrm_state *x);
 
 static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
 {
 	struct xfrm_dev_offload *xso = &x->xso;
+	struct net_device *dev = READ_ONCE(xso->dev);
 
-	if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn)
-		xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn(x);
+	if (dev && dev->xfrmdev_ops->xdo_dev_state_advance_esn)
+		dev->xfrmdev_ops->xdo_dev_state_advance_esn(x);
 }
 
 static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
@@ -1973,28 +1979,6 @@ static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
 	return false;
 }
 
-static inline void xfrm_dev_state_delete(struct xfrm_state *x)
-{
-	struct xfrm_dev_offload *xso = &x->xso;
-
-	if (xso->dev)
-		xso->dev->xfrmdev_ops->xdo_dev_state_delete(x);
-}
-
-static inline void xfrm_dev_state_free(struct xfrm_state *x)
-{
-	struct xfrm_dev_offload *xso = &x->xso;
-	struct net_device *dev = xso->dev;
-
-	if (dev && dev->xfrmdev_ops) {
-		if (dev->xfrmdev_ops->xdo_dev_state_free)
-			dev->xfrmdev_ops->xdo_dev_state_free(x);
-		xso->dev = NULL;
-		xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
-		netdev_put(dev, &xso->dev_tracker);
-	}
-}
-
 static inline void xfrm_dev_policy_delete(struct xfrm_policy *x)
 {
 	struct xfrm_dev_offload *xdo = &x->xdo;
-- 
cgit v1.2.3


From 98f706de445b464f25220360210a4bcb9cc6c41a Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 20 Jun 2024 13:56:16 -0700
Subject: af_unix: Define locking order for U_LOCK_SECOND in
 unix_stream_connect().

While a SOCK_(STREAM|SEQPACKET) socket connect()s to another, we hold
two locks of them by unix_state_lock() and unix_state_lock_nested() in
unix_stream_connect().

Before unix_state_lock_nested(), the following is guaranteed by checking
sk->sk_state:

  1. The first socket is TCP_LISTEN
  2. The second socket is not the first one
  3. Simultaneous connect() must fail

So, the client state can be TCP_CLOSE or TCP_LISTEN or TCP_ESTABLISHED.

Let's define the expected states as unix_state_lock_cmp_fn() instead of
using unix_state_lock_nested().

Note that 2. is detected by debug_spin_lock_before() and 3. cannot be
expressed as lock_cmp_fn.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/af_unix.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index b6eedf7650da..fd813ad73ab8 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -98,7 +98,6 @@ struct unix_sock {
 #define unix_state_unlock(s)	spin_unlock(&unix_sk(s)->lock)
 enum unix_socket_lock_class {
 	U_LOCK_NORMAL,
-	U_LOCK_SECOND,	/* for double locking, see unix_state_double_lock(). */
 	U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */
 	U_LOCK_GC_LISTENER, /* used for listening socket while determining gc
 			     * candidates to close a small race window.
-- 
cgit v1.2.3


From c4da4661d985fd3cbaea3ea6101e2dd0d2ad4b74 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 20 Jun 2024 13:56:18 -0700
Subject: af_unix: Remove U_LOCK_DIAG.

sk_diag_dump_icons() acquires embryo's lock by unix_state_lock_nested()
to fetch its peer.

The embryo's ->peer is set to NULL only when its parent listener is
close()d.  Then, unix_release_sock() is called for each embryo after
unlinking skb by skb_dequeue().

In sk_diag_dump_icons(), we hold the parent's recvq lock, so we need
not acquire unix_state_lock_nested(), and peer is always non-NULL.

Let's remove unnecessary unix_state_lock_nested() and non-NULL test
for peer.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/af_unix.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index fd813ad73ab8..c42645199cee 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -98,7 +98,6 @@ struct unix_sock {
 #define unix_state_unlock(s)	spin_unlock(&unix_sk(s)->lock)
 enum unix_socket_lock_class {
 	U_LOCK_NORMAL,
-	U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */
 	U_LOCK_GC_LISTENER, /* used for listening socket while determining gc
 			     * candidates to close a small race window.
 			     */
-- 
cgit v1.2.3


From 7202cb591624b860bd7b688d4bd0f5bee79c7f44 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 20 Jun 2024 13:56:19 -0700
Subject: af_unix: Remove U_LOCK_GC_LISTENER.

Commit 1971d13ffa84 ("af_unix: Suppress false-positive lockdep splat for
spin_lock() in __unix_gc().") added U_LOCK_GC_LISTENER for the old GC,
but it's no longer needed for the new GC.

Let's remove U_LOCK_GC_LISTENER and unix_state_lock_nested() as there's
no user.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/af_unix.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index c42645199cee..63129c79b8cb 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -96,18 +96,6 @@ struct unix_sock {
 
 #define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
 #define unix_state_unlock(s)	spin_unlock(&unix_sk(s)->lock)
-enum unix_socket_lock_class {
-	U_LOCK_NORMAL,
-	U_LOCK_GC_LISTENER, /* used for listening socket while determining gc
-			     * candidates to close a small race window.
-			     */
-};
-
-static inline void unix_state_lock_nested(struct sock *sk,
-				   enum unix_socket_lock_class subclass)
-{
-	spin_lock_nested(&unix_sk(sk)->lock, subclass);
-}
 
 #define peer_wait peer_wq.wait
 
-- 
cgit v1.2.3


From 9b6a14f08b4875aa22ea0b5bc35042e2580b311b Mon Sep 17 00:00:00 2001
From: Youling Tang <tangyouling@kylinos.cn>
Date: Thu, 20 Jun 2024 11:23:33 +0800
Subject: fs: Export in_group_or_capable()

Export in_group_or_capable() as a VFS helper function.

Signed-off-by: Youling Tang <tangyouling@kylinos.cn>
Link: https://lore.kernel.org/r/20240620032335.147136-1-youling.tang@linux.dev
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index bfc1e6407bf6..942cb11dba96 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1942,6 +1942,8 @@ void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
 extern bool may_open_dev(const struct path *path);
 umode_t mode_strip_sgid(struct mnt_idmap *idmap,
 			const struct inode *dir, umode_t mode);
+bool in_group_or_capable(struct mnt_idmap *idmap,
+			 const struct inode *inode, vfsgid_t vfsgid);
 
 /*
  * This is the "filldir" function type, used by readdir() to let
-- 
cgit v1.2.3


From ff46e3b4421923937b7f6e44ffcd3549a074f321 Mon Sep 17 00:00:00 2001
From: luoxuanqiang <luoxuanqiang@kylinos.cn>
Date: Fri, 21 Jun 2024 09:39:29 +0800
Subject: Fix race for duplicate reqsk on identical SYN

When bonding is configured in BOND_MODE_BROADCAST mode, if two identical
SYN packets are received at the same time and processed on different CPUs,
it can potentially create the same sk (sock) but two different reqsk
(request_sock) in tcp_conn_request().

These two different reqsk will respond with two SYNACK packets, and since
the generation of the seq (ISN) incorporates a timestamp, the final two
SYNACK packets will have different seq values.

The consequence is that when the Client receives and replies with an ACK
to the earlier SYNACK packet, we will reset(RST) it.

========================================================================

This behavior is consistently reproducible in my local setup,
which comprises:

                  | NETA1 ------ NETB1 |
PC_A --- bond --- |                    | --- bond --- PC_B
                  | NETA2 ------ NETB2 |

- PC_A is the Server and has two network cards, NETA1 and NETA2. I have
  bonded these two cards using BOND_MODE_BROADCAST mode and configured
  them to be handled by different CPU.

- PC_B is the Client, also equipped with two network cards, NETB1 and
  NETB2, which are also bonded and configured in BOND_MODE_BROADCAST mode.

If the client attempts a TCP connection to the server, it might encounter
a failure. Capturing packets from the server side reveals:

10.10.10.10.45182 > localhost: Flags [S], seq 320236027,
10.10.10.10.45182 > localhost: Flags [S], seq 320236027,
localhost > 10.10.10.10.45182: Flags [S.], seq 2967855116,
localhost > 10.10.10.10.45182: Flags [S.], seq 2967855123, <==
10.10.10.10.45182 > localhost: Flags [.], ack 4294967290,
10.10.10.10.45182 > localhost: Flags [.], ack 4294967290,
localhost > 10.10.10.10.45182: Flags [R], seq 2967855117, <==
localhost > 10.10.10.10.45182: Flags [R], seq 2967855117,

Two SYNACKs with different seq numbers are sent by localhost,
resulting in an anomaly.

========================================================================

The attempted solution is as follows:
Add a return value to inet_csk_reqsk_queue_hash_add() to confirm if the
ehash insertion is successful (Up to now, the reason for unsuccessful
insertion is that a reqsk for the same connection has already been
inserted). If the insertion fails, release the reqsk.

Due to the refcnt, Kuniyuki suggests also adding a return value check
for the DCCP module; if ehash insertion fails, indicating a successful
insertion of the same connection, simply release the reqsk as well.

Simultaneously, In the reqsk_queue_hash_req(), the start of the
req->rsk_timer is adjusted to be after successful insertion.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: luoxuanqiang <luoxuanqiang@kylinos.cn>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20240621013929.1386815-1-luoxuanqiang@kylinos.cn
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/inet_connection_sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 7d6b1254c92d..c0deaafebfdc 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -263,7 +263,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
 struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
 				      struct request_sock *req,
 				      struct sock *child);
-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 				   unsigned long timeout);
 struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
 					 struct request_sock *req,
-- 
cgit v1.2.3


From 06ec7893a4b48a1fad9e94cb670862ddd65b6eab Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 24 Jun 2024 20:39:58 +0300
Subject: drm/connector: hdmi: shorten too long function name

If CONFIG_MODVERSIONS is enabled, then using the HDMI Connector
framework can result in build failures. Rename the function to make it
fit into the name requirements.

ERROR: modpost: too long symbol "drm_atomic_helper_connector_hdmi_disable_audio_infoframe" [drivers/gpu/drm/msm/msm.ko]

Reported-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240624-hdmi-connector-shorten-name-v1-1-5bd3410138db@linaro.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 include/drm/display/drm_hdmi_state_helper.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/display/drm_hdmi_state_helper.h b/include/drm/display/drm_hdmi_state_helper.h
index 285f366cf716..2d45fcfa4619 100644
--- a/include/drm/display/drm_hdmi_state_helper.h
+++ b/include/drm/display/drm_hdmi_state_helper.h
@@ -16,7 +16,7 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector,
 
 int drm_atomic_helper_connector_hdmi_update_audio_infoframe(struct drm_connector *connector,
 							    struct hdmi_audio_infoframe *frame);
-int drm_atomic_helper_connector_hdmi_disable_audio_infoframe(struct drm_connector *connector);
+int drm_atomic_helper_connector_hdmi_clear_audio_infoframe(struct drm_connector *connector);
 int drm_atomic_helper_connector_hdmi_update_infoframes(struct drm_connector *connector,
 						       struct drm_atomic_state *state);
 
-- 
cgit v1.2.3


From b65bd7874bc7dc709645fd8acb53603b73ae950a Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 24 Jun 2024 21:59:42 +0200
Subject: fbdev: mmp: Constify struct mmp_overlay_ops

'struct mmp_overlay_ops' is not modified in this driver.

Constifying this structure moves some data to a read-only section, so
increase overall security.

On a x86_64, with allmodconfig, as an example:
Before:
======
   text	   data	    bss	    dec	    hex	filename
  11798	    555	     16	  12369	   3051	drivers/video/fbdev/mmp/hw/mmp_ctrl.o

After:
=====
   text	   data	    bss	    dec	    hex	filename
  11834	    507	     16	  12357	   3045	drivers/video/fbdev/mmp/hw/mmp_ctrl.o

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 include/video/mmp_disp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/video/mmp_disp.h b/include/video/mmp_disp.h
index a722dcbf5073..41354bd49895 100644
--- a/include/video/mmp_disp.h
+++ b/include/video/mmp_disp.h
@@ -156,7 +156,7 @@ struct mmp_overlay {
 	int status;
 	struct mutex access_ok;
 
-	struct mmp_overlay_ops *ops;
+	const struct mmp_overlay_ops *ops;
 };
 
 /* panel type */
@@ -299,7 +299,7 @@ struct mmp_path_info {
 	int overlay_num;
 	void (*set_mode)(struct mmp_path *path, struct mmp_mode *mode);
 	void (*set_onoff)(struct mmp_path *path, int status);
-	struct mmp_overlay_ops *overlay_ops;
+	const struct mmp_overlay_ops *overlay_ops;
 	void *plat_data;
 };
 
-- 
cgit v1.2.3


From 001c1ff5dc397bc26a3d1a0da8733c179f39c946 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Tue, 25 Jun 2024 12:18:05 +0900
Subject: firewire: ohci: add support for Linux kernel tracepoints

The Linux Kernel Tracepoints framework is enough useful to trace the
interaction between 1394 OHCI hardware and its driver.

This commit adds firewire_ohci subsystem to use the framework. It is
defined as the different subsystem from the existing firewire subsystem.
The definition file for the existing subsystem is slightly changed so that
both subsystems are available in 1394 OHCI driver.

Link: https://lore.kernel.org/r/20240625031806.956650-2-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h      |  1 +
 include/trace/events/firewire_ohci.h | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)
 create mode 100644 include/trace/events/firewire_ohci.h

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index d9158a134beb..aa00c9f33551 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 // Copyright (c) 2024 Takashi Sakamoto
 
+#undef TRACE_SYSTEM
 #define TRACE_SYSTEM	firewire
 
 #if !defined(_FIREWIRE_TRACE_EVENT_H) || defined(TRACE_HEADER_MULTI_READ)
diff --git a/include/trace/events/firewire_ohci.h b/include/trace/events/firewire_ohci.h
new file mode 100644
index 000000000000..67fa3c1c8f6d
--- /dev/null
+++ b/include/trace/events/firewire_ohci.h
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2024 Takashi Sakamoto
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM	firewire_ohci
+
+#if !defined(_FIREWIRE_OHCI_TRACE_EVENT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _FIREWIRE_OHCI_TRACE_EVENT_H
+
+#include <linux/tracepoint.h>
+
+// Placeholder for future use.
+
+#endif // _FIREWIRE_OHCI_TRACE_EVENT_H
+
+#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 0d8914165dd17b0fd330538871968efabb3227c0 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Tue, 25 Jun 2024 12:18:06 +0900
Subject: firewire: ohci: add tracepoints event for hardIRQ event

1394 OHCI hardware triggers PCI interrupts to notify any events to
software. Current driver for the hardware is programmed by the typical
way to utilize top- and bottom- halves, thus it has a timing gap to handle
the notification in softIRQ (tasklet).

This commit adds a tracepoint event for the hardIRQ event. The comparison
of the tracepoint event to tracepoints events in firewire subsystem is
helpful to diagnose the timing gap.

Link: https://lore.kernel.org/r/20240625031806.956650-3-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire_ohci.h | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/firewire_ohci.h b/include/trace/events/firewire_ohci.h
index 67fa3c1c8f6d..483aeeb033af 100644
--- a/include/trace/events/firewire_ohci.h
+++ b/include/trace/events/firewire_ohci.h
@@ -9,7 +9,38 @@
 
 #include <linux/tracepoint.h>
 
-// Placeholder for future use.
+TRACE_EVENT(irqs,
+	TP_PROTO(unsigned int card_index, u32 events),
+	TP_ARGS(card_index, events),
+	TP_STRUCT__entry(
+		__field(u8, card_index)
+		__field(u32, events)
+	),
+	TP_fast_assign(
+		__entry->card_index = card_index;
+		__entry->events = events;
+	),
+	TP_printk(
+		"card_index=%u events=%s",
+		__entry->card_index,
+		__print_flags(__entry->events, "|",
+			{ OHCI1394_selfIDComplete,	"selfIDComplete" },
+			{ OHCI1394_RQPkt,		"RQPkt" },
+			{ OHCI1394_RSPkt,		"RSPkt" },
+			{ OHCI1394_reqTxComplete,	"reqTxComplete" },
+			{ OHCI1394_respTxComplete,	"respTxComplete" },
+			{ OHCI1394_isochRx,		"isochRx" },
+			{ OHCI1394_isochTx,		"isochTx" },
+			{ OHCI1394_postedWriteErr,	"postedWriteErr" },
+			{ OHCI1394_cycleTooLong,	"cycleTooLong" },
+			{ OHCI1394_cycle64Seconds,	"cycle64Seconds" },
+			{ OHCI1394_cycleInconsistent,	"cycleInconsistent" },
+			{ OHCI1394_regAccessFail,	"regAccessFail" },
+			{ OHCI1394_unrecoverableError,	"unrecoverableError" },
+			{ OHCI1394_busReset,		"busReset" }
+		)
+	)
+);
 
 #endif // _FIREWIRE_OHCI_TRACE_EVENT_H
 
-- 
cgit v1.2.3


From d3882564a77c21eb746ba5364f3fa89b88de3d61 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 20 Jun 2024 14:16:37 +0200
Subject: syscalls: fix compat_sys_io_pgetevents_time64 usage

Using sys_io_pgetevents() as the entry point for compat mode tasks
works almost correctly, but misses the sign extension for the min_nr
and nr arguments.

This was addressed on parisc by switching to
compat_sys_io_pgetevents_time64() in commit 6431e92fc827 ("parisc:
io_pgetevents_time64() needs compat syscall in 32-bit compat mode"),
as well as by using more sophisticated system call wrappers on x86 and
s390. However, arm64, mips, powerpc, sparc and riscv still have the
same bug.

Change all of them over to use compat_sys_io_pgetevents_time64()
like parisc already does. This was clearly the intention when the
function was originally added, but it got hooked up incorrectly in
the tables.

Cc: stable@vger.kernel.org
Fixes: 48166e6ea47d ("y2038: add 64-bit time_t syscalls to all 32-bit architectures")
Acked-by: Heiko Carstens <hca@linux.ibm.com> # s390
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/uapi/asm-generic/unistd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index d983c48a3b6a..d4cc26932ff4 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -737,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64)
 #define __NR_ppoll_time64 414
 __SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64)
 #define __NR_io_pgetevents_time64 416
-__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents)
+__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64)
 #define __NR_recvmmsg_time64 417
 __SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64)
 #define __NR_mq_timedsend_time64 418
-- 
cgit v1.2.3


From 295f10061af024099440b46602bcc47364551db7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 4 Jun 2024 14:20:26 +0200
Subject: syscalls: mmap(): use unsigned offset type consistently

Most architectures that implement the old-style mmap() with byte offset
use 'unsigned long' as the type for that offset, but microblaze and
riscv have the off_t type that is shared with userspace, matching the
prototype in include/asm-generic/syscalls.h.

Make this consistent by using an unsigned argument everywhere. This
changes the behavior slightly, as the argument is shifted to a page
number, and an user input with the top bit set would result in a
negative page offset rather than a large one as we use elsewhere.

For riscv, the 32-bit sys_mmap2() definition actually used a custom
type that is different from the global declaration, but this was
missed due to an incorrect type check.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h
index 933ca6581aba..fabcefe8a80a 100644
--- a/include/asm-generic/syscalls.h
+++ b/include/asm-generic/syscalls.h
@@ -19,7 +19,7 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 #ifndef sys_mmap
 asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
 			unsigned long prot, unsigned long flags,
-			unsigned long fd, off_t pgoff);
+			unsigned long fd, unsigned long off);
 #endif
 
 #ifndef sys_rt_sigreturn
-- 
cgit v1.2.3


From 0fa8ab5f3533b307a7d0e438ab08ecd92725dad7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 20 Jun 2024 14:47:27 +0200
Subject: linux/syscalls.h: add missing __user annotations

A couple of declarations in linux/syscalls.h are missing __user
annotations on their pointers, which can lead to warnings from
sparse because these don't match the implementation that have
the correct address space annotations.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/syscalls.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index ba9337709878..63424af87bba 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -322,13 +322,13 @@ asmlinkage long sys_io_pgetevents(aio_context_t ctx_id,
 				long nr,
 				struct io_event __user *events,
 				struct __kernel_timespec __user *timeout,
-				const struct __aio_sigset *sig);
+				const struct __aio_sigset __user *sig);
 asmlinkage long sys_io_pgetevents_time32(aio_context_t ctx_id,
 				long min_nr,
 				long nr,
 				struct io_event __user *events,
 				struct old_timespec32 __user *timeout,
-				const struct __aio_sigset *sig);
+				const struct __aio_sigset __user *sig);
 asmlinkage long sys_io_uring_setup(u32 entries,
 				struct io_uring_params __user *p);
 asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit,
@@ -441,7 +441,7 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);
 asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
 			   umode_t mode);
 asmlinkage long sys_openat2(int dfd, const char __user *filename,
-			    struct open_how *how, size_t size);
+			    struct open_how __user *how, size_t size);
 asmlinkage long sys_close(unsigned int fd);
 asmlinkage long sys_close_range(unsigned int fd, unsigned int max_fd,
 				unsigned int flags);
@@ -555,7 +555,7 @@ asmlinkage long sys_get_robust_list(int pid,
 asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
 				    size_t len);
 
-asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
+asmlinkage long sys_futex_waitv(struct futex_waitv __user *waiters,
 				unsigned int nr_futexes, unsigned int flags,
 				struct __kernel_timespec __user *timeout, clockid_t clockid);
 
@@ -907,7 +907,7 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
 asmlinkage long sys_getrandom(char __user *buf, size_t count,
 			      unsigned int flags);
 asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
-asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
+asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size);
 asmlinkage long sys_execveat(int dfd, const char __user *filename,
 			const char __user *const __user *argv,
 			const char __user *const __user *envp, int flags);
@@ -960,11 +960,11 @@ asmlinkage long sys_cachestat(unsigned int fd,
 		struct cachestat_range __user *cstat_range,
 		struct cachestat __user *cstat, unsigned int flags);
 asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags);
-asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx,
-				      u32 *size, u32 flags);
-asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx,
+asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx __user *ctx,
+				      u32 __user *size, u32 flags);
+asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx __user *ctx,
 				      u32 size, u32 flags);
-asmlinkage long sys_lsm_list_modules(u64 *ids, u32 *size, u32 flags);
+asmlinkage long sys_lsm_list_modules(u64 __user *ids, u32 __user *size, u32 flags);
 
 /*
  * Architecture-specific system calls
-- 
cgit v1.2.3


From 605efd54b50437ed9f3915690539d0afddca9d95 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 13 May 2024 15:00:41 +0200
Subject: netfilter: nf_tables: make struct nft_trans first member of derived
 subtypes

There is 'struct nft_trans', the basic structure for all transactional
objects, and the the various different transactional objects, such as
nft_trans_table, chain, set, set_elem and so on.

Right now 'struct nft_trans' uses a flexible member at the tail
(data[]), and casting is needed to access the actual type-specific
members.

Change this to make the hierarchy visible in source code, i.e. make
struct nft_trans the first member of all derived subtypes.

This has several advantages:
1. pahole output reflects the real size needed by the particular subtype
2. allows to use container_of() to convert the base type to the actual
   object type instead of casting ->data to the overlay structure.
3. It makes it easy to add intermediate types.

'struct nft_trans' contains a 'binding_list' that is only needed
by two subtypes, so it should be part of the two subtypes, not in
the base structure.

But that makes it hard to interate over the binding_list, because
there is no common base structure.

A follow patch moves the bind list to a new struct:

 struct nft_trans_binding {
   struct nft_trans nft_trans;
   struct list_head binding_list;
 };

... and makes that structure the new 'first member' for both
nft_trans_chain and nft_trans_set.

No functional change intended in this patch.

Some numbers:
 struct nft_trans { /* size: 88, cachelines: 2, members: 5 */
 struct nft_trans_chain { /* size: 152, cachelines: 3, members: 10 */
 struct nft_trans_elem { /* size: 112, cachelines: 2, members: 4 */
 struct nft_trans_flowtable { /* size: 128, cachelines: 2, members: 5 */
 struct nft_trans_obj { /* size: 112, cachelines: 2, members: 4 */
 struct nft_trans_rule { /* size: 112, cachelines: 2, members: 5 */
 struct nft_trans_set { /* size: 120, cachelines: 2, members: 8 */
 struct nft_trans_table { /* size: 96, cachelines: 2, members: 2 */

Of particular interest is nft_trans_elem, which needs to be allocated
once for each pending (to be added or removed) set element.

Add BUILD_BUG_ON to check struct nft_trans is placed at the top of
the container structure.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 162 ++++++++++++++++++++++----------------
 1 file changed, 92 insertions(+), 70 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2796153b03da..b25df037fceb 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1608,14 +1608,16 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
 }
 
 /**
- *	struct nft_trans - nf_tables object update in transaction
+ * struct nft_trans - nf_tables object update in transaction
  *
- *	@list: used internally
- *	@binding_list: list of objects with possible bindings
- *	@msg_type: message type
- *	@put_net: ctx->net needs to be put
- *	@ctx: transaction context
- *	@data: internal information related to the transaction
+ * @list: used internally
+ * @binding_list: list of objects with possible bindings
+ * @msg_type: message type
+ * @put_net: ctx->net needs to be put
+ * @ctx: transaction context
+ *
+ * This is the information common to all objects in the transaction,
+ * this must always be the first member of derived sub-types.
  */
 struct nft_trans {
 	struct list_head		list;
@@ -1623,26 +1625,29 @@ struct nft_trans {
 	int				msg_type;
 	bool				put_net;
 	struct nft_ctx			ctx;
-	char				data[];
 };
 
 struct nft_trans_rule {
+	struct nft_trans		nft_trans;
 	struct nft_rule			*rule;
 	struct nft_flow_rule		*flow;
 	u32				rule_id;
 	bool				bound;
 };
 
-#define nft_trans_rule(trans)	\
-	(((struct nft_trans_rule *)trans->data)->rule)
-#define nft_trans_flow_rule(trans)	\
-	(((struct nft_trans_rule *)trans->data)->flow)
-#define nft_trans_rule_id(trans)	\
-	(((struct nft_trans_rule *)trans->data)->rule_id)
-#define nft_trans_rule_bound(trans)	\
-	(((struct nft_trans_rule *)trans->data)->bound)
+#define nft_trans_container_rule(trans)			\
+	container_of(trans, struct nft_trans_rule, nft_trans)
+#define nft_trans_rule(trans)				\
+	nft_trans_container_rule(trans)->rule
+#define nft_trans_flow_rule(trans)			\
+	nft_trans_container_rule(trans)->flow
+#define nft_trans_rule_id(trans)			\
+	nft_trans_container_rule(trans)->rule_id
+#define nft_trans_rule_bound(trans)			\
+	nft_trans_container_rule(trans)->bound
 
 struct nft_trans_set {
+	struct nft_trans		nft_trans;
 	struct nft_set			*set;
 	u32				set_id;
 	u32				gc_int;
@@ -1652,22 +1657,25 @@ struct nft_trans_set {
 	u32				size;
 };
 
-#define nft_trans_set(trans)	\
-	(((struct nft_trans_set *)trans->data)->set)
-#define nft_trans_set_id(trans)	\
-	(((struct nft_trans_set *)trans->data)->set_id)
-#define nft_trans_set_bound(trans)	\
-	(((struct nft_trans_set *)trans->data)->bound)
-#define nft_trans_set_update(trans)	\
-	(((struct nft_trans_set *)trans->data)->update)
-#define nft_trans_set_timeout(trans)	\
-	(((struct nft_trans_set *)trans->data)->timeout)
-#define nft_trans_set_gc_int(trans)	\
-	(((struct nft_trans_set *)trans->data)->gc_int)
-#define nft_trans_set_size(trans)	\
-	(((struct nft_trans_set *)trans->data)->size)
+#define nft_trans_container_set(trans)			\
+	container_of(trans, struct nft_trans_set, nft_trans)
+#define nft_trans_set(trans)				\
+	nft_trans_container_set(trans)->set
+#define nft_trans_set_id(trans)				\
+	nft_trans_container_set(trans)->set_id
+#define nft_trans_set_bound(trans)			\
+	nft_trans_container_set(trans)->bound
+#define nft_trans_set_update(trans)			\
+	nft_trans_container_set(trans)->update
+#define nft_trans_set_timeout(trans)			\
+	nft_trans_container_set(trans)->timeout
+#define nft_trans_set_gc_int(trans)			\
+	nft_trans_container_set(trans)->gc_int
+#define nft_trans_set_size(trans)			\
+	nft_trans_container_set(trans)->size
 
 struct nft_trans_chain {
+	struct nft_trans		nft_trans;
 	struct nft_chain		*chain;
 	bool				update;
 	char				*name;
@@ -1679,73 +1687,87 @@ struct nft_trans_chain {
 	struct list_head		hook_list;
 };
 
-#define nft_trans_chain(trans)	\
-	(((struct nft_trans_chain *)trans->data)->chain)
-#define nft_trans_chain_update(trans)	\
-	(((struct nft_trans_chain *)trans->data)->update)
-#define nft_trans_chain_name(trans)	\
-	(((struct nft_trans_chain *)trans->data)->name)
-#define nft_trans_chain_stats(trans)	\
-	(((struct nft_trans_chain *)trans->data)->stats)
-#define nft_trans_chain_policy(trans)	\
-	(((struct nft_trans_chain *)trans->data)->policy)
-#define nft_trans_chain_bound(trans)	\
-	(((struct nft_trans_chain *)trans->data)->bound)
-#define nft_trans_chain_id(trans)	\
-	(((struct nft_trans_chain *)trans->data)->chain_id)
-#define nft_trans_basechain(trans)	\
-	(((struct nft_trans_chain *)trans->data)->basechain)
-#define nft_trans_chain_hooks(trans)	\
-	(((struct nft_trans_chain *)trans->data)->hook_list)
+#define nft_trans_container_chain(trans)		\
+	container_of(trans, struct nft_trans_chain, nft_trans)
+#define nft_trans_chain(trans)				\
+	nft_trans_container_chain(trans)->chain
+#define nft_trans_chain_update(trans)			\
+	nft_trans_container_chain(trans)->update
+#define nft_trans_chain_name(trans)			\
+	nft_trans_container_chain(trans)->name
+#define nft_trans_chain_stats(trans)			\
+	nft_trans_container_chain(trans)->stats
+#define nft_trans_chain_policy(trans)			\
+	nft_trans_container_chain(trans)->policy
+#define nft_trans_chain_bound(trans)			\
+	nft_trans_container_chain(trans)->bound
+#define nft_trans_chain_id(trans)			\
+	nft_trans_container_chain(trans)->chain_id
+#define nft_trans_basechain(trans)			\
+	nft_trans_container_chain(trans)->basechain
+#define nft_trans_chain_hooks(trans)			\
+	nft_trans_container_chain(trans)->hook_list
 
 struct nft_trans_table {
+	struct nft_trans		nft_trans;
 	bool				update;
 };
 
-#define nft_trans_table_update(trans)	\
-	(((struct nft_trans_table *)trans->data)->update)
+#define nft_trans_container_table(trans)		\
+	container_of(trans, struct nft_trans_table, nft_trans)
+#define nft_trans_table_update(trans)			\
+	nft_trans_container_table(trans)->update
 
 struct nft_trans_elem {
+	struct nft_trans		nft_trans;
 	struct nft_set			*set;
 	struct nft_elem_priv		*elem_priv;
 	bool				bound;
 };
 
-#define nft_trans_elem_set(trans)	\
-	(((struct nft_trans_elem *)trans->data)->set)
-#define nft_trans_elem_priv(trans)	\
-	(((struct nft_trans_elem *)trans->data)->elem_priv)
-#define nft_trans_elem_set_bound(trans)	\
-	(((struct nft_trans_elem *)trans->data)->bound)
+#define nft_trans_container_elem(t)			\
+	container_of(t, struct nft_trans_elem, nft_trans)
+#define nft_trans_elem_set(trans)			\
+	nft_trans_container_elem(trans)->set
+#define nft_trans_elem_priv(trans)			\
+	nft_trans_container_elem(trans)->elem_priv
+#define nft_trans_elem_set_bound(trans)			\
+	nft_trans_container_elem(trans)->bound
 
 struct nft_trans_obj {
+	struct nft_trans		nft_trans;
 	struct nft_object		*obj;
 	struct nft_object		*newobj;
 	bool				update;
 };
 
-#define nft_trans_obj(trans)	\
-	(((struct nft_trans_obj *)trans->data)->obj)
-#define nft_trans_obj_newobj(trans) \
-	(((struct nft_trans_obj *)trans->data)->newobj)
-#define nft_trans_obj_update(trans)	\
-	(((struct nft_trans_obj *)trans->data)->update)
+#define nft_trans_container_obj(t)			\
+	container_of(t, struct nft_trans_obj, nft_trans)
+#define nft_trans_obj(trans)				\
+	nft_trans_container_obj(trans)->obj
+#define nft_trans_obj_newobj(trans)			\
+	nft_trans_container_obj(trans)->newobj
+#define nft_trans_obj_update(trans)			\
+	nft_trans_container_obj(trans)->update
 
 struct nft_trans_flowtable {
+	struct nft_trans		nft_trans;
 	struct nft_flowtable		*flowtable;
 	bool				update;
 	struct list_head		hook_list;
 	u32				flags;
 };
 
-#define nft_trans_flowtable(trans)	\
-	(((struct nft_trans_flowtable *)trans->data)->flowtable)
-#define nft_trans_flowtable_update(trans)	\
-	(((struct nft_trans_flowtable *)trans->data)->update)
-#define nft_trans_flowtable_hooks(trans)	\
-	(((struct nft_trans_flowtable *)trans->data)->hook_list)
-#define nft_trans_flowtable_flags(trans)	\
-	(((struct nft_trans_flowtable *)trans->data)->flags)
+#define nft_trans_container_flowtable(t)		\
+	container_of(t, struct nft_trans_flowtable, nft_trans)
+#define nft_trans_flowtable(trans)			\
+	nft_trans_container_flowtable(trans)->flowtable
+#define nft_trans_flowtable_update(trans)		\
+	nft_trans_container_flowtable(trans)->update
+#define nft_trans_flowtable_hooks(trans)		\
+	nft_trans_container_flowtable(trans)->hook_list
+#define nft_trans_flowtable_flags(trans)		\
+	nft_trans_container_flowtable(trans)->flags
 
 #define NFT_TRANS_GC_BATCHCOUNT	256
 
-- 
cgit v1.2.3


From 17d8f3ad36a5fa5c93afab90ed03ba7ec748dd03 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 24 Jun 2024 20:53:16 +0200
Subject: netfilter: nf_tables: move bind list_head into relevant subtypes

Only nft_trans_chain and nft_trans_set subtypes use the
trans->binding_list member.

Add a new common binding subtype and move the member there.

This reduces size of all other subtypes by 16 bytes on 64bit platforms.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index b25df037fceb..f72448095833 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1611,7 +1611,6 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
  * struct nft_trans - nf_tables object update in transaction
  *
  * @list: used internally
- * @binding_list: list of objects with possible bindings
  * @msg_type: message type
  * @put_net: ctx->net needs to be put
  * @ctx: transaction context
@@ -1621,12 +1620,23 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
  */
 struct nft_trans {
 	struct list_head		list;
-	struct list_head		binding_list;
 	int				msg_type;
 	bool				put_net;
 	struct nft_ctx			ctx;
 };
 
+/**
+ * struct nft_trans_binding - nf_tables object with binding support in transaction
+ * @nft_trans:    base structure, MUST be first member
+ * @binding_list: list of objects with possible bindings
+ *
+ * This is the base type used by objects that can be bound to a chain.
+ */
+struct nft_trans_binding {
+	struct nft_trans nft_trans;
+	struct list_head binding_list;
+};
+
 struct nft_trans_rule {
 	struct nft_trans		nft_trans;
 	struct nft_rule			*rule;
@@ -1647,7 +1657,7 @@ struct nft_trans_rule {
 	nft_trans_container_rule(trans)->bound
 
 struct nft_trans_set {
-	struct nft_trans		nft_trans;
+	struct nft_trans_binding	nft_trans_binding;
 	struct nft_set			*set;
 	u32				set_id;
 	u32				gc_int;
@@ -1657,8 +1667,8 @@ struct nft_trans_set {
 	u32				size;
 };
 
-#define nft_trans_container_set(trans)			\
-	container_of(trans, struct nft_trans_set, nft_trans)
+#define nft_trans_container_set(t)	\
+	container_of(t, struct nft_trans_set, nft_trans_binding.nft_trans)
 #define nft_trans_set(trans)				\
 	nft_trans_container_set(trans)->set
 #define nft_trans_set_id(trans)				\
@@ -1675,7 +1685,7 @@ struct nft_trans_set {
 	nft_trans_container_set(trans)->size
 
 struct nft_trans_chain {
-	struct nft_trans		nft_trans;
+	struct nft_trans_binding	nft_trans_binding;
 	struct nft_chain		*chain;
 	bool				update;
 	char				*name;
@@ -1687,8 +1697,8 @@ struct nft_trans_chain {
 	struct list_head		hook_list;
 };
 
-#define nft_trans_container_chain(trans)		\
-	container_of(trans, struct nft_trans_chain, nft_trans)
+#define nft_trans_container_chain(t)	\
+	container_of(t, struct nft_trans_chain, nft_trans_binding.nft_trans)
 #define nft_trans_chain(trans)				\
 	nft_trans_container_chain(trans)->chain
 #define nft_trans_chain_update(trans)			\
-- 
cgit v1.2.3


From b3f4c216f7af37fa60e50d2ebb3ec9dd0f93886c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 13 May 2024 15:00:43 +0200
Subject: netfilter: nf_tables: compact chain+ft transaction objects

Cover holes to reduce both structures by 8 byte.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index f72448095833..1f0607b671ac 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1687,10 +1687,10 @@ struct nft_trans_set {
 struct nft_trans_chain {
 	struct nft_trans_binding	nft_trans_binding;
 	struct nft_chain		*chain;
-	bool				update;
 	char				*name;
 	struct nft_stats __percpu	*stats;
 	u8				policy;
+	bool				update;
 	bool				bound;
 	u32				chain_id;
 	struct nft_base_chain		*basechain;
@@ -1763,9 +1763,9 @@ struct nft_trans_obj {
 struct nft_trans_flowtable {
 	struct nft_trans		nft_trans;
 	struct nft_flowtable		*flowtable;
-	bool				update;
 	struct list_head		hook_list;
 	u32				flags;
+	bool				update;
 };
 
 #define nft_trans_container_flowtable(t)		\
-- 
cgit v1.2.3


From 8965d42bcf54d42cbc72fe34a9d0ec3f8527debd Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 13 May 2024 15:00:45 +0200
Subject: netfilter: nf_tables: pass nft_chain to destroy function, not nft_ctx

It would be better to not store nft_ctx inside nft_trans object,
the netlink ctx strucutre is huge and most of its information is
never needed in places that use trans->ctx.

Avoid/reduce its usage if possible, no runtime behaviour change
intended.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 1f0607b671ac..328fdc140551 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1171,7 +1171,7 @@ static inline bool nft_chain_is_bound(struct nft_chain *chain)
 
 int nft_chain_add(struct nft_table *table, struct nft_chain *chain);
 void nft_chain_del(struct nft_chain *chain);
-void nf_tables_chain_destroy(struct nft_ctx *ctx);
+void nf_tables_chain_destroy(struct nft_chain *chain);
 
 struct nft_stats {
 	u64			bytes;
-- 
cgit v1.2.3


From 13f20bc9ec4f9f25935bf52337d3d1708787bd55 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 24 Jun 2024 20:57:03 +0200
Subject: netfilter: nf_tables: store chain pointer in rule transaction

Currently the chain can be derived from trans->ctx.chain, but
the ctx will go away soon.

Thus add the chain pointer to nft_trans_rule structure itself.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 328fdc140551..86e6bd63a205 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1640,6 +1640,7 @@ struct nft_trans_binding {
 struct nft_trans_rule {
 	struct nft_trans		nft_trans;
 	struct nft_rule			*rule;
+	struct nft_chain		*chain;
 	struct nft_flow_rule		*flow;
 	u32				rule_id;
 	bool				bound;
@@ -1655,6 +1656,8 @@ struct nft_trans_rule {
 	nft_trans_container_rule(trans)->rule_id
 #define nft_trans_rule_bound(trans)			\
 	nft_trans_container_rule(trans)->bound
+#define nft_trans_rule_chain(trans)	\
+	nft_trans_container_rule(trans)->chain
 
 struct nft_trans_set {
 	struct nft_trans_binding	nft_trans_binding;
-- 
cgit v1.2.3


From e169285f8c56b8d5702475de0582dc83650c6cee Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 13 May 2024 15:00:51 +0200
Subject: netfilter: nf_tables: do not store nft_ctx in transaction objects

nft_ctx is huge and most of the information stored within isn't used
at all.

Remove nft_ctx member from the base transaction structure and store
only what is needed.

After this change, relevant struct sizes are:

struct nft_trans_chain { /* size: 120 (-32), cachelines: 2, members: 10 */
struct nft_trans_elem { /* size: 72 (-40), cachelines: 2, members: 4 */
struct nft_trans_flowtable { /* size: 80 (-48), cachelines: 2, members: 5 */
struct nft_trans_obj { /* size: 72 (-40), cachelines: 2, members: 4 */
struct nft_trans_rule { /* size: 80 (-32), cachelines: 2, members: 6 */
struct nft_trans_set { /* size: 96 (-24), cachelines: 2, members: 8 */
struct nft_trans_table { /* size: 56 (-40), cachelines: 1, members: 2 */

struct nft_trans_elem can now be allocated from kmalloc-96 instead of
kmalloc-128 slab.
A further reduction by 8 bytes would even allow for kmalloc-64.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 43 +++++++++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 86e6bd63a205..1e8da1b882ac 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1611,18 +1611,26 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
  * struct nft_trans - nf_tables object update in transaction
  *
  * @list: used internally
+ * @net: struct net
+ * @table: struct nft_table the object resides in
  * @msg_type: message type
- * @put_net: ctx->net needs to be put
- * @ctx: transaction context
+ * @seq: netlink sequence number
+ * @flags: modifiers to new request
+ * @report: notify via unicast netlink message
+ * @put_net: net needs to be put
  *
  * This is the information common to all objects in the transaction,
  * this must always be the first member of derived sub-types.
  */
 struct nft_trans {
 	struct list_head		list;
+	struct net			*net;
+	struct nft_table		*table;
 	int				msg_type;
-	bool				put_net;
-	struct nft_ctx			ctx;
+	u32				seq;
+	u16				flags;
+	u8				report:1;
+	u8				put_net:1;
 };
 
 /**
@@ -1794,6 +1802,33 @@ struct nft_trans_gc {
 	struct rcu_head		rcu;
 };
 
+static inline void nft_ctx_update(struct nft_ctx *ctx,
+				  const struct nft_trans *trans)
+{
+	switch (trans->msg_type) {
+	case NFT_MSG_NEWRULE:
+	case NFT_MSG_DELRULE:
+	case NFT_MSG_DESTROYRULE:
+		ctx->chain = nft_trans_rule_chain(trans);
+		break;
+	case NFT_MSG_NEWCHAIN:
+	case NFT_MSG_DELCHAIN:
+	case NFT_MSG_DESTROYCHAIN:
+		ctx->chain = nft_trans_chain(trans);
+		break;
+	default:
+		ctx->chain = NULL;
+		break;
+	}
+
+	ctx->net = trans->net;
+	ctx->table = trans->table;
+	ctx->family = trans->table->family;
+	ctx->report = trans->report;
+	ctx->flags = trans->flags;
+	ctx->seq = trans->seq;
+}
+
 struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
 					unsigned int gc_seq, gfp_t gfp);
 void nft_trans_gc_destroy(struct nft_trans_gc *trans);
-- 
cgit v1.2.3


From 98a563de231fcc91d65c6028c7f646fe28faf83a Mon Sep 17 00:00:00 2001
From: Dumitru Ceclan <dumitru.ceclan@analog.com>
Date: Fri, 7 Jun 2024 17:53:09 +0300
Subject: iio: adc: ad_sigma_delta: add disable_one callback

Sigma delta ADCs with a sequencer need to disable the previously enabled
channel when reading using ad_sigma_delta_single_conversion(). This was
done manually in drivers for devices with sequencers.

This patch implements handling of single channel disabling after a
single conversion.

Reviewed-by: Nuno Sa <nuno.sa@analog.com>
Signed-off-by: Dumitru Ceclan <dumitru.ceclan@analog.com>
Reviewed-by: David Lechner <dlechner@baylibre.com>
Link: https://patch.msgid.link/20240607-ad4111-v7-3-97e3855900a0@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/adc/ad_sigma_delta.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h
index 383614ebd760..f8c1d2505940 100644
--- a/include/linux/iio/adc/ad_sigma_delta.h
+++ b/include/linux/iio/adc/ad_sigma_delta.h
@@ -37,6 +37,10 @@ struct iio_dev;
  * @append_status: Will be called to enable status append at the end of the sample, may be NULL.
  * @set_mode: Will be called to select the current mode, may be NULL.
  * @disable_all: Will be called to disable all channels, may be NULL.
+ * @disable_one: Will be called to disable a single channel after
+ *		ad_sigma_delta_single_conversion(), may be NULL.
+ *		Usage of this callback expects iio_chan_spec.address to contain
+ *		the value required for the driver to identify the channel.
  * @postprocess_sample: Is called for each sampled data word, can be used to
  *		modify or drop the sample data, it, may be NULL.
  * @has_registers: true if the device has writable and readable registers, false
@@ -55,6 +59,7 @@ struct ad_sigma_delta_info {
 	int (*append_status)(struct ad_sigma_delta *, bool append);
 	int (*set_mode)(struct ad_sigma_delta *, enum ad_sigma_delta_mode mode);
 	int (*disable_all)(struct ad_sigma_delta *);
+	int (*disable_one)(struct ad_sigma_delta *, unsigned int chan);
 	int (*postprocess_sample)(struct ad_sigma_delta *, unsigned int raw_sample);
 	bool has_registers;
 	unsigned int addr_shift;
@@ -140,6 +145,15 @@ static inline int ad_sigma_delta_disable_all(struct ad_sigma_delta *sd)
 	return 0;
 }
 
+static inline int ad_sigma_delta_disable_one(struct ad_sigma_delta *sd,
+					     unsigned int chan)
+{
+	if (sd->info->disable_one)
+		return sd->info->disable_one(sd, chan);
+
+	return 0;
+}
+
 static inline int ad_sigma_delta_set_mode(struct ad_sigma_delta *sd,
 	unsigned int mode)
 {
-- 
cgit v1.2.3


From d305b7f34ee11c4901b8da150b641bcffbd3e951 Mon Sep 17 00:00:00 2001
From: Nuno Sa <nuno.sa@analog.com>
Date: Tue, 18 Jun 2024 15:32:05 +0200
Subject: iio: imu: adis: move to the cleanup magic

This makes locking and handling error paths simpler.

Signed-off-by: Nuno Sa <nuno.sa@analog.com>
Link: https://patch.msgid.link/20240618-dev-iio-adis-cleanup-v1-2-bd93ce7845c7@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/imu/adis.h | 54 +++++++++++---------------------------------
 1 file changed, 13 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index 4bb0a53cf7ea..93dad627378f 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -9,6 +9,7 @@
 #ifndef __IIO_ADIS_H__
 #define __IIO_ADIS_H__
 
+#include <linux/cleanup.h>
 #include <linux/spi/spi.h>
 #include <linux/interrupt.h>
 #include <linux/iio/iio.h>
@@ -150,13 +151,8 @@ int __adis_reset(struct adis *adis);
  */
 static inline int adis_reset(struct adis *adis)
 {
-	int ret;
-
-	mutex_lock(&adis->state_lock);
-	ret = __adis_reset(adis);
-	mutex_unlock(&adis->state_lock);
-
-	return ret;
+	guard(mutex)(&adis->state_lock);
+	return __adis_reset(adis);
 }
 
 int __adis_write_reg(struct adis *adis, unsigned int reg,
@@ -248,13 +244,8 @@ static inline int __adis_read_reg_32(struct adis *adis, unsigned int reg,
 static inline int adis_write_reg(struct adis *adis, unsigned int reg,
 				 unsigned int val, unsigned int size)
 {
-	int ret;
-
-	mutex_lock(&adis->state_lock);
-	ret = __adis_write_reg(adis, reg, val, size);
-	mutex_unlock(&adis->state_lock);
-
-	return ret;
+	guard(mutex)(&adis->state_lock);
+	return __adis_write_reg(adis, reg, val, size);
 }
 
 /**
@@ -267,13 +258,8 @@ static inline int adis_write_reg(struct adis *adis, unsigned int reg,
 static int adis_read_reg(struct adis *adis, unsigned int reg,
 			 unsigned int *val, unsigned int size)
 {
-	int ret;
-
-	mutex_lock(&adis->state_lock);
-	ret = __adis_read_reg(adis, reg, val, size);
-	mutex_unlock(&adis->state_lock);
-
-	return ret;
+	guard(mutex)(&adis->state_lock);
+	return __adis_read_reg(adis, reg, val, size);
 }
 
 /**
@@ -365,12 +351,8 @@ int __adis_update_bits_base(struct adis *adis, unsigned int reg, const u32 mask,
 static inline int adis_update_bits_base(struct adis *adis, unsigned int reg,
 					const u32 mask, const u32 val, u8 size)
 {
-	int ret;
-
-	mutex_lock(&adis->state_lock);
-	ret = __adis_update_bits_base(adis, reg, mask, val, size);
-	mutex_unlock(&adis->state_lock);
-	return ret;
+	guard(mutex)(&adis->state_lock);
+	return __adis_update_bits_base(adis, reg, mask, val, size);
 }
 
 /**
@@ -411,24 +393,14 @@ int __adis_enable_irq(struct adis *adis, bool enable);
 
 static inline int adis_enable_irq(struct adis *adis, bool enable)
 {
-	int ret;
-
-	mutex_lock(&adis->state_lock);
-	ret = __adis_enable_irq(adis, enable);
-	mutex_unlock(&adis->state_lock);
-
-	return ret;
+	guard(mutex)(&adis->state_lock);
+	return __adis_enable_irq(adis, enable);
 }
 
 static inline int adis_check_status(struct adis *adis)
 {
-	int ret;
-
-	mutex_lock(&adis->state_lock);
-	ret = __adis_check_status(adis);
-	mutex_unlock(&adis->state_lock);
-
-	return ret;
+	guard(mutex)(&adis->state_lock);
+	return __adis_check_status(adis);
 }
 
 static inline void adis_dev_lock(struct adis *adis)
-- 
cgit v1.2.3


From e6cab1ad9769a6b03dd1ba7ace106c36de85b1a8 Mon Sep 17 00:00:00 2001
From: Nuno Sa <nuno.sa@analog.com>
Date: Tue, 18 Jun 2024 15:32:06 +0200
Subject: iio: imu: adis: add cleanup based lock helpers

Add two new lock helpers that make use of the cleanup guard() and
scoped_guard() macros. Thus, users won't have to worry about unlocking
which is less prone to errors and allows for simpler error paths.

Signed-off-by: Nuno Sa <nuno.sa@analog.com>
Link: https://patch.msgid.link/20240618-dev-iio-adis-cleanup-v1-3-bd93ce7845c7@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/imu/adis.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index 93dad627378f..bc7332d12c44 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -403,6 +403,10 @@ static inline int adis_check_status(struct adis *adis)
 	return __adis_check_status(adis);
 }
 
+#define adis_dev_auto_lock(adis)	guard(mutex)(&(adis)->state_lock)
+#define adis_dev_auto_scoped_lock(adis) \
+	scoped_guard(mutex, &(adis)->state_lock)
+
 static inline void adis_dev_lock(struct adis *adis)
 {
 	mutex_lock(&adis->state_lock);
-- 
cgit v1.2.3


From bb78ad627659fc7a738356951999f2ba79e68059 Mon Sep 17 00:00:00 2001
From: Nuno Sa <nuno.sa@analog.com>
Date: Tue, 18 Jun 2024 15:32:12 +0200
Subject: iio: imu: adis: remove legacy lock helpers

Since all users were converted to the new cleanup based helper,
adis_dev_lock() and adis_dev_unlock() can now be removed from the lib.

Signed-off-by: Nuno Sa <nuno.sa@analog.com>
Link: https://patch.msgid.link/20240618-dev-iio-adis-cleanup-v1-9-bd93ce7845c7@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/imu/adis.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index bc7332d12c44..e6a75356567a 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -407,16 +407,6 @@ static inline int adis_check_status(struct adis *adis)
 #define adis_dev_auto_scoped_lock(adis) \
 	scoped_guard(mutex, &(adis)->state_lock)
 
-static inline void adis_dev_lock(struct adis *adis)
-{
-	mutex_lock(&adis->state_lock);
-}
-
-static inline void adis_dev_unlock(struct adis *adis)
-{
-	mutex_unlock(&adis->state_lock);
-}
-
 int adis_single_conversion(struct iio_dev *indio_dev,
 			   const struct iio_chan_spec *chan,
 			   unsigned int error_mask, int *val);
-- 
cgit v1.2.3


From ca567df74a28a9fb368c6b2d93e864113f73f5c2 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Sun, 7 Jun 2020 22:47:08 +0200
Subject: nsfs: add pid translation ioctls

Add ioctl()s to translate pids between pid namespaces.

LXCFS is a tiny fuse filesystem used to virtualize various aspects of
procfs. LXCFS is run on the host. The files and directories it creates
can be bind-mounted by e.g. a container at startup and mounted over the
various procfs files the container wishes to have virtualized. When e.g.
a read request for uptime is received, LXCFS will receive the pid of the
reader. In order to virtualize the corresponding read, LXCFS needs to
know the pid of the init process of the reader's pid namespace. In order
to do this, LXCFS first needs to fork() two helper processes. The first
helper process setns() to the readers pid namespace. The second helper
process is needed to create a process that is a proper member of the pid
namespace. The second helper process then creates a ucred message with
ucred.pid set to 1 and sends it back to LXCFS. The kernel will translate
the ucred.pid field to the corresponding pid number in LXCFS's pid
namespace. This way LXCFS can learn the init pid number of the reader's
pid namespace and can go on to virtualize. Since these two forks() are
costly LXCFS maintains an init pid cache that caches a given pid for a
fixed amount of time. The cache is pruned during new read requests.
However, even with the cache the hit of the two forks() is singificant
when a very large number of containers are running. With this simple
patch we add an ns ioctl that let's a caller retrieve the init pid nr of
a pid namespace through its pid namespace fd. This significantly
improves performance with a very simple change.

Support translation of pids and tgids. Other concepts can be added but
there are no obvious users for this right now.

To protect against races pidfds can be used to check whether the process
is still valid. If needed, this can also be extended to work on pidfds
directly.

Link: https://lore.kernel.org/r/20240619-work-ns_ioctl-v1-1-7c0097e6bb6b@kernel.org
Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/nsfs.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
index a0c8552b64ee..faeb9195da08 100644
--- a/include/uapi/linux/nsfs.h
+++ b/include/uapi/linux/nsfs.h
@@ -15,5 +15,13 @@
 #define NS_GET_NSTYPE		_IO(NSIO, 0x3)
 /* Get owner UID (in the caller's user namespace) for a user namespace */
 #define NS_GET_OWNER_UID	_IO(NSIO, 0x4)
+/* Translate pid from target pid namespace into the caller's pid namespace. */
+#define NS_GET_PID_FROM_PIDNS	_IOR(NSIO, 0x5, int)
+/* Return thread-group leader id of pid in the callers pid namespace. */
+#define NS_GET_TGID_FROM_PIDNS	_IOR(NSIO, 0x7, int)
+/* Translate pid from caller's pid namespace into a target pid namespace. */
+#define NS_GET_PID_IN_PIDNS	_IOR(NSIO, 0x6, int)
+/* Return thread-group leader id of pid in the target pid namespace. */
+#define NS_GET_TGID_IN_PIDNS	_IOR(NSIO, 0x8, int)
 
 #endif /* __LINUX_NSFS_H */
-- 
cgit v1.2.3


From e29630247be24c3987e2b048f8e152771b32d38b Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 3 Jun 2024 20:16:59 +0200
Subject: netfilter: nf_tables: rise cap on SELinux secmark context

secmark context is artificially limited 256 bytes, rise it to 4Kbytes.

Fixes: fb961945457f ("netfilter: nf_tables: add SECMARK support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index aa4094ca2444..639894ed1b97 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1376,7 +1376,7 @@ enum nft_secmark_attributes {
 #define NFTA_SECMARK_MAX	(__NFTA_SECMARK_MAX - 1)
 
 /* Max security context length */
-#define NFT_SECMARK_CTX_MAXLEN		256
+#define NFT_SECMARK_CTX_MAXLEN		4096
 
 /**
  * enum nft_reject_types - nf_tables reject expression reject types
-- 
cgit v1.2.3


From a6a61b9701d1add3bb6d86d8e259d833ea91a1a6 Mon Sep 17 00:00:00 2001
From: Jagadeesh Kona <quic_jkona@quicinc.com>
Date: Sun, 2 Jun 2024 17:14:33 +0530
Subject: dt-bindings: clock: qcom: Add SM8650 video clock controller

SM8650 video clock controller has most clocks same as SM8450,
but it also has few additional clocks and resets. Add device tree
bindings for the video clock controller on Qualcomm SM8650 platform
by defining these additional clocks and resets on top of SM8450.

Signed-off-by: Jagadeesh Kona <quic_jkona@quicinc.com>
Reviewed-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Vladimir Zapolskiy <vladimir.zapolskiy@linaro.org>
Link: https://lore.kernel.org/r/20240602114439.1611-3-quic_jkona@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,sm8650-videocc.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,sm8650-videocc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,sm8650-videocc.h b/include/dt-bindings/clock/qcom,sm8650-videocc.h
new file mode 100644
index 000000000000..4e3c2d87280f
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,sm8650-videocc.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_VIDEO_CC_SM8650_H
+#define _DT_BINDINGS_CLK_QCOM_VIDEO_CC_SM8650_H
+
+#include "qcom,sm8450-videocc.h"
+
+/* SM8650 introduces below new clocks and resets compared to SM8450 */
+
+/* VIDEO_CC clocks */
+#define VIDEO_CC_MVS0_SHIFT_CLK					12
+#define VIDEO_CC_MVS0C_SHIFT_CLK				13
+#define VIDEO_CC_MVS1_SHIFT_CLK					14
+#define VIDEO_CC_MVS1C_SHIFT_CLK				15
+#define VIDEO_CC_XO_CLK_SRC					16
+
+/* VIDEO_CC resets */
+#define VIDEO_CC_XO_CLK_ARES					7
+
+#endif
-- 
cgit v1.2.3


From 1ae3f0578e0e623e774db45870c0e34c47d8dd15 Mon Sep 17 00:00:00 2001
From: Jagadeesh Kona <quic_jkona@quicinc.com>
Date: Sun, 2 Jun 2024 17:14:37 +0530
Subject: dt-bindings: clock: qcom: Add SM8650 camera clock controller

Add device tree bindings for the camera clock controller on
Qualcomm SM8650 platform.

Signed-off-by: Jagadeesh Kona <quic_jkona@quicinc.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Vladimir Zapolskiy <vladimir.zapolskiy@linaro.org>
Link: https://lore.kernel.org/r/20240602114439.1611-7-quic_jkona@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/clock/qcom,sm8650-camcc.h | 195 ++++++++++++++++++++++++++
 1 file changed, 195 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,sm8650-camcc.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/qcom,sm8650-camcc.h b/include/dt-bindings/clock/qcom,sm8650-camcc.h
new file mode 100644
index 000000000000..df73bf35f4bf
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,sm8650-camcc.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_CAM_CC_SM8650_H
+#define _DT_BINDINGS_CLK_QCOM_CAM_CC_SM8650_H
+
+/* CAM_CC clocks */
+#define CAM_CC_BPS_AHB_CLK					0
+#define CAM_CC_BPS_CLK						1
+#define CAM_CC_BPS_CLK_SRC					2
+#define CAM_CC_BPS_FAST_AHB_CLK					3
+#define CAM_CC_BPS_SHIFT_CLK					4
+#define CAM_CC_CAMNOC_AXI_NRT_CLK				5
+#define CAM_CC_CAMNOC_AXI_RT_CLK				6
+#define CAM_CC_CAMNOC_AXI_RT_CLK_SRC				7
+#define CAM_CC_CAMNOC_DCD_XO_CLK				8
+#define CAM_CC_CAMNOC_XO_CLK					9
+#define CAM_CC_CCI_0_CLK					10
+#define CAM_CC_CCI_0_CLK_SRC					11
+#define CAM_CC_CCI_1_CLK					12
+#define CAM_CC_CCI_1_CLK_SRC					13
+#define CAM_CC_CCI_2_CLK					14
+#define CAM_CC_CCI_2_CLK_SRC					15
+#define CAM_CC_CORE_AHB_CLK					16
+#define CAM_CC_CPAS_AHB_CLK					17
+#define CAM_CC_CPAS_BPS_CLK					18
+#define CAM_CC_CPAS_CRE_CLK					19
+#define CAM_CC_CPAS_FAST_AHB_CLK				20
+#define CAM_CC_CPAS_IFE_0_CLK					21
+#define CAM_CC_CPAS_IFE_1_CLK					22
+#define CAM_CC_CPAS_IFE_2_CLK					23
+#define CAM_CC_CPAS_IFE_LITE_CLK				24
+#define CAM_CC_CPAS_IPE_NPS_CLK					25
+#define CAM_CC_CPAS_SBI_CLK					26
+#define CAM_CC_CPAS_SFE_0_CLK					27
+#define CAM_CC_CPAS_SFE_1_CLK					28
+#define CAM_CC_CPAS_SFE_2_CLK					29
+#define CAM_CC_CPHY_RX_CLK_SRC					30
+#define CAM_CC_CRE_AHB_CLK					31
+#define CAM_CC_CRE_CLK						32
+#define CAM_CC_CRE_CLK_SRC					33
+#define CAM_CC_CSI0PHYTIMER_CLK					34
+#define CAM_CC_CSI0PHYTIMER_CLK_SRC				35
+#define CAM_CC_CSI1PHYTIMER_CLK					36
+#define CAM_CC_CSI1PHYTIMER_CLK_SRC				37
+#define CAM_CC_CSI2PHYTIMER_CLK					38
+#define CAM_CC_CSI2PHYTIMER_CLK_SRC				39
+#define CAM_CC_CSI3PHYTIMER_CLK					40
+#define CAM_CC_CSI3PHYTIMER_CLK_SRC				41
+#define CAM_CC_CSI4PHYTIMER_CLK					42
+#define CAM_CC_CSI4PHYTIMER_CLK_SRC				43
+#define CAM_CC_CSI5PHYTIMER_CLK					44
+#define CAM_CC_CSI5PHYTIMER_CLK_SRC				45
+#define CAM_CC_CSI6PHYTIMER_CLK					46
+#define CAM_CC_CSI6PHYTIMER_CLK_SRC				47
+#define CAM_CC_CSI7PHYTIMER_CLK					48
+#define CAM_CC_CSI7PHYTIMER_CLK_SRC				49
+#define CAM_CC_CSID_CLK						50
+#define CAM_CC_CSID_CLK_SRC					51
+#define CAM_CC_CSID_CSIPHY_RX_CLK				52
+#define CAM_CC_CSIPHY0_CLK					53
+#define CAM_CC_CSIPHY1_CLK					54
+#define CAM_CC_CSIPHY2_CLK					55
+#define CAM_CC_CSIPHY3_CLK					56
+#define CAM_CC_CSIPHY4_CLK					57
+#define CAM_CC_CSIPHY5_CLK					58
+#define CAM_CC_CSIPHY6_CLK					59
+#define CAM_CC_CSIPHY7_CLK					60
+#define CAM_CC_DRV_AHB_CLK					61
+#define CAM_CC_DRV_XO_CLK					62
+#define CAM_CC_FAST_AHB_CLK_SRC					63
+#define CAM_CC_GDSC_CLK						64
+#define CAM_CC_ICP_AHB_CLK					65
+#define CAM_CC_ICP_CLK						66
+#define CAM_CC_ICP_CLK_SRC					67
+#define CAM_CC_IFE_0_CLK					68
+#define CAM_CC_IFE_0_CLK_SRC					69
+#define CAM_CC_IFE_0_FAST_AHB_CLK				70
+#define CAM_CC_IFE_0_SHIFT_CLK					71
+#define CAM_CC_IFE_1_CLK					72
+#define CAM_CC_IFE_1_CLK_SRC					73
+#define CAM_CC_IFE_1_FAST_AHB_CLK				74
+#define CAM_CC_IFE_1_SHIFT_CLK					75
+#define CAM_CC_IFE_2_CLK					76
+#define CAM_CC_IFE_2_CLK_SRC					77
+#define CAM_CC_IFE_2_FAST_AHB_CLK				78
+#define CAM_CC_IFE_2_SHIFT_CLK					79
+#define CAM_CC_IFE_LITE_AHB_CLK					80
+#define CAM_CC_IFE_LITE_CLK					81
+#define CAM_CC_IFE_LITE_CLK_SRC					82
+#define CAM_CC_IFE_LITE_CPHY_RX_CLK				83
+#define CAM_CC_IFE_LITE_CSID_CLK				84
+#define CAM_CC_IFE_LITE_CSID_CLK_SRC				85
+#define CAM_CC_IPE_NPS_AHB_CLK					86
+#define CAM_CC_IPE_NPS_CLK					87
+#define CAM_CC_IPE_NPS_CLK_SRC					88
+#define CAM_CC_IPE_NPS_FAST_AHB_CLK				89
+#define CAM_CC_IPE_PPS_CLK					90
+#define CAM_CC_IPE_PPS_FAST_AHB_CLK				91
+#define CAM_CC_IPE_SHIFT_CLK					92
+#define CAM_CC_JPEG_1_CLK					93
+#define CAM_CC_JPEG_CLK						94
+#define CAM_CC_JPEG_CLK_SRC					95
+#define CAM_CC_MCLK0_CLK					96
+#define CAM_CC_MCLK0_CLK_SRC					97
+#define CAM_CC_MCLK1_CLK					98
+#define CAM_CC_MCLK1_CLK_SRC					99
+#define CAM_CC_MCLK2_CLK					100
+#define CAM_CC_MCLK2_CLK_SRC					101
+#define CAM_CC_MCLK3_CLK					102
+#define CAM_CC_MCLK3_CLK_SRC					103
+#define CAM_CC_MCLK4_CLK					104
+#define CAM_CC_MCLK4_CLK_SRC					105
+#define CAM_CC_MCLK5_CLK					106
+#define CAM_CC_MCLK5_CLK_SRC					107
+#define CAM_CC_MCLK6_CLK					108
+#define CAM_CC_MCLK6_CLK_SRC					109
+#define CAM_CC_MCLK7_CLK					110
+#define CAM_CC_MCLK7_CLK_SRC					111
+#define CAM_CC_PLL0						112
+#define CAM_CC_PLL0_OUT_EVEN					113
+#define CAM_CC_PLL0_OUT_ODD					114
+#define CAM_CC_PLL1						115
+#define CAM_CC_PLL1_OUT_EVEN					116
+#define CAM_CC_PLL2						117
+#define CAM_CC_PLL3						118
+#define CAM_CC_PLL3_OUT_EVEN					119
+#define CAM_CC_PLL4						120
+#define CAM_CC_PLL4_OUT_EVEN					121
+#define CAM_CC_PLL5						122
+#define CAM_CC_PLL5_OUT_EVEN					123
+#define CAM_CC_PLL6						124
+#define CAM_CC_PLL6_OUT_EVEN					125
+#define CAM_CC_PLL7						126
+#define CAM_CC_PLL7_OUT_EVEN					127
+#define CAM_CC_PLL8						128
+#define CAM_CC_PLL8_OUT_EVEN					129
+#define CAM_CC_PLL9						130
+#define CAM_CC_PLL9_OUT_EVEN					131
+#define CAM_CC_PLL9_OUT_ODD					132
+#define CAM_CC_PLL10						133
+#define CAM_CC_PLL10_OUT_EVEN					134
+#define CAM_CC_QDSS_DEBUG_CLK					135
+#define CAM_CC_QDSS_DEBUG_CLK_SRC				136
+#define CAM_CC_QDSS_DEBUG_XO_CLK				137
+#define CAM_CC_SBI_CLK						138
+#define CAM_CC_SBI_FAST_AHB_CLK					139
+#define CAM_CC_SBI_SHIFT_CLK					140
+#define CAM_CC_SFE_0_CLK					141
+#define CAM_CC_SFE_0_CLK_SRC					142
+#define CAM_CC_SFE_0_FAST_AHB_CLK				143
+#define CAM_CC_SFE_0_SHIFT_CLK					144
+#define CAM_CC_SFE_1_CLK					145
+#define CAM_CC_SFE_1_CLK_SRC					146
+#define CAM_CC_SFE_1_FAST_AHB_CLK				147
+#define CAM_CC_SFE_1_SHIFT_CLK					148
+#define CAM_CC_SFE_2_CLK					149
+#define CAM_CC_SFE_2_CLK_SRC					150
+#define CAM_CC_SFE_2_FAST_AHB_CLK				151
+#define CAM_CC_SFE_2_SHIFT_CLK					152
+#define CAM_CC_SLEEP_CLK					153
+#define CAM_CC_SLEEP_CLK_SRC					154
+#define CAM_CC_SLOW_AHB_CLK_SRC					155
+#define CAM_CC_TITAN_TOP_SHIFT_CLK				156
+#define CAM_CC_XO_CLK_SRC					157
+
+/* CAM_CC power domains */
+#define CAM_CC_TITAN_TOP_GDSC					0
+#define CAM_CC_BPS_GDSC						1
+#define CAM_CC_IFE_0_GDSC					2
+#define CAM_CC_IFE_1_GDSC					3
+#define CAM_CC_IFE_2_GDSC					4
+#define CAM_CC_IPE_0_GDSC					5
+#define CAM_CC_SBI_GDSC						6
+#define CAM_CC_SFE_0_GDSC					7
+#define CAM_CC_SFE_1_GDSC					8
+#define CAM_CC_SFE_2_GDSC					9
+
+/* CAM_CC resets */
+#define CAM_CC_BPS_BCR						0
+#define CAM_CC_DRV_BCR						1
+#define CAM_CC_ICP_BCR						2
+#define CAM_CC_IFE_0_BCR					3
+#define CAM_CC_IFE_1_BCR					4
+#define CAM_CC_IFE_2_BCR					5
+#define CAM_CC_IPE_0_BCR					6
+#define CAM_CC_QDSS_DEBUG_BCR					7
+#define CAM_CC_SBI_BCR						8
+#define CAM_CC_SFE_0_BCR					9
+#define CAM_CC_SFE_1_BCR					10
+#define CAM_CC_SFE_2_BCR					11
+
+#endif
-- 
cgit v1.2.3


From 0e942053e4dc42a760f48c1981f3239825430f15 Mon Sep 17 00:00:00 2001
From: Heng Qi <hengqi@linux.alibaba.com>
Date: Fri, 21 Jun 2024 18:13:49 +0800
Subject: linux/dim: move useful macros to .h file

Useful macros will be used effectively elsewhere.
These will be utilized in subsequent patches.

Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20240621101353.107425-2-hengqi@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/dim.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/dim.h b/include/linux/dim.h
index f343bc9aa2ec..43398f5eade2 100644
--- a/include/linux/dim.h
+++ b/include/linux/dim.h
@@ -10,6 +10,13 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
+/* Number of DIM profiles and period mode. */
+#define NET_DIM_PARAMS_NUM_PROFILES 5
+#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256
+#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128
+#define NET_DIM_DEF_PROFILE_CQE 1
+#define NET_DIM_DEF_PROFILE_EQE 1
+
 /*
  * Number of events between DIM iterations.
  * Causes a moderation of the algorithm run.
-- 
cgit v1.2.3


From f750dfe825b904164688adeb147950e0e0c4d262 Mon Sep 17 00:00:00 2001
From: Heng Qi <hengqi@linux.alibaba.com>
Date: Fri, 21 Jun 2024 18:13:51 +0800
Subject: ethtool: provide customized dim profile management

The NetDIM library, currently leveraged by an array of NICs, delivers
excellent acceleration benefits. Nevertheless, NICs vary significantly
in their dim profile list prerequisites.

Specifically, virtio-net backends may present diverse sw or hw device
implementation, making a one-size-fits-all parameter list impractical.
On Alibaba Cloud, the virtio DPU's performance under the default DIM
profile falls short of expectations, partly due to a mismatch in
parameter configuration.

I also noticed that ice/idpf/ena and other NICs have customized
profilelist or placed some restrictions on dim capabilities.

Motivated by this, I tried adding new params for "ethtool -C" that provides
a per-device control to modify and access a device's interrupt parameters.

Usage
========
The target NIC is named ethx.

Assume that ethx only declares support for rx profile setting
(with DIM_PROFILE_RX flag set in profile_flags) and supports modification
of usec and pkt fields.

1. Query the currently customized list of the device

$ ethtool -c ethx
...
rx-profile:
{.usec =   1, .pkts = 256, .comps = n/a,},
{.usec =   8, .pkts = 256, .comps = n/a,},
{.usec =  64, .pkts = 256, .comps = n/a,},
{.usec = 128, .pkts = 256, .comps = n/a,},
{.usec = 256, .pkts = 256, .comps = n/a,}
tx-profile:   n/a

2. Tune
$ ethtool -C ethx rx-profile 1,1,n_2,n,n_3,3,n_4,4,n_n,5,n
"n" means do not modify this field.
$ ethtool -c ethx
...
rx-profile:
{.usec =   1, .pkts =   1, .comps = n/a,},
{.usec =   2, .pkts = 256, .comps = n/a,},
{.usec =   3, .pkts =   3, .comps = n/a,},
{.usec =   4, .pkts =   4, .comps = n/a,},
{.usec = 256, .pkts =   5, .comps = n/a,}
tx-profile:   n/a

3. Hint
If the device does not support some type of customized dim profiles,
the corresponding "n/a" will display.

If the "n/a" field is being modified, -EOPNOTSUPP will be reported.

Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20240621101353.107425-4-hengqi@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/dim.h                  | 58 ++++++++++++++++++++++++++++++++++++
 include/linux/ethtool.h              |  4 ++-
 include/linux/netdevice.h            |  3 ++
 include/uapi/linux/ethtool_netlink.h | 22 ++++++++++++++
 4 files changed, 86 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dim.h b/include/linux/dim.h
index 43398f5eade2..e0f39bd85432 100644
--- a/include/linux/dim.h
+++ b/include/linux/dim.h
@@ -10,6 +10,8 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
+struct net_device;
+
 /* Number of DIM profiles and period mode. */
 #define NET_DIM_PARAMS_NUM_PROFILES 5
 #define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256
@@ -45,12 +47,45 @@
  * @pkts: CQ packet counter suggestion (by DIM)
  * @comps: Completion counter
  * @cq_period_mode: CQ period count mode (from CQE/EQE)
+ * @rcu: for asynchronous kfree_rcu
  */
 struct dim_cq_moder {
 	u16 usec;
 	u16 pkts;
 	u16 comps;
 	u8 cq_period_mode;
+	struct rcu_head rcu;
+};
+
+#define DIM_PROFILE_RX		BIT(0)	/* support rx profile modification */
+#define DIM_PROFILE_TX		BIT(1)	/* support tx profile modification */
+
+#define DIM_COALESCE_USEC	BIT(0)	/* support usec field modification */
+#define DIM_COALESCE_PKTS	BIT(1)	/* support pkts field modification */
+#define DIM_COALESCE_COMPS	BIT(2)	/* support comps field modification */
+
+/**
+ * struct dim_irq_moder - Structure for irq moderation information.
+ * Used to collect irq moderation related information.
+ *
+ * @profile_flags: DIM_PROFILE_*
+ * @coal_flags: DIM_COALESCE_* for Rx and Tx
+ * @dim_rx_mode: Rx DIM period count mode: CQE or EQE
+ * @dim_tx_mode: Tx DIM period count mode: CQE or EQE
+ * @rx_profile: DIM profile list for Rx
+ * @tx_profile: DIM profile list for Tx
+ * @rx_dim_work: Rx DIM worker scheduled by net_dim()
+ * @tx_dim_work: Tx DIM worker scheduled by net_dim()
+ */
+struct dim_irq_moder {
+	u8 profile_flags;
+	u8 coal_flags;
+	u8 dim_rx_mode;
+	u8 dim_tx_mode;
+	struct dim_cq_moder __rcu *rx_profile;
+	struct dim_cq_moder __rcu *tx_profile;
+	void (*rx_dim_work)(struct work_struct *work);
+	void (*tx_dim_work)(struct work_struct *work);
 };
 
 /**
@@ -198,6 +233,29 @@ enum dim_step_result {
 	DIM_ON_EDGE,
 };
 
+/**
+ * net_dim_init_irq_moder - collect information to initialize irq moderation
+ * @dev: target network device
+ * @profile_flags: Rx or Tx profile modification capability
+ * @coal_flags: irq moderation params flags
+ * @rx_mode: CQ period mode for Rx
+ * @tx_mode: CQ period mode for Tx
+ * @rx_dim_work: Rx worker called after dim decision
+ * @tx_dim_work: Tx worker called after dim decision
+ *
+ * Return: 0 on success or a negative error code.
+ */
+int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags,
+			   u8 coal_flags, u8 rx_mode, u8 tx_mode,
+			   void (*rx_dim_work)(struct work_struct *work),
+			   void (*tx_dim_work)(struct work_struct *work));
+
+/**
+ * net_dim_free_irq_moder - free fields for irq moderation
+ * @dev: target network device
+ */
+void net_dim_free_irq_moder(struct net_device *dev);
+
 /**
  *	dim_on_top - check if current state is a good place to stop (top location)
  *	@dim: DIM context
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 6fd9107d3cc0..959196af7f5a 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -284,7 +284,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
 #define ETHTOOL_COALESCE_TX_AGGR_MAX_BYTES	BIT(24)
 #define ETHTOOL_COALESCE_TX_AGGR_MAX_FRAMES	BIT(25)
 #define ETHTOOL_COALESCE_TX_AGGR_TIME_USECS	BIT(26)
-#define ETHTOOL_COALESCE_ALL_PARAMS		GENMASK(26, 0)
+#define ETHTOOL_COALESCE_RX_PROFILE		BIT(27)
+#define ETHTOOL_COALESCE_TX_PROFILE		BIT(28)
+#define ETHTOOL_COALESCE_ALL_PARAMS		GENMASK(28, 0)
 
 #define ETHTOOL_COALESCE_USECS						\
 	(ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4e81660b4462..cc18acd3c58b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2402,6 +2402,9 @@ struct net_device {
 	/** @page_pools: page pools created for this netdevice */
 	struct hlist_head	page_pools;
 #endif
+
+	/** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */
+	struct dim_irq_moder	*irq_moder;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index b49b804b9495..d15856c7e001 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -415,12 +415,34 @@ enum {
 	ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES,		/* u32 */
 	ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES,		/* u32 */
 	ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS,		/* u32 */
+	/* nest - _A_PROFILE_IRQ_MODERATION */
+	ETHTOOL_A_COALESCE_RX_PROFILE,
+	/* nest - _A_PROFILE_IRQ_MODERATION */
+	ETHTOOL_A_COALESCE_TX_PROFILE,
 
 	/* add new constants above here */
 	__ETHTOOL_A_COALESCE_CNT,
 	ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1)
 };
 
+enum {
+	ETHTOOL_A_PROFILE_UNSPEC,
+	/* nest, _A_IRQ_MODERATION_* */
+	ETHTOOL_A_PROFILE_IRQ_MODERATION,
+	__ETHTOOL_A_PROFILE_CNT,
+	ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_IRQ_MODERATION_UNSPEC,
+	ETHTOOL_A_IRQ_MODERATION_USEC,			/* u32 */
+	ETHTOOL_A_IRQ_MODERATION_PKTS,			/* u32 */
+	ETHTOOL_A_IRQ_MODERATION_COMPS,			/* u32 */
+
+	__ETHTOOL_A_IRQ_MODERATION_CNT,
+	ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1)
+};
+
 /* PAUSE */
 
 enum {
-- 
cgit v1.2.3


From 13ba28c5cd047e272f9dbbeb5c62c403873d8987 Mon Sep 17 00:00:00 2001
From: Heng Qi <hengqi@linux.alibaba.com>
Date: Fri, 21 Jun 2024 18:13:52 +0800
Subject: dim: add new interfaces for initialization and getting results

DIM-related mode and work have been collected in one same place,
so new interfaces are added to provide convenience.

Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20240621101353.107425-5-hengqi@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/dim.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

(limited to 'include')

diff --git a/include/linux/dim.h b/include/linux/dim.h
index e0f39bd85432..1b581ff25a15 100644
--- a/include/linux/dim.h
+++ b/include/linux/dim.h
@@ -256,6 +256,54 @@ int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags,
  */
 void net_dim_free_irq_moder(struct net_device *dev);
 
+/**
+ * net_dim_setting - initialize DIM's cq mode and schedule worker
+ * @dev: target network device
+ * @dim: DIM context
+ * @is_tx: true indicates the tx direction, false indicates the rx direction
+ */
+void net_dim_setting(struct net_device *dev, struct dim *dim, bool is_tx);
+
+/**
+ * net_dim_work_cancel - synchronously cancel dim's worker
+ * @dim: DIM context
+ */
+void net_dim_work_cancel(struct dim *dim);
+
+/**
+ * net_dim_get_rx_irq_moder - get DIM rx results based on profile_ix
+ * @dev: target network device
+ * @dim: DIM context
+ *
+ * Return: DIM irq moderation
+ */
+struct dim_cq_moder
+net_dim_get_rx_irq_moder(struct net_device *dev, struct dim *dim);
+
+/**
+ * net_dim_get_tx_irq_moder - get DIM tx results based on profile_ix
+ * @dev: target network device
+ * @dim: DIM context
+ *
+ * Return: DIM irq moderation
+ */
+struct dim_cq_moder
+net_dim_get_tx_irq_moder(struct net_device *dev, struct dim *dim);
+
+/**
+ * net_dim_set_rx_mode - set DIM rx cq mode
+ * @dev: target network device
+ * @rx_mode: target rx cq mode
+ */
+void net_dim_set_rx_mode(struct net_device *dev, u8 rx_mode);
+
+/**
+ * net_dim_set_tx_mode - set DIM tx cq mode
+ * @dev: target network device
+ * @tx_mode: target tx cq mode
+ */
+void net_dim_set_tx_mode(struct net_device *dev, u8 tx_mode);
+
 /**
  *	dim_on_top - check if current state is a good place to stop (top location)
  *	@dim: DIM context
-- 
cgit v1.2.3


From eee5528890d54b22b46f833002355a5ee94c3bb4 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@gmail.com>
Date: Mon, 24 Jun 2024 08:55:01 +0900
Subject: PCI: Add Edimax Vendor ID to pci_ids.h

Add the Edimax Vendor ID (0x1432) for an ethernet driver for Tehuti
Networks TN40xx chips. This ID can be used for Realtek 8180 and Ralink
rt28xx wireless drivers.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@gmail.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/20240623235507.108147-2-fujita.tomonori@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/pci_ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 942a587bb97e..677aea20d3e1 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2126,6 +2126,8 @@
 
 #define PCI_VENDOR_ID_CHELSIO		0x1425
 
+#define PCI_VENDOR_ID_EDIMAX		0x1432
+
 #define PCI_VENDOR_ID_ADLINK		0x144a
 
 #define PCI_VENDOR_ID_SAMSUNG		0x144d
-- 
cgit v1.2.3


From e3943f00afdb71684c4f209f9d3a90d6b79771fc Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 19 Jun 2024 16:08:46 +0200
Subject: OPP: Introduce an OF helper function to inform if required-opps is
 used

As being shown from a subsequent change to genpd, it's useful to understand
if a device's OF node has an OPP-table described and whether it contains
OPP nodes that makes use of the required-opps DT property.

For this reason, let's introduce an OPP OF helper function called
dev_pm_opp_of_has_required_opp().

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 include/linux/pm_opp.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index dd7c8441af42..6424692c30b7 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -474,6 +474,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpuma
 struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev);
 struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp);
 int of_get_required_opp_performance_state(struct device_node *np, int index);
+bool dev_pm_opp_of_has_required_opp(struct device *dev);
 int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_table *opp_table);
 int dev_pm_opp_of_register_em(struct device *dev, struct cpumask *cpus);
 int dev_pm_opp_calc_power(struct device *dev, unsigned long *uW,
@@ -552,6 +553,11 @@ static inline int of_get_required_opp_performance_state(struct device_node *np,
 	return -EOPNOTSUPP;
 }
 
+static inline bool dev_pm_opp_of_has_required_opp(struct device *dev)
+{
+	return false;
+}
+
 static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_table *opp_table)
 {
 	return -EOPNOTSUPP;
-- 
cgit v1.2.3


From a047b66c0f05c668b4a6d41a3cacfe707efc9ecb Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Thu, 20 Jun 2024 01:53:43 +0300
Subject: media: v4l: subdev: Fix typo in documentation

Replace the incorrect reference to the v4l2_subdev_enable_stream()
function with the correct v4l2_subdev_enable_streams() spelling.

Fixes: d0749adb3070 ("media: v4l2-subdev: Add subdev .(enable|disable)_streams() operations")
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Link: https://lore.kernel.org/r/20240619225343.15873-1-laurent.pinchart@ideasonboard.com
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
---
 include/media/v4l2-subdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index 1b74e037e440..bd235d325ff9 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -690,7 +690,7 @@ struct v4l2_subdev_pad_config {
  *
  * @pad: pad number
  * @stream: stream number
- * @enabled: has the stream been enabled with v4l2_subdev_enable_stream()
+ * @enabled: has the stream been enabled with v4l2_subdev_enable_streams()
  * @fmt: &struct v4l2_mbus_framefmt
  * @crop: &struct v4l2_rect to be used for crop
  * @compose: &struct v4l2_rect to be used for compose
-- 
cgit v1.2.3


From 1decf05d0f4de78ef67dc3f794709258c689e09e Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Tue, 18 Jun 2024 19:25:56 +0300
Subject: wifi: mac80211: inform the low level if drv_stop() is a suspend

This will allow the low level driver to take different actions for
different flows.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20240618192529.739036208b6e.Ie18a2fe8e02bf2717549d39420b350cfdaf3d317@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ecfa65ade226..9c96e8ae9ef7 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -4444,7 +4444,7 @@ struct ieee80211_ops {
 		   struct ieee80211_tx_control *control,
 		   struct sk_buff *skb);
 	int (*start)(struct ieee80211_hw *hw);
-	void (*stop)(struct ieee80211_hw *hw);
+	void (*stop)(struct ieee80211_hw *hw, bool suspend);
 #ifdef CONFIG_PM
 	int (*suspend)(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan);
 	int (*resume)(struct ieee80211_hw *hw);
-- 
cgit v1.2.3


From f531d13bdfe3f4f084aaa8acae2cb0f02295f5ae Mon Sep 17 00:00:00 2001
From: Eyal Birger <eyal.birger@gmail.com>
Date: Mon, 27 May 2024 20:29:14 -0700
Subject: xfrm: support sending NAT keepalives in ESP in UDP states

Add the ability to send out RFC-3948 NAT keepalives from the xfrm stack.

To use, Userspace sets an XFRM_NAT_KEEPALIVE_INTERVAL integer property when
creating XFRM outbound states which denotes the number of seconds between
keepalive messages.

Keepalive messages are sent from a per net delayed work which iterates over
the xfrm states. The logic is guarded by the xfrm state spinlock due to the
xfrm state walk iterator.

Possible future enhancements:

- Adding counters to keep track of sent keepalives.
- deduplicate NAT keepalives between states sharing the same nat keepalive
  parameters.
- provisioning hardware offloads for devices capable of implementing this.
- revise xfrm state list to use an rcu list in order to avoid running this
  under spinlock.

Suggested-by: Paul Wouters <paul.wouters@aiven.io>
Tested-by: Paul Wouters <paul.wouters@aiven.io>
Tested-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/ipv6_stubs.h  |  3 +++
 include/net/netns/xfrm.h  |  1 +
 include/net/xfrm.h        | 10 ++++++++++
 include/uapi/linux/xfrm.h |  1 +
 4 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 485c39a89866..11cefd50704d 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -9,6 +9,7 @@
 #include <net/flow.h>
 #include <net/neighbour.h>
 #include <net/sock.h>
+#include <net/ipv6.h>
 
 /* structs from net/ip6_fib.h */
 struct fib6_info;
@@ -72,6 +73,8 @@ struct ipv6_stub {
 			     int (*output)(struct net *, struct sock *, struct sk_buff *));
 	struct net_device *(*ipv6_dev_find)(struct net *net, const struct in6_addr *addr,
 					    struct net_device *dev);
+	int (*ip6_xmit)(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
+			__u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority);
 };
 extern const struct ipv6_stub *ipv6_stub __read_mostly;
 
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 423b52eca908..d489d9250bff 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -83,6 +83,7 @@ struct netns_xfrm {
 
 	spinlock_t xfrm_policy_lock;
 	struct mutex xfrm_cfg_mutex;
+	struct delayed_work	nat_keepalive_work;
 };
 
 #endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 77ebf5bcf0b9..46a214a76081 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -229,6 +229,10 @@ struct xfrm_state {
 	struct xfrm_encap_tmpl	*encap;
 	struct sock __rcu	*encap_sk;
 
+	/* NAT keepalive */
+	u32			nat_keepalive_interval; /* seconds */
+	time64_t		nat_keepalive_expiration;
+
 	/* Data for care-of address */
 	xfrm_address_t	*coaddr;
 
@@ -2203,4 +2207,10 @@ static inline int register_xfrm_state_bpf(void)
 }
 #endif
 
+int xfrm_nat_keepalive_init(unsigned short family);
+void xfrm_nat_keepalive_fini(unsigned short family);
+int xfrm_nat_keepalive_net_init(struct net *net);
+int xfrm_nat_keepalive_net_fini(struct net *net);
+void xfrm_nat_keepalive_state_updated(struct xfrm_state *x);
+
 #endif	/* _NET_XFRM_H */
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index d950d02ab791..f28701500714 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -321,6 +321,7 @@ enum xfrm_attr_type_t {
 	XFRMA_IF_ID,		/* __u32 */
 	XFRMA_MTIMER_THRESH,	/* __u32 in seconds for input SA */
 	XFRMA_SA_DIR,		/* __u8 */
+	XFRMA_NAT_KEEPALIVE_INTERVAL,	/* __u32 in seconds for NAT keepalive */
 	__XFRMA_MAX
 
 #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK	/* Compatibility */
-- 
cgit v1.2.3


From 0c5275bf75ec3708d95654195ae4ed80d946d088 Mon Sep 17 00:00:00 2001
From: Or Har-Toov <ohartoov@nvidia.com>
Date: Sun, 16 Jun 2024 19:10:36 +0300
Subject: RDMA/mlx5: Use sq timestamp as QP timestamp when RoCE is disabled

When creating a QP, one of the attributes is TS format (timestamp).
In some devices, we have a limitation that all QPs should have the same
ts_format. The ts_format is chosen based on the device's capability.
The qp_ts_format cap resides under the RoCE caps table, and the
cap will be 0 when RoCE is disabled. So when RoCE is disabled, the
value that should be queried is sq_ts_format under HCA caps.

Consider the case when the system supports REAL_TIME_TS format (0x2),
some QPs are created with REAL_TIME_TS as ts_format, and afterwards
RoCE gets disabled. When trying to construct a new QP, we can't use
the qp_ts_format, that is queried from the RoCE caps table, Since it
leads to passing 0x0 (FREE_RUNNING_TS) as the value of the qp_ts_format,
which is different than the ts_format of the previously allocated
QPs REAL_TIME_TS format (0x2).

Thus, to resolve this, read the sq_ts_format, which also reflect
the supported ts format for the QP when RoCE is disabled.

Fixes: 4806f1e2fee8 ("net/mlx5: Set QP timestamp mode to default")
Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
Link: https://lore.kernel.org/r/32801966eb767c7fd62b8dea3b63991d5fbfe213.1718554199.git.leon@kernel.org
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/mlx5/qp.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index f0e55bf3ec8b..ad1ce650146c 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -576,9 +576,12 @@ static inline const char *mlx5_qp_state_str(int state)
 
 static inline int mlx5_get_qp_default_ts(struct mlx5_core_dev *dev)
 {
-	return !MLX5_CAP_ROCE(dev, qp_ts_format) ?
-		       MLX5_TIMESTAMP_FORMAT_FREE_RUNNING :
-		       MLX5_TIMESTAMP_FORMAT_DEFAULT;
+	u8 supported_ts_cap = mlx5_get_roce_state(dev) ?
+			      MLX5_CAP_ROCE(dev, qp_ts_format) :
+			      MLX5_CAP_GEN(dev, sq_ts_format);
+
+	return supported_ts_cap ? MLX5_TIMESTAMP_FORMAT_DEFAULT :
+	       MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
 }
 
 #endif /* MLX5_QP_H */
-- 
cgit v1.2.3


From 844bc12e6da3e2d8ab0cd96d049fc695d5d8ba68 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <mgurtovoy@nvidia.com>
Date: Wed, 19 Jun 2024 20:11:52 +0300
Subject: IB/core: add support for draining Shared receive queues

To avoid leakage for QPs assocoated with SRQ, according to IB spec
(section 10.3.1):

"Note, for QPs that are associated with an SRQ, the Consumer should take
the QP through the Error State before invoking a Destroy QP or a Modify
QP to the Reset State. The Consumer may invoke the Destroy QP without
first performing a Modify QP to the Error State and waiting for the Affiliated
Asynchronous Last WQE Reached Event. However, if the Consumer
does not wait for the Affiliated Asynchronous Last WQE Reached Event,
then WQE and Data Segment leakage may occur. Therefore, it is good
programming practice to teardown a QP that is associated with an SRQ
by using the following process:
 - Put the QP in the Error State;
 - wait for the Affiliated Asynchronous Last WQE Reached Event;
 - either:
   - drain the CQ by invoking the Poll CQ verb and either wait for CQ
     to be empty or the number of Poll CQ operations has exceeded
     CQ capacity size; or
   - post another WR that completes on the same CQ and wait for this
     WR to return as a WC;
 - and then invoke a Destroy QP or Reset QP."

Catch the Last WQE Reached Event in the core layer during drain QP flow.

Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Link: https://lore.kernel.org/r/20240619171153.34631-2-mgurtovoy@nvidia.com
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/rdma/ib_verbs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 477bf9dd5e71..5a193008f99c 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1788,6 +1788,7 @@ struct ib_qp {
 	struct list_head	rdma_mrs;
 	struct list_head	sig_mrs;
 	struct ib_srq	       *srq;
+	struct completion	srq_completion;
 	struct ib_xrcd	       *xrcd; /* XRC TGT QPs only */
 	struct list_head	xrcd_list;
 
@@ -1797,6 +1798,7 @@ struct ib_qp {
 	struct ib_qp           *real_qp;
 	struct ib_uqp_object   *uobject;
 	void                  (*event_handler)(struct ib_event *, void *);
+	void                  (*registered_event_handler)(struct ib_event *, void *);
 	void		       *qp_context;
 	/* sgid_attrs associated with the AV's */
 	const struct ib_gid_attr *av_sgid_attr;
-- 
cgit v1.2.3


From 210b1f6576e8b367907e7ff51ef425062e1468e4 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Mon, 24 Jun 2024 08:56:17 -0700
Subject: nvme-pci: do not directly handle subsys reset fallout

Scheduling reset_work after a nvme subsystem reset is expected to fail
on pcie, but this also prevents potential handling the platform's pcie
services may provide that might successfully recovering the link without
re-enumeration. Such examples include AER, DPC, and power's EEH.

Provide a pci specific operation that safely initiates a subsystem
reset, and instead of scheduling reset work, read back the status
register to trigger a pcie read error.

Since this only affects pci, the other fabrics drivers subscribe to a
generic nvmf subsystem reset that is exactly the same as before. The
loop fabric doesn't use it because nvmet doesn't support setting that
property anyway.

And since we're using the magic NSSR value in two places now, provide a
symbolic define for it.

Reported-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 27faae34245d..57e27e48c913 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -25,6 +25,9 @@
 
 #define NVME_NSID_ALL		0xffffffff
 
+/* Special NSSR value, 'NVMe' */
+#define NVME_SUBSYS_RESET	0x4E564D65
+
 enum nvme_subsys_type {
 	/* Referral to another discovery type target subsystem */
 	NVME_NQN_DISC	= 1,
-- 
cgit v1.2.3


From a6143bdcaf7e37ecce05df2a3d3c1c5d587f87b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@baylibre.com>
Date: Wed, 19 Jun 2024 12:11:42 +0200
Subject: mfd: stm32-timers: Unify alignment of register definition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use tabs consistently for indention and properly align register names,
values and comments. This improves readability (at least for my eyes).

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Link: https://lore.kernel.org/r/da3b7f9af5794d7463aa62cbaa7251abf1af2018.1718791090.git.u.kleine-koenig@baylibre.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/stm32-timers.h | 170 +++++++++++++++++++--------------------
 1 file changed, 85 insertions(+), 85 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h
index 9eb17481b07f..5794110b2b28 100644
--- a/include/linux/mfd/stm32-timers.h
+++ b/include/linux/mfd/stm32-timers.h
@@ -12,97 +12,97 @@
 #include <linux/dma-mapping.h>
 #include <linux/regmap.h>
 
-#define TIM_CR1		0x00	/* Control Register 1      */
-#define TIM_CR2		0x04	/* Control Register 2      */
-#define TIM_SMCR	0x08	/* Slave mode control reg  */
-#define TIM_DIER	0x0C	/* DMA/interrupt register  */
-#define TIM_SR		0x10	/* Status register	   */
-#define TIM_EGR		0x14	/* Event Generation Reg    */
-#define TIM_CCMR1	0x18	/* Capt/Comp 1 Mode Reg    */
-#define TIM_CCMR2	0x1C	/* Capt/Comp 2 Mode Reg    */
-#define TIM_CCER	0x20	/* Capt/Comp Enable Reg    */
-#define TIM_CNT		0x24	/* Counter		   */
-#define TIM_PSC		0x28	/* Prescaler               */
-#define TIM_ARR		0x2c	/* Auto-Reload Register    */
-#define TIM_CCR1	0x34	/* Capt/Comp Register 1    */
-#define TIM_CCR2	0x38	/* Capt/Comp Register 2    */
-#define TIM_CCR3	0x3C	/* Capt/Comp Register 3    */
-#define TIM_CCR4	0x40	/* Capt/Comp Register 4    */
-#define TIM_BDTR	0x44	/* Break and Dead-Time Reg */
-#define TIM_DCR		0x48	/* DMA control register    */
-#define TIM_DMAR	0x4C	/* DMA register for transfer */
-#define TIM_TISEL	0x68	/* Input Selection         */
+#define TIM_CR1		0x00			/* Control Register 1			*/
+#define TIM_CR2		0x04			/* Control Register 2			*/
+#define TIM_SMCR	0x08			/* Slave mode control reg		*/
+#define TIM_DIER	0x0C			/* DMA/interrupt register		*/
+#define TIM_SR		0x10			/* Status register			*/
+#define TIM_EGR		0x14			/* Event Generation Reg			*/
+#define TIM_CCMR1	0x18			/* Capt/Comp 1 Mode Reg			*/
+#define TIM_CCMR2	0x1C			/* Capt/Comp 2 Mode Reg			*/
+#define TIM_CCER	0x20			/* Capt/Comp Enable Reg			*/
+#define TIM_CNT		0x24			/* Counter				*/
+#define TIM_PSC		0x28			/* Prescaler				*/
+#define TIM_ARR		0x2c			/* Auto-Reload Register			*/
+#define TIM_CCR1	0x34			/* Capt/Comp Register 1			*/
+#define TIM_CCR2	0x38			/* Capt/Comp Register 2			*/
+#define TIM_CCR3	0x3C			/* Capt/Comp Register 3			*/
+#define TIM_CCR4	0x40			/* Capt/Comp Register 4			*/
+#define TIM_BDTR	0x44			/* Break and Dead-Time Reg		*/
+#define TIM_DCR		0x48			/* DMA control register			*/
+#define TIM_DMAR	0x4C			/* DMA register for transfer		*/
+#define TIM_TISEL	0x68			/* Input Selection			*/
 
-#define TIM_CR1_CEN	BIT(0)	/* Counter Enable	   */
-#define TIM_CR1_DIR	BIT(4)  /* Counter Direction	   */
-#define TIM_CR1_ARPE	BIT(7)	/* Auto-reload Preload Ena */
-#define TIM_CR2_MMS	(BIT(4) | BIT(5) | BIT(6)) /* Master mode selection */
-#define TIM_CR2_MMS2	GENMASK(23, 20) /* Master mode selection 2 */
-#define TIM_SMCR_SMS	(BIT(0) | BIT(1) | BIT(2)) /* Slave mode selection */
-#define TIM_SMCR_TS	(BIT(4) | BIT(5) | BIT(6)) /* Trigger selection */
-#define TIM_DIER_UIE	BIT(0)	/* Update interrupt	   */
-#define TIM_DIER_CC1IE	BIT(1)  /* CC1 Interrupt Enable    */
-#define TIM_DIER_CC2IE	BIT(2)  /* CC2 Interrupt Enable    */
-#define TIM_DIER_CC3IE	BIT(3)  /* CC3 Interrupt Enable    */
-#define TIM_DIER_CC4IE	BIT(4)  /* CC4 Interrupt Enable    */
-#define TIM_DIER_CC_IE(x)	BIT((x) + 1) /* CC1, CC2, CC3, CC4 interrupt enable */
-#define TIM_DIER_UDE	BIT(8)  /* Update DMA request Enable */
-#define TIM_DIER_CC1DE	BIT(9)  /* CC1 DMA request Enable  */
-#define TIM_DIER_CC2DE	BIT(10) /* CC2 DMA request Enable  */
-#define TIM_DIER_CC3DE	BIT(11) /* CC3 DMA request Enable  */
-#define TIM_DIER_CC4DE	BIT(12) /* CC4 DMA request Enable  */
-#define TIM_DIER_COMDE	BIT(13) /* COM DMA request Enable  */
-#define TIM_DIER_TDE	BIT(14) /* Trigger DMA request Enable */
-#define TIM_SR_UIF	BIT(0)	/* Update interrupt flag   */
-#define TIM_SR_CC_IF(x)	BIT((x) + 1) /* CC1, CC2, CC3, CC4 interrupt flag */
-#define TIM_EGR_UG	BIT(0)	/* Update Generation       */
-#define TIM_CCMR_PE	BIT(3)	/* Channel Preload Enable  */
-#define TIM_CCMR_M1	(BIT(6) | BIT(5))  /* Channel PWM Mode 1 */
-#define TIM_CCMR_CC1S		(BIT(0) | BIT(1)) /* Capture/compare 1 sel */
-#define TIM_CCMR_IC1PSC		GENMASK(3, 2)	/* Input capture 1 prescaler */
-#define TIM_CCMR_CC2S		(BIT(8) | BIT(9)) /* Capture/compare 2 sel */
-#define TIM_CCMR_IC2PSC		GENMASK(11, 10)	/* Input capture 2 prescaler */
-#define TIM_CCMR_CC1S_TI1	BIT(0)	/* IC1/IC3 selects TI1/TI3 */
-#define TIM_CCMR_CC1S_TI2	BIT(1)	/* IC1/IC3 selects TI2/TI4 */
-#define TIM_CCMR_CC2S_TI2	BIT(8)	/* IC2/IC4 selects TI2/TI4 */
-#define TIM_CCMR_CC2S_TI1	BIT(9)	/* IC2/IC4 selects TI1/TI3 */
-#define TIM_CCMR_CC3S		(BIT(0) | BIT(1)) /* Capture/compare 3 sel */
-#define TIM_CCMR_CC4S		(BIT(8) | BIT(9)) /* Capture/compare 4 sel */
-#define TIM_CCMR_CC3S_TI3	BIT(0)	/* IC3 selects TI3 */
-#define TIM_CCMR_CC4S_TI4	BIT(8)	/* IC4 selects TI4 */
-#define TIM_CCER_CC1E	BIT(0)	/* Capt/Comp 1  out Ena    */
-#define TIM_CCER_CC1P	BIT(1)	/* Capt/Comp 1  Polarity   */
-#define TIM_CCER_CC1NE	BIT(2)	/* Capt/Comp 1N out Ena    */
-#define TIM_CCER_CC1NP	BIT(3)	/* Capt/Comp 1N Polarity   */
-#define TIM_CCER_CC2E	BIT(4)	/* Capt/Comp 2  out Ena    */
-#define TIM_CCER_CC2P	BIT(5)	/* Capt/Comp 2  Polarity   */
-#define TIM_CCER_CC2NP	BIT(7)	/* Capt/Comp 2N Polarity   */
-#define TIM_CCER_CC3E	BIT(8)	/* Capt/Comp 3  out Ena    */
-#define TIM_CCER_CC3P	BIT(9)	/* Capt/Comp 3  Polarity   */
-#define TIM_CCER_CC3NP	BIT(11)	/* Capt/Comp 3N Polarity   */
-#define TIM_CCER_CC4E	BIT(12)	/* Capt/Comp 4  out Ena    */
-#define TIM_CCER_CC4P	BIT(13)	/* Capt/Comp 4  Polarity   */
-#define TIM_CCER_CC4NP	BIT(15)	/* Capt/Comp 4N Polarity   */
-#define TIM_CCER_CCXE	(BIT(0) | BIT(4) | BIT(8) | BIT(12))
-#define TIM_BDTR_BKE(x)	BIT(12 + (x) * 12) /* Break input enable */
-#define TIM_BDTR_BKP(x)	BIT(13 + (x) * 12) /* Break input polarity */
-#define TIM_BDTR_AOE	BIT(14)	/* Automatic Output Enable */
-#define TIM_BDTR_MOE	BIT(15)	/* Main Output Enable      */
-#define TIM_BDTR_BKF(x)	(0xf << (16 + (x) * 4))
-#define TIM_DCR_DBA	GENMASK(4, 0)	/* DMA base addr */
-#define TIM_DCR_DBL	GENMASK(12, 8)	/* DMA burst len */
+#define TIM_CR1_CEN		BIT(0)					/* Counter Enable				*/
+#define TIM_CR1_DIR		BIT(4)					/* Counter Direction				*/
+#define TIM_CR1_ARPE		BIT(7)					/* Auto-reload Preload Ena			*/
+#define TIM_CR2_MMS		(BIT(4) | BIT(5) | BIT(6))		/* Master mode selection			*/
+#define TIM_CR2_MMS2		GENMASK(23, 20)				/* Master mode selection 2			*/
+#define TIM_SMCR_SMS		(BIT(0) | BIT(1) | BIT(2))		/* Slave mode selection				*/
+#define TIM_SMCR_TS		(BIT(4) | BIT(5) | BIT(6))		/* Trigger selection				*/
+#define TIM_DIER_UIE		BIT(0)					/* Update interrupt				*/
+#define TIM_DIER_CC1IE		BIT(1)					/* CC1 Interrupt Enable				*/
+#define TIM_DIER_CC2IE		BIT(2)					/* CC2 Interrupt Enable				*/
+#define TIM_DIER_CC3IE		BIT(3)					/* CC3 Interrupt Enable				*/
+#define TIM_DIER_CC4IE		BIT(4)					/* CC4 Interrupt Enable				*/
+#define TIM_DIER_CC_IE(x)	BIT((x) + 1)				/* CC1, CC2, CC3, CC4 interrupt enable		*/
+#define TIM_DIER_UDE		BIT(8)					/* Update DMA request Enable			*/
+#define TIM_DIER_CC1DE		BIT(9)					/* CC1 DMA request Enable			*/
+#define TIM_DIER_CC2DE		BIT(10)					/* CC2 DMA request Enable			*/
+#define TIM_DIER_CC3DE		BIT(11)					/* CC3 DMA request Enable			*/
+#define TIM_DIER_CC4DE		BIT(12)					/* CC4 DMA request Enable			*/
+#define TIM_DIER_COMDE		BIT(13)					/* COM DMA request Enable			*/
+#define TIM_DIER_TDE		BIT(14)					/* Trigger DMA request Enable			*/
+#define TIM_SR_UIF		BIT(0)					/* Update interrupt flag			*/
+#define TIM_SR_CC_IF(x)		BIT((x) + 1)				/* CC1, CC2, CC3, CC4 interrupt flag		*/
+#define TIM_EGR_UG		BIT(0)					/* Update Generation				*/
+#define TIM_CCMR_PE		BIT(3)					/* Channel Preload Enable			*/
+#define TIM_CCMR_M1		(BIT(6) | BIT(5))			/* Channel PWM Mode 1				*/
+#define TIM_CCMR_CC1S		(BIT(0) | BIT(1))			/* Capture/compare 1 sel			*/
+#define TIM_CCMR_IC1PSC		GENMASK(3, 2)				/* Input capture 1 prescaler			*/
+#define TIM_CCMR_CC2S		(BIT(8) | BIT(9))			/* Capture/compare 2 sel			*/
+#define TIM_CCMR_IC2PSC		GENMASK(11, 10)				/* Input capture 2 prescaler			*/
+#define TIM_CCMR_CC1S_TI1	BIT(0)					/* IC1/IC3 selects TI1/TI3			*/
+#define TIM_CCMR_CC1S_TI2	BIT(1)					/* IC1/IC3 selects TI2/TI4			*/
+#define TIM_CCMR_CC2S_TI2	BIT(8)					/* IC2/IC4 selects TI2/TI4			*/
+#define TIM_CCMR_CC2S_TI1	BIT(9)					/* IC2/IC4 selects TI1/TI3			*/
+#define TIM_CCMR_CC3S		(BIT(0) | BIT(1))			/* Capture/compare 3 sel			*/
+#define TIM_CCMR_CC4S		(BIT(8) | BIT(9))			/* Capture/compare 4 sel			*/
+#define TIM_CCMR_CC3S_TI3	BIT(0)					/* IC3 selects TI3				*/
+#define TIM_CCMR_CC4S_TI4	BIT(8)					/* IC4 selects TI4				*/
+#define TIM_CCER_CC1E		BIT(0)					/* Capt/Comp 1  out Ena				*/
+#define TIM_CCER_CC1P		BIT(1)					/* Capt/Comp 1  Polarity			*/
+#define TIM_CCER_CC1NE		BIT(2)					/* Capt/Comp 1N out Ena				*/
+#define TIM_CCER_CC1NP		BIT(3)					/* Capt/Comp 1N Polarity			*/
+#define TIM_CCER_CC2E		BIT(4)					/* Capt/Comp 2  out Ena				*/
+#define TIM_CCER_CC2P		BIT(5)					/* Capt/Comp 2  Polarity			*/
+#define TIM_CCER_CC2NP		BIT(7)					/* Capt/Comp 2N Polarity			*/
+#define TIM_CCER_CC3E		BIT(8)					/* Capt/Comp 3  out Ena				*/
+#define TIM_CCER_CC3P		BIT(9)					/* Capt/Comp 3  Polarity			*/
+#define TIM_CCER_CC3NP		BIT(11)					/* Capt/Comp 3N Polarity			*/
+#define TIM_CCER_CC4E		BIT(12)					/* Capt/Comp 4  out Ena				*/
+#define TIM_CCER_CC4P		BIT(13)					/* Capt/Comp 4  Polarity			*/
+#define TIM_CCER_CC4NP		BIT(15)					/* Capt/Comp 4N Polarity			*/
+#define TIM_CCER_CCXE		(BIT(0) | BIT(4) | BIT(8) | BIT(12))
+#define TIM_BDTR_BKE(x)		BIT(12 + (x) * 12)			/* Break input enable				*/
+#define TIM_BDTR_BKP(x)		BIT(13 + (x) * 12)			/* Break input polarity				*/
+#define TIM_BDTR_AOE		BIT(14)					/* Automatic Output Enable			*/
+#define TIM_BDTR_MOE		BIT(15)					/* Main Output Enable				*/
+#define TIM_BDTR_BKF(x)		(0xf << (16 + (x) * 4))
+#define TIM_DCR_DBA		GENMASK(4, 0)				/* DMA base addr				*/
+#define TIM_DCR_DBL		GENMASK(12, 8)				/* DMA burst len				*/
 
-#define MAX_TIM_PSC		0xFFFF
-#define MAX_TIM_ICPSC		0x3
-#define TIM_CR2_MMS_SHIFT	4
-#define TIM_CR2_MMS2_SHIFT	20
+#define MAX_TIM_PSC				0xFFFF
+#define MAX_TIM_ICPSC				0x3
+#define TIM_CR2_MMS_SHIFT			4
+#define TIM_CR2_MMS2_SHIFT			20
 #define TIM_SMCR_SMS_SLAVE_MODE_DISABLED	0 /* counts on internal clock when CEN=1 */
 #define TIM_SMCR_SMS_ENCODER_MODE_1		1 /* counts TI1FP1 edges, depending on TI2FP2 level */
 #define TIM_SMCR_SMS_ENCODER_MODE_2		2 /* counts TI2FP2 edges, depending on TI1FP1 level */
 #define TIM_SMCR_SMS_ENCODER_MODE_3		3 /* counts on both TI1FP1 and TI2FP2 edges */
-#define TIM_SMCR_TS_SHIFT	4
-#define TIM_BDTR_BKF_MASK	0xF
-#define TIM_BDTR_BKF_SHIFT(x)	(16 + (x) * 4)
+#define TIM_SMCR_TS_SHIFT			4
+#define TIM_BDTR_BKF_MASK			0xF
+#define TIM_BDTR_BKF_SHIFT(x)			(16 + (x) * 4)
 
 enum stm32_timers_dmas {
 	STM32_TIMERS_DMA_CH1,
-- 
cgit v1.2.3


From 796b942f65967af55684bf520812787b97522151 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@baylibre.com>
Date: Wed, 19 Jun 2024 12:11:43 +0200
Subject: mfd: stm32-timers: Add some register definitions with a parameter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are some registers that belong together and are numbered from 1 to
4. Introduce a macro definition for these that takes the channel number
as parameter and define the previously available constants using the new
ones.

This allows to simplify some users that up to now use constructs like

	TIM_CCER_CC1NE << (ch * 4)

which is an ugly mix of using a predefined value and still knowing
internal details about it.

Note that there are several decrements by 1 involved. These are
necessary because software guys start counting at 0 while the hardware
designer started at 1 (and having TIM_CCER_CCxE(1) be TIM_CCER_CC2E
isn't a sane option). The compiler is expected to optimize these out
nicely.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Link: https://lore.kernel.org/r/05df15f61dde81033407d3b4fcb67ee403ecc8db.1718791090.git.u.kleine-koenig@baylibre.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/stm32-timers.h | 60 +++++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h
index 5794110b2b28..92b45a559656 100644
--- a/include/linux/mfd/stm32-timers.h
+++ b/include/linux/mfd/stm32-timers.h
@@ -24,10 +24,11 @@
 #define TIM_CNT		0x24			/* Counter				*/
 #define TIM_PSC		0x28			/* Prescaler				*/
 #define TIM_ARR		0x2c			/* Auto-Reload Register			*/
-#define TIM_CCR1	0x34			/* Capt/Comp Register 1			*/
-#define TIM_CCR2	0x38			/* Capt/Comp Register 2			*/
-#define TIM_CCR3	0x3C			/* Capt/Comp Register 3			*/
-#define TIM_CCR4	0x40			/* Capt/Comp Register 4			*/
+#define TIM_CCRx(x)	(0x34 + 4 * ((x) - 1))	/* Capt/Comp Register x (x ∈ {1, .. 4})	*/
+#define TIM_CCR1	TIM_CCRx(1)		/* Capt/Comp Register 1			*/
+#define TIM_CCR2	TIM_CCRx(2)		/* Capt/Comp Register 2			*/
+#define TIM_CCR3	TIM_CCRx(3)		/* Capt/Comp Register 3			*/
+#define TIM_CCR4	TIM_CCRx(4)		/* Capt/Comp Register 4			*/
 #define TIM_BDTR	0x44			/* Break and Dead-Time Reg		*/
 #define TIM_DCR		0x48			/* DMA control register			*/
 #define TIM_DMAR	0x4C			/* DMA register for transfer		*/
@@ -41,16 +42,18 @@
 #define TIM_SMCR_SMS		(BIT(0) | BIT(1) | BIT(2))		/* Slave mode selection				*/
 #define TIM_SMCR_TS		(BIT(4) | BIT(5) | BIT(6))		/* Trigger selection				*/
 #define TIM_DIER_UIE		BIT(0)					/* Update interrupt				*/
-#define TIM_DIER_CC1IE		BIT(1)					/* CC1 Interrupt Enable				*/
-#define TIM_DIER_CC2IE		BIT(2)					/* CC2 Interrupt Enable				*/
-#define TIM_DIER_CC3IE		BIT(3)					/* CC3 Interrupt Enable				*/
-#define TIM_DIER_CC4IE		BIT(4)					/* CC4 Interrupt Enable				*/
+#define TIM_DIER_CCxIE(x)	BIT(1 + ((x) - 1))			/* CCx Interrupt Enable	(x ∈ {1, .. 4})		*/
+#define TIM_DIER_CC1IE		TIM_DIER_CCxIE(1)			/* CC1 Interrupt Enable				*/
+#define TIM_DIER_CC2IE		TIM_DIER_CCxIE(2)			/* CC2 Interrupt Enable				*/
+#define TIM_DIER_CC3IE		TIM_DIER_CCxIE(3)			/* CC3 Interrupt Enable				*/
+#define TIM_DIER_CC4IE		TIM_DIER_CCxIE(4)			/* CC4 Interrupt Enable				*/
 #define TIM_DIER_CC_IE(x)	BIT((x) + 1)				/* CC1, CC2, CC3, CC4 interrupt enable		*/
 #define TIM_DIER_UDE		BIT(8)					/* Update DMA request Enable			*/
-#define TIM_DIER_CC1DE		BIT(9)					/* CC1 DMA request Enable			*/
-#define TIM_DIER_CC2DE		BIT(10)					/* CC2 DMA request Enable			*/
-#define TIM_DIER_CC3DE		BIT(11)					/* CC3 DMA request Enable			*/
-#define TIM_DIER_CC4DE		BIT(12)					/* CC4 DMA request Enable			*/
+#define TIM_DIER_CCxDE(x)	BIT(9 + ((x) - 1))			/* CCx DMA request Enable (x ∈ {1, .. 4})	*/
+#define TIM_DIER_CC1DE		TIM_DIER_CCxDE(1)			/* CC1 DMA request Enable			*/
+#define TIM_DIER_CC2DE		TIM_DIER_CCxDE(2)			/* CC2 DMA request Enable			*/
+#define TIM_DIER_CC3DE		TIM_DIER_CCxDE(3)			/* CC3 DMA request Enable			*/
+#define TIM_DIER_CC4DE		TIM_DIER_CCxDE(4)			/* CC4 DMA request Enable			*/
 #define TIM_DIER_COMDE		BIT(13)					/* COM DMA request Enable			*/
 #define TIM_DIER_TDE		BIT(14)					/* Trigger DMA request Enable			*/
 #define TIM_SR_UIF		BIT(0)					/* Update interrupt flag			*/
@@ -70,19 +73,26 @@
 #define TIM_CCMR_CC4S		(BIT(8) | BIT(9))			/* Capture/compare 4 sel			*/
 #define TIM_CCMR_CC3S_TI3	BIT(0)					/* IC3 selects TI3				*/
 #define TIM_CCMR_CC4S_TI4	BIT(8)					/* IC4 selects TI4				*/
-#define TIM_CCER_CC1E		BIT(0)					/* Capt/Comp 1  out Ena				*/
-#define TIM_CCER_CC1P		BIT(1)					/* Capt/Comp 1  Polarity			*/
-#define TIM_CCER_CC1NE		BIT(2)					/* Capt/Comp 1N out Ena				*/
-#define TIM_CCER_CC1NP		BIT(3)					/* Capt/Comp 1N Polarity			*/
-#define TIM_CCER_CC2E		BIT(4)					/* Capt/Comp 2  out Ena				*/
-#define TIM_CCER_CC2P		BIT(5)					/* Capt/Comp 2  Polarity			*/
-#define TIM_CCER_CC2NP		BIT(7)					/* Capt/Comp 2N Polarity			*/
-#define TIM_CCER_CC3E		BIT(8)					/* Capt/Comp 3  out Ena				*/
-#define TIM_CCER_CC3P		BIT(9)					/* Capt/Comp 3  Polarity			*/
-#define TIM_CCER_CC3NP		BIT(11)					/* Capt/Comp 3N Polarity			*/
-#define TIM_CCER_CC4E		BIT(12)					/* Capt/Comp 4  out Ena				*/
-#define TIM_CCER_CC4P		BIT(13)					/* Capt/Comp 4  Polarity			*/
-#define TIM_CCER_CC4NP		BIT(15)					/* Capt/Comp 4N Polarity			*/
+#define TIM_CCER_CCxE(x)	BIT(0 + 4 * ((x) - 1))			/* Capt/Comp x  out Ena (x ∈ {1, .. 4})		*/
+#define TIM_CCER_CCxP(x)	BIT(1 + 4 * ((x) - 1))			/* Capt/Comp x  Polarity (x ∈ {1, .. 4})	*/
+#define TIM_CCER_CCxNE(x)	BIT(2 + 4 * ((x) - 1))			/* Capt/Comp xN out Ena (x ∈ {1, .. 4})		*/
+#define TIM_CCER_CCxNP(x)	BIT(3 + 4 * ((x) - 1))			/* Capt/Comp xN Polarity (x ∈ {1, .. 4})	*/
+#define TIM_CCER_CC1E		TIM_CCER_CCxE(1)			/* Capt/Comp 1  out Ena				*/
+#define TIM_CCER_CC1P		TIM_CCER_CCxP(1)			/* Capt/Comp 1  Polarity			*/
+#define TIM_CCER_CC1NE		TIM_CCER_CCxNE(1)			/* Capt/Comp 1N out Ena				*/
+#define TIM_CCER_CC1NP		TIM_CCER_CCxNP(1)			/* Capt/Comp 1N Polarity			*/
+#define TIM_CCER_CC2E		TIM_CCER_CCxE(2)			/* Capt/Comp 2  out Ena				*/
+#define TIM_CCER_CC2P		TIM_CCER_CCxP(2)			/* Capt/Comp 2  Polarity			*/
+#define TIM_CCER_CC2NE		TIM_CCER_CCxNE(2)			/* Capt/Comp 2N out Ena				*/
+#define TIM_CCER_CC2NP		TIM_CCER_CCxNP(2)			/* Capt/Comp 2N Polarity			*/
+#define TIM_CCER_CC3E		TIM_CCER_CCxE(3)			/* Capt/Comp 3  out Ena				*/
+#define TIM_CCER_CC3P		TIM_CCER_CCxP(3)			/* Capt/Comp 3  Polarity			*/
+#define TIM_CCER_CC3NE		TIM_CCER_CCxNE(3)			/* Capt/Comp 3N out Ena				*/
+#define TIM_CCER_CC3NP		TIM_CCER_CCxNP(3)			/* Capt/Comp 3N Polarity			*/
+#define TIM_CCER_CC4E		TIM_CCER_CCxE(4)			/* Capt/Comp 4  out Ena				*/
+#define TIM_CCER_CC4P		TIM_CCER_CCxP(4)			/* Capt/Comp 4  Polarity			*/
+#define TIM_CCER_CC4NE		TIM_CCER_CCxNE(4)			/* Capt/Comp 4N out Ena				*/
+#define TIM_CCER_CC4NP		TIM_CCER_CCxNP(4)			/* Capt/Comp 4N Polarity			*/
 #define TIM_CCER_CCXE		(BIT(0) | BIT(4) | BIT(8) | BIT(12))
 #define TIM_BDTR_BKE(x)		BIT(12 + (x) * 12)			/* Break input enable				*/
 #define TIM_BDTR_BKP(x)		BIT(13 + (x) * 12)			/* Break input polarity				*/
-- 
cgit v1.2.3


From 304d02aa711369da89b4f8c01702bf1b5d1f7abc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@baylibre.com>
Date: Wed, 19 Jun 2024 12:11:45 +0200
Subject: mfd: stm32-timers: Drop unused TIM_DIER_CC_IE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This macro is misleading as

	TIM_DIER_CC_IE(1) == TIM_DIER_CC2IE

. The only user was updated to use TIM_DIER_CCxIE() instead which
doesn't suffer from this mismatch, so TIM_DIER_CC_IE can be dropped.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Link: https://lore.kernel.org/r/6c8fcc4ed159992a1dbb0796087e6ceb10c39c96.1718791090.git.u.kleine-koenig@baylibre.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/stm32-timers.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h
index 92b45a559656..f09ba598c97a 100644
--- a/include/linux/mfd/stm32-timers.h
+++ b/include/linux/mfd/stm32-timers.h
@@ -47,7 +47,6 @@
 #define TIM_DIER_CC2IE		TIM_DIER_CCxIE(2)			/* CC2 Interrupt Enable				*/
 #define TIM_DIER_CC3IE		TIM_DIER_CCxIE(3)			/* CC3 Interrupt Enable				*/
 #define TIM_DIER_CC4IE		TIM_DIER_CCxIE(4)			/* CC4 Interrupt Enable				*/
-#define TIM_DIER_CC_IE(x)	BIT((x) + 1)				/* CC1, CC2, CC3, CC4 interrupt enable		*/
 #define TIM_DIER_UDE		BIT(8)					/* Update DMA request Enable			*/
 #define TIM_DIER_CCxDE(x)	BIT(9 + ((x) - 1))			/* CCx DMA request Enable (x ∈ {1, .. 4})	*/
 #define TIM_DIER_CC1DE		TIM_DIER_CCxDE(1)			/* CC1 DMA request Enable			*/
-- 
cgit v1.2.3


From cf546dd289e0f6d2594c25e2fb4e19ee67c6d988 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Mon, 27 May 2024 17:40:10 +0200
Subject: block: change rq_integrity_vec to respect the iterator

If we allocate a bio that is larger than NVMe maximum request size,
attach integrity metadata to it and send it to the NVMe subsystem, the
integrity metadata will be corrupted.

Splitting the bio works correctly. The function bio_split will clone the
bio, trim the iterator of the first bio and advance the iterator of the
second bio.

However, the function rq_integrity_vec has a bug - it returns the first
vector of the bio's metadata and completely disregards the metadata
iterator that was advanced when the bio was split. Thus, the second bio
uses the same metadata as the first bio and this leads to metadata
corruption.

This commit changes rq_integrity_vec, so that it calls mp_bvec_iter_bvec
instead of returning the first vector. mp_bvec_iter_bvec reads the
iterator and uses it to build a bvec for the current position in the
iterator.

The "queue_max_integrity_segments(rq->q) > 1" check was removed, because
the updated rq_integrity_vec function works correctly with multiple
segments.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/49d1afaa-f934-6ed2-a678-e0d428c63a65@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-integrity.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index d201140d77a3..0fdd62e6d4b0 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -90,14 +90,13 @@ static inline bool blk_integrity_rq(struct request *rq)
 }
 
 /*
- * Return the first bvec that contains integrity data.  Only drivers that are
- * limited to a single integrity segment should use this helper.
+ * Return the current bvec that contains the integrity data. bip_iter may be
+ * advanced to iterate over the integrity data.
  */
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
+static inline struct bio_vec rq_integrity_vec(struct request *rq)
 {
-	if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
-		return NULL;
-	return rq->bio->bi_integrity->bip_vec;
+	return mp_bvec_iter_bvec(rq->bio->bi_integrity->bip_vec,
+				 rq->bio->bi_integrity->bip_iter);
 }
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 static inline int blk_rq_count_integrity_sg(struct request_queue *q,
@@ -148,7 +147,8 @@ static inline int blk_integrity_rq(struct request *rq)
 
 static inline struct bio_vec *rq_integrity_vec(struct request *rq)
 {
-	return NULL;
+	/* the optimizer will remove all calls to this function */
+	return (struct bio_vec){ };
 }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
-- 
cgit v1.2.3


From 5d55721d6e24c8e99cc86ee1fcb90d776ef47964 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 23 Jun 2024 08:45:33 +0200
Subject: power: supply: samsung-sdi-battery: Constify struct
 power_supply_vbat_ri_table

'struct power_supply_vbat_ri_table' are not modified in this driver.

Constifying these structures moves some data to a read-only section, so
increase overall security.

In order to do it, some code also needs to be adjusted to this new const
qualifier.

On a x86_64, with allmodconfig:
Before:
======
$ size drivers/power/supply/samsung-sdi-battery.o
   text	   data	    bss	    dec	    hex	filename
    955	   7664	      0	   8619	   21ab	drivers/power/supply/samsung-sdi-battery.o

After:
=====
$ size drivers/power/supply/samsung-sdi-battery.o
   text	   data	    bss	    dec	    hex	filename
   4055	   4584	      0	   8639	   21bf	drivers/power/supply/samsung-sdi-battery.o

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/d01818abd880bf435d1106a9a6cc11a7a8a3e661.1719125040.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/power_supply.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 65082ef75692..5061eeecf62e 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -755,9 +755,9 @@ struct power_supply_battery_info {
 	int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX];
 	struct power_supply_resistance_temp_table *resist_table;
 	int resist_table_size;
-	struct power_supply_vbat_ri_table *vbat2ri_discharging;
+	const struct power_supply_vbat_ri_table *vbat2ri_discharging;
 	int vbat2ri_discharging_size;
-	struct power_supply_vbat_ri_table *vbat2ri_charging;
+	const struct power_supply_vbat_ri_table *vbat2ri_charging;
 	int vbat2ri_charging_size;
 	int bti_resistance_ohm;
 	int bti_resistance_tolerance;
-- 
cgit v1.2.3


From 0b209ec85b2b73c38a09ba71dc05fbe4aee7be67 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 23 Jun 2024 10:04:45 +0200
Subject: power: supply: samsung-sdi-battery: Constify struct
 power_supply_maintenance_charge_table

'struct power_supply_maintenance_charge_table' is not modified in this
driver.

Constifying this structure moves some data to a read-only section, so
increase overall security.

In order to do it, some code also needs to be adjusted to this new const
qualifier.

On a x86_64, with allmodconfig:
Before:
======
$ size drivers/power/supply/samsung-sdi-battery.o
   text	   data	    bss	    dec	    hex	filename
   4055	   4584	      0	   8639	   21bf	drivers/power/supply/samsung-sdi-battery.o

After:
=====
$ size drivers/power/supply/samsung-sdi-battery.o
   text	   data	    bss	    dec	    hex	filename
   4087	   4552	      0	   8639	   21bf	drivers/power/supply/samsung-sdi-battery.o

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/6caafd0ac2556a40405273b1a4badc508ea8e9b0.1719125040.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/power_supply.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 5061eeecf62e..72dc7e45c90c 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -736,7 +736,7 @@ struct power_supply_battery_info {
 	int overvoltage_limit_uv;
 	int constant_charge_current_max_ua;
 	int constant_charge_voltage_max_uv;
-	struct power_supply_maintenance_charge_table *maintenance_charge;
+	const struct power_supply_maintenance_charge_table *maintenance_charge;
 	int maintenance_charge_size;
 	int alert_low_temp_charge_current_ua;
 	int alert_low_temp_charge_voltage_uv;
@@ -810,7 +810,7 @@ power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *table
 				int table_len, int temp);
 extern int power_supply_vbat2ri(struct power_supply_battery_info *info,
 				int vbat_uv, bool charging);
-extern struct power_supply_maintenance_charge_table *
+extern const struct power_supply_maintenance_charge_table *
 power_supply_get_maintenance_charging_setting(struct power_supply_battery_info *info, int index);
 extern bool power_supply_battery_bti_in_range(struct power_supply_battery_info *info,
 					      int resistance);
@@ -824,7 +824,7 @@ extern int power_supply_set_battery_charged(struct power_supply *psy);
 static inline bool
 power_supply_supports_maintenance_charging(struct power_supply_battery_info *info)
 {
-	struct power_supply_maintenance_charge_table *mt;
+	const struct power_supply_maintenance_charge_table *mt;
 
 	mt = power_supply_get_maintenance_charging_setting(info, 0);
 
-- 
cgit v1.2.3


From aaa53168cbcc486ca1927faac00bd99e81d4ff04 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 28 May 2024 13:32:34 +0200
Subject: dm: optimize flushes

Device mapper sends flush bios to all the targets and the targets send it
to the underlying device. That may be inefficient, for example if a table
contains 10 linear targets pointing to the same physical device, then
device mapper would send 10 flush bios to that device - despite the fact
that only one bio would be sufficient.

This commit optimizes the flush behavior. It introduces a per-target
variable flush_bypasses_map - it is set when the target supports flush
optimization - currently, the dm-linear and dm-stripe targets support it.
When all the targets in a table have flush_bypasses_map,
flush_bypasses_map on the table is set. __send_empty_flush tests if the
table has flush_bypasses_map - and if it has, no flush bios are sent to
the targets via the "map" method and the list dm_table->devices is
iterated and the flush bios are sent to each member of the list.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Mike Snitzer <snitzer@kernel.org>
Suggested-by: Yang Yang <yang.yang@vivo.com>
---
 include/linux/device-mapper.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 82b2195efaca..3611b230d0aa 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -397,6 +397,21 @@ struct dm_target {
 	 * bio_set_dev(). NOTE: ideally a target should _not_ need this.
 	 */
 	bool needs_bio_set_dev:1;
+
+	/*
+	 * Set if the target supports flush optimization. If all the targets in
+	 * a table have flush_bypasses_map set, the dm core will not send
+	 * flushes to the targets via a ->map method. It will iterate over
+	 * dm_table->devices and send flushes to the devices directly. This
+	 * optimization reduces the number of flushes being sent when multiple
+	 * targets in a table use the same underlying device.
+	 *
+	 * This optimization may be enabled on targets that just pass the
+	 * flushes to the underlying devices without performing any other
+	 * actions on the flush request. Currently, dm-linear and dm-stripe
+	 * support it.
+	 */
+	bool flush_bypasses_map:1;
 };
 
 void *dm_per_bio_data(struct bio *bio, size_t data_size);
-- 
cgit v1.2.3


From ec9b1cf0b0ebfb52274971a8a0e74e0a133f64fb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 26 Jun 2024 16:26:24 +0200
Subject: block: rename BLK_FEAT_MISALIGNED

This is a flag for ->flags and not a feature for ->features.  And fix the
one place that actually incorrectly cleared it from ->features.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Link: https://lore.kernel.org/r/20240626142637.300624-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b2f1362c4681..1a7e9d9c16d7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -347,7 +347,7 @@ enum {
 	BLK_FLAG_WRITE_CACHE_DISABLED		= (1u << 0),
 
 	/* I/O topology is misaligned */
-	BLK_FEAT_MISALIGNED			= (1u << 1),
+	BLK_FLAG_MISALIGNED			= (1u << 1),
 };
 
 struct queue_limits {
-- 
cgit v1.2.3


From fcf865e357f80285af12c0c9a49f89d71acb7f4b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 26 Jun 2024 16:26:25 +0200
Subject: block: convert features and flags to __bitwise types

... and let sparse help us catch mismatches or abuses.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Link: https://lore.kernel.org/r/20240626142637.300624-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 85 +++++++++++++++++++++++++-------------------------
 1 file changed, 43 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1a7e9d9c16d7..b37826b350a2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -283,55 +283,56 @@ static inline bool blk_op_is_passthrough(blk_opf_t op)
 }
 
 /* flags set by the driver in queue_limits.features */
-enum {
-	/* supports a volatile write cache */
-	BLK_FEAT_WRITE_CACHE			= (1u << 0),
+typedef unsigned int __bitwise blk_features_t;
 
-	/* supports passing on the FUA bit */
-	BLK_FEAT_FUA				= (1u << 1),
+/* supports a volatile write cache */
+#define BLK_FEAT_WRITE_CACHE		((__force blk_features_t)(1u << 0))
 
-	/* rotational device (hard drive or floppy) */
-	BLK_FEAT_ROTATIONAL			= (1u << 2),
+/* supports passing on the FUA bit */
+#define BLK_FEAT_FUA			((__force blk_features_t)(1u << 1))
 
-	/* contributes to the random number pool */
-	BLK_FEAT_ADD_RANDOM			= (1u << 3),
+/* rotational device (hard drive or floppy) */
+#define BLK_FEAT_ROTATIONAL		((__force blk_features_t)(1u << 2))
 
-	/* do disk/partitions IO accounting */
-	BLK_FEAT_IO_STAT			= (1u << 4),
+/* contributes to the random number pool */
+#define BLK_FEAT_ADD_RANDOM		((__force blk_features_t)(1u << 3))
 
-	/* don't modify data until writeback is done */
-	BLK_FEAT_STABLE_WRITES			= (1u << 5),
+/* do disk/partitions IO accounting */
+#define BLK_FEAT_IO_STAT		((__force blk_features_t)(1u << 4))
 
-	/* always completes in submit context */
-	BLK_FEAT_SYNCHRONOUS			= (1u << 6),
+/* don't modify data until writeback is done */
+#define BLK_FEAT_STABLE_WRITES		((__force blk_features_t)(1u << 5))
 
-	/* supports REQ_NOWAIT */
-	BLK_FEAT_NOWAIT				= (1u << 7),
+/* always completes in submit context */
+#define BLK_FEAT_SYNCHRONOUS		((__force blk_features_t)(1u << 6))
 
-	/* supports DAX */
-	BLK_FEAT_DAX				= (1u << 8),
+/* supports REQ_NOWAIT */
+#define BLK_FEAT_NOWAIT			((__force blk_features_t)(1u << 7))
 
-	/* supports I/O polling */
-	BLK_FEAT_POLL				= (1u << 9),
+/* supports DAX */
+#define BLK_FEAT_DAX			((__force blk_features_t)(1u << 8))
 
-	/* is a zoned device */
-	BLK_FEAT_ZONED				= (1u << 10),
+/* supports I/O polling */
+#define BLK_FEAT_POLL			((__force blk_features_t)(1u << 9))
 
-	/* supports Zone Reset All */
-	BLK_FEAT_ZONE_RESETALL			= (1u << 11),
+/* is a zoned device */
+#define BLK_FEAT_ZONED			((__force blk_features_t)(1u << 10))
 
-	/* supports PCI(e) p2p requests */
-	BLK_FEAT_PCI_P2PDMA			= (1u << 12),
+/* supports Zone Reset All */
+#define BLK_FEAT_ZONE_RESETALL		((__force blk_features_t)(1u << 11))
 
-	/* skip this queue in blk_mq_(un)quiesce_tagset */
-	BLK_FEAT_SKIP_TAGSET_QUIESCE		= (1u << 13),
+/* supports PCI(e) p2p requests */
+#define BLK_FEAT_PCI_P2PDMA		((__force blk_features_t)(1u << 12))
 
-	/* bounce all highmem pages */
-	BLK_FEAT_BOUNCE_HIGH			= (1u << 14),
+/* skip this queue in blk_mq_(un)quiesce_tagset */
+#define BLK_FEAT_SKIP_TAGSET_QUIESCE	((__force blk_features_t)(1u << 13))
 
-	/* undocumented magic for bcache */
-	BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE	= (1u << 15),
-};
+/* bounce all highmem pages */
+#define BLK_FEAT_BOUNCE_HIGH		((__force blk_features_t)(1u << 14))
+
+/* undocumented magic for bcache */
+#define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \
+	((__force blk_features_t)(1u << 15))
 
 /*
  * Flags automatically inherited when stacking limits.
@@ -342,17 +343,17 @@ enum {
 	 BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE)
 
 /* internal flags in queue_limits.flags */
-enum {
-	/* do not send FLUSH/FUA commands despite advertising a write cache */
-	BLK_FLAG_WRITE_CACHE_DISABLED		= (1u << 0),
+typedef unsigned int __bitwise blk_flags_t;
 
-	/* I/O topology is misaligned */
-	BLK_FLAG_MISALIGNED			= (1u << 1),
-};
+/* do not send FLUSH/FUA commands despite advertising a write cache */
+#define BLK_FLAG_WRITE_CACHE_DISABLED	((__force blk_flags_t)(1u << 0))
+
+/* I/O topology is misaligned */
+#define BLK_FLAG_MISALIGNED		((__force blk_flags_t)(1u << 1))
 
 struct queue_limits {
-	unsigned int		features;
-	unsigned int		flags;
+	blk_features_t		features;
+	blk_flags_t		flags;
 	unsigned long		seg_boundary_mask;
 	unsigned long		virt_boundary_mask;
 
-- 
cgit v1.2.3


From 73781b3b81e76583708a652c853d54d03dce031d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 26 Jun 2024 16:26:27 +0200
Subject: block: remove disk_update_readahead

Mark blk_apply_bdi_limits non-static and open code disk_update_readahead
in the only caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Link: https://lore.kernel.org/r/20240626142637.300624-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b37826b350a2..6b88382012e9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -973,7 +973,6 @@ static inline void blk_queue_disable_write_zeroes(struct request_queue *q)
 /*
  * Access functions for manipulating queue properties
  */
-void disk_update_readahead(struct gendisk *disk);
 extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
 extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
-- 
cgit v1.2.3


From abfc9d810926dfbf5645c7755c8d5ab96273f27d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 26 Jun 2024 16:26:28 +0200
Subject: block: remove the fallback case in queue_dma_alignment

Now that all updates go through blk_validate_limits the default of 511
is set at initialization time.  Also remove the unused NULL check as
calling this helper on a NULL queue can't happen (and doesn't make
much sense to start with).

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Link: https://lore.kernel.org/r/20240626142637.300624-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6b88382012e9..94fcbc912312 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1394,7 +1394,7 @@ static inline bool bdev_is_zone_start(struct block_device *bdev,
 
 static inline int queue_dma_alignment(const struct request_queue *q)
 {
-	return q ? q->limits.dma_alignment : 511;
+	return q->limits.dma_alignment;
 }
 
 static inline unsigned int
-- 
cgit v1.2.3


From e94b45d08b5d1c230c0f59c3eed758d28658851e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 26 Jun 2024 16:26:29 +0200
Subject: block: move dma_pad_mask into queue_limits

dma_pad_mask is a queue_limits by all ways of looking at it, so move it
there and set it through the atomic queue limits APIs.

Add a little helper that takes the alignment and pad into account to
simplify the code that is touched a bit.

Note that there never was any need for the > check in
blk_queue_update_dma_pad, this probably was just copy and paste from
dma_update_dma_alignment.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Link: https://lore.kernel.org/r/20240626142637.300624-9-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 94fcbc912312..a53e3434e1a2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -401,6 +401,7 @@ struct queue_limits {
 	 * due to possible offsets.
 	 */
 	unsigned int		dma_alignment;
+	unsigned int		dma_pad_mask;
 
 	struct blk_integrity	integrity;
 };
@@ -509,8 +510,6 @@ struct request_queue {
 	 */
 	int			id;
 
-	unsigned int		dma_pad_mask;
-
 	/*
 	 * queue settings
 	 */
@@ -981,7 +980,6 @@ extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 			    sector_t offset);
 void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
 		sector_t offset, const char *pfx);
-extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 
 struct blk_independent_access_ranges *
@@ -1433,10 +1431,16 @@ static inline bool bdev_iter_is_aligned(struct block_device *bdev,
 				   bdev_logical_block_size(bdev) - 1);
 }
 
+static inline int blk_lim_dma_alignment_and_pad(struct queue_limits *lim)
+{
+	return lim->dma_alignment | lim->dma_pad_mask;
+}
+
 static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
 				 unsigned int len)
 {
-	unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;
+	unsigned int alignment = blk_lim_dma_alignment_and_pad(&q->limits);
+
 	return !(addr & alignment) && !(len & alignment);
 }
 
-- 
cgit v1.2.3


From 769cb63166d90f1fadafa4352f180cbd96b6cb77 Mon Sep 17 00:00:00 2001
From: Peter Griffin <peter.griffin@linaro.org>
Date: Fri, 21 Jun 2024 12:55:43 +0100
Subject: mfd: syscon: Add of_syscon_register_regmap() API

The of_syscon_register_regmap() API allows an externally created regmap
to be registered with syscon. This regmap can then be returned to client
drivers using the syscon_regmap_lookup_by_phandle() APIs.

The API is used by platforms where mmio access to the syscon registers is
not possible, and a underlying soc driver like exynos-pmu provides a SoC
specific regmap that can issue a SMC or hypervisor call to write the
register.

This approach keeps the SoC complexities out of syscon, but allows common
drivers such as  syscon-poweroff, syscon-reboot and friends that are used
by many SoCs already to be re-used.

Signed-off-by: Peter Griffin <peter.griffin@linaro.org>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Sam Protsenko <semen.protsenko@linaro.org>
Tested-by: Will McVicker <willmcvicker@google.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20240621115544.1655458-2-peter.griffin@linaro.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/syscon.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h
index c315903f6dab..aad9c6b50463 100644
--- a/include/linux/mfd/syscon.h
+++ b/include/linux/mfd/syscon.h
@@ -28,6 +28,8 @@ struct regmap *syscon_regmap_lookup_by_phandle_args(struct device_node *np,
 						    unsigned int *out_args);
 struct regmap *syscon_regmap_lookup_by_phandle_optional(struct device_node *np,
 							const char *property);
+int of_syscon_register_regmap(struct device_node *np,
+			      struct regmap *regmap);
 #else
 static inline struct regmap *device_node_to_regmap(struct device_node *np)
 {
@@ -67,6 +69,12 @@ static inline struct regmap *syscon_regmap_lookup_by_phandle_optional(
 	return NULL;
 }
 
+static inline int of_syscon_register_regmap(struct device_node *np,
+					struct regmap *regmap)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif
 
 #endif /* __LINUX_MFD_SYSCON_H__ */
-- 
cgit v1.2.3


From 3ebc76c424bc0f0768f5c346667e8f51217917ba Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Wed, 26 Jun 2024 16:22:08 +0200
Subject: drm/mipi-dsi: add mipi_dsi_usleep_range helper

Like for mipi_dsi_msleep(), usleep_range() may often be called
in between mipi_dsi_dcs_*() functions and needs a multi compatible
counter part.

Suggested-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Link: https://lore.kernel.org/r/20240626142212.1341556-3-jbrunet@baylibre.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240626142212.1341556-3-jbrunet@baylibre.com
---
 include/drm/drm_mipi_dsi.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index 71d121aeef24..0f520eeeaa8e 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -10,6 +10,7 @@
 #define __DRM_MIPI_DSI_H__
 
 #include <linux/device.h>
+#include <linux/delay.h>
 
 struct mipi_dsi_host;
 struct mipi_dsi_device;
@@ -297,6 +298,12 @@ ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params,
 			msleep(delay);	\
 	} while (0)
 
+#define mipi_dsi_usleep_range(ctx, min, max)	\
+	do {					\
+		if (!(ctx)->accum_err)		\
+			usleep_range(min, max);	\
+	} while (0)
+
 /**
  * enum mipi_dsi_dcs_tear_mode - Tearing Effect Output Line mode
  * @MIPI_DSI_DCS_TEAR_MODE_VBLANK: the TE output line consists of V-Blanking
-- 
cgit v1.2.3


From fafc20ded3f4659873c83c2af6d389983d480994 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Wed, 12 Jun 2024 06:02:26 +0000
Subject: ASoC: audio-graph-port: add link-trigger-order

Sound Card need to consider/adjust HW control ordering based on the
combination of CPU/Codec. The controlling feature is already supported
on ASoC, but Simple Audio Card / Audio Graph Card still not support it.
Let's support it.

Cc: Maxim Kochetkov <fido_max@inbox.ru>
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://patch.msgid.link/87sexizojx.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/dt-bindings/sound/audio-graph.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 include/dt-bindings/sound/audio-graph.h

(limited to 'include')

diff --git a/include/dt-bindings/sound/audio-graph.h b/include/dt-bindings/sound/audio-graph.h
new file mode 100644
index 000000000000..bdb70c6b7332
--- /dev/null
+++ b/include/dt-bindings/sound/audio-graph.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * audio-graph.h
+ *
+ * Copyright (c) 2024 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
+ */
+#ifndef __AUDIO_GRAPH_H
+#define __AUDIO_GRAPH_H
+
+/*
+ * used in
+ *	link-trigger-order
+ *	link-trigger-order-start
+ *	link-trigger-order-stop
+ *
+ * default is
+ *	link-trigger-order = <SND_SOC_TRIGGER_LINK
+ *			      SND_SOC_TRIGGER_COMPONENT
+ *			      SND_SOC_TRIGGER_DAI>;
+ */
+#define SND_SOC_TRIGGER_LINK		0
+#define SND_SOC_TRIGGER_COMPONENT	1
+#define SND_SOC_TRIGGER_DAI		2
+#define SND_SOC_TRIGGER_SIZE		3	/* shoud be last */
+
+#endif /* __AUDIO_GRAPH_H */
-- 
cgit v1.2.3


From 5d9cacdccf17bd33dac3ea378324650159c2a863 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Wed, 12 Jun 2024 06:02:33 +0000
Subject: ASoC: simple-card-utils: add link-trigger-order support

Some Sound Card might need special trigger ordering which is based on
CPU/Codec connection. It is already supported on ASoC, but Simple Audio
Card / Audio Graph Card still not support it. Let's support it.

Cc: Maxim Kochetkov <fido_max@inbox.ru>
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://patch.msgid.link/87r0d2zojq.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/simple_card_utils.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h
index 0a6435ac5c5f..3360d9eab068 100644
--- a/include/sound/simple_card_utils.h
+++ b/include/sound/simple_card_utils.h
@@ -199,6 +199,10 @@ int graph_util_parse_dai(struct device *dev, struct device_node *ep,
 
 void graph_util_parse_link_direction(struct device_node *np,
 				    bool *is_playback_only, bool *is_capture_only);
+void graph_util_parse_trigger_order(struct simple_util_priv *priv,
+				    struct device_node *np,
+				    enum snd_soc_trigger_order *trigger_start,
+				    enum snd_soc_trigger_order *trigger_stop);
 
 #ifdef DEBUG
 static inline void simple_util_debug_dai(struct simple_util_priv *priv,
-- 
cgit v1.2.3


From 1cb7d29157603561af4c38535e936850ceb99f0f Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Sun, 16 Jun 2024 11:53:55 +0100
Subject: regulator: core: Add helper for allow HW access to enable/disable
 regulator

Add a helper function that allow regulator consumers to allow low-level
HW access, in order to enable/disable regulator in atomic context.

The use-case for RZ/G2L SoC is to enable VBUS selection register based
on vbus detection that happens in interrupt context.

Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Link: https://patch.msgid.link/20240616105402.45211-4-biju.das.jz@bp.renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/consumer.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 59d0b9a79e6e..550a0e6523ae 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -250,6 +250,7 @@ int regulator_get_hardware_vsel_register(struct regulator *regulator,
 					 unsigned *vsel_mask);
 int regulator_list_hardware_vsel(struct regulator *regulator,
 				 unsigned selector);
+int regulator_hardware_enable(struct regulator *regulator, bool enable);
 
 /* regulator notifier block */
 int regulator_register_notifier(struct regulator *regulator,
@@ -571,6 +572,12 @@ static inline int regulator_list_hardware_vsel(struct regulator *regulator,
 	return -EOPNOTSUPP;
 }
 
+static inline int regulator_hardware_enable(struct regulator *regulator,
+					    bool enable)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int regulator_register_notifier(struct regulator *regulator,
 			      struct notifier_block *nb)
 {
-- 
cgit v1.2.3


From 7e5161da9d267957b726a29f3efe6cb50fdfed04 Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Sun, 23 Jun 2024 13:31:19 -0700
Subject: drm/xe/oa: Fix kernel doc in xe_drm.h

Fix kernel doc in xe_drm.h. Also eliminate private/non-abi enum
definitions.

v2: Remove __DRM_XE_PERF_TYPE_MAX since it is unused (Michal)
v3: Also remove DRM_XE_OA_PROPERTY_MAX since it can also be
    eliminated (Michal)

Suggested-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240623203119.3840283-1-ashutosh.dixit@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 93e00be44b2d..b410553faa9b 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1379,8 +1379,8 @@ struct drm_xe_wait_user_fence {
  * enum drm_xe_perf_type - Perf stream types
  */
 enum drm_xe_perf_type {
+	/** @DRM_XE_PERF_TYPE_OA: OA perf stream type */
 	DRM_XE_PERF_TYPE_OA,
-	__DRM_XE_PERF_TYPE_MAX, /* non-ABI */
 };
 
 /**
@@ -1611,9 +1611,6 @@ enum drm_xe_oa_property_id {
 	 * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0.
 	 */
 	DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE,
-
-	/** @DRM_XE_OA_PROPERTY_MAX: non-ABI */
-	DRM_XE_OA_PROPERTY_MAX
 };
 
 /**
-- 
cgit v1.2.3


From 406d058dc323ae152d380ac90153eb56a75850c1 Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Wed, 26 Jun 2024 11:18:17 -0700
Subject: drm/xe/oa/uapi: Allow preemption to be disabled on the stream exec
 queue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mesa VK_KHR_performance_query use case requires preemption and timeslicing
to be disabled for the stream exec queue. Implement this functionality
here.

v2: Minor change to debug print to print both ret values (Umesh)

Acked-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240626181817.1516229-3-ashutosh.dixit@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index b410553faa9b..12eaa8532b5c 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1611,6 +1611,12 @@ enum drm_xe_oa_property_id {
 	 * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0.
 	 */
 	DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE,
+
+	/**
+	 * @DRM_XE_OA_PROPERTY_NO_PREEMPT: Allow preemption and timeslicing
+	 * to be disabled for the stream exec queue.
+	 */
+	DRM_XE_OA_PROPERTY_NO_PREEMPT,
 };
 
 /**
-- 
cgit v1.2.3


From 7931d32955e09d0a11b1fe0b6aac1bfa061c005c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 26 Jun 2024 23:15:38 +0200
Subject: netfilter: nf_tables: fully validate NFT_DATA_VALUE on store to data
 registers

register store validation for NFT_DATA_VALUE is conditional, however,
the datatype is always either NFT_DATA_VALUE or NFT_DATA_VERDICT. This
only requires a new helper function to infer the register type from the
set datatype so this conditional check can be removed. Otherwise,
pointer to chain object can be leaked through the registers.

Fixes: 96518518cc41 ("netfilter: add nftables")
Reported-by: Linus Torvalds <torvalds@linuxfoundation.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2796153b03da..188d41da1a40 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -619,6 +619,11 @@ static inline void *nft_set_priv(const struct nft_set *set)
 	return (void *)set->data;
 }
 
+static inline enum nft_data_types nft_set_datatype(const struct nft_set *set)
+{
+	return set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE;
+}
+
 static inline bool nft_set_gc_is_pending(const struct nft_set *s)
 {
 	return refcount_read(&s->refs) != 1;
-- 
cgit v1.2.3


From 69b6517687a4b1fb250bd8c9c193a0a304c8ba17 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 26 Jun 2024 19:01:58 -0600
Subject: block: use the right type for stub rq_integrity_vec()

For !CONFIG_BLK_DEV_INTEGRITY, rq_integrity_vec() wasn't updated
properly. Fix it up.

Fixes: cf546dd289e0 ("block: change rq_integrity_vec to respect the iterator")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-integrity.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index 0fdd62e6d4b0..c58634924782 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -145,7 +145,7 @@ static inline int blk_integrity_rq(struct request *rq)
 	return 0;
 }
 
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
+static inline struct bio_vec rq_integrity_vec(struct request *rq)
 {
 	/* the optimizer will remove all calls to this function */
 	return (struct bio_vec){ };
-- 
cgit v1.2.3


From 78b506984ebeaefaf63172059422fc606dc23e68 Mon Sep 17 00:00:00 2001
From: Ranjan Kumar <ranjan.kumar@broadcom.com>
Date: Wed, 26 Jun 2024 15:56:45 +0530
Subject: scsi: mpi3mr: Add ioctl support for HDB

Add interface for applications to manage the host diagnostic buffers and
update the automatic diag buffer capture triggers.

Co-developed-by: Sathya Prakash <sathya.prakash@broadcom.com>
Signed-off-by: Sathya Prakash <sathya.prakash@broadcom.com>
Signed-off-by: Ranjan Kumar <ranjan.kumar@broadcom.com>
Link: https://lore.kernel.org/r/20240626102646.14298-4-ranjan.kumar@broadcom.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/uapi/scsi/scsi_bsg_mpi3mr.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/scsi/scsi_bsg_mpi3mr.h b/include/uapi/scsi/scsi_bsg_mpi3mr.h
index a3ba779a3f78..f5ea1db92339 100644
--- a/include/uapi/scsi/scsi_bsg_mpi3mr.h
+++ b/include/uapi/scsi/scsi_bsg_mpi3mr.h
@@ -296,6 +296,7 @@ struct mpi3mr_hdb_entry {
  * multiple hdb entries.
  *
  * @num_hdb_types: Number of host diag buffer types supported
+ * @element_trigger_format: Element trigger format
  * @rsvd1: Reserved
  * @rsvd2: Reserved
  * @rsvd3: Reserved
@@ -303,7 +304,7 @@ struct mpi3mr_hdb_entry {
  */
 struct mpi3mr_bsg_in_hdb_status {
 	__u8	num_hdb_types;
-	__u8	rsvd1;
+	__u8    element_trigger_format;
 	__u16	rsvd2;
 	__u32	rsvd3;
 	struct mpi3mr_hdb_entry entry[1];
-- 
cgit v1.2.3


From 7276f425b744a81eb5a8e519b0c452d5e64b530c Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Thu, 27 Jun 2024 10:43:27 +0300
Subject: mfd: support ROHM BD96801 PMIC core

The ROHM BD96801 PMIC is highly customizable automotive grade PMIC
which integrates regulator and watchdog funtionalities.

Provide INTB IRQ and register accesses for regulator/watchdog drivers.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Link: https://lore.kernel.org/r/c5260e2dd222e3c64cdf410802bba195637ccb93.1719473802.git.mazziesaccount@gmail.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/rohm-bd96801.h | 215 +++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/rohm-generic.h |   1 +
 2 files changed, 216 insertions(+)
 create mode 100644 include/linux/mfd/rohm-bd96801.h

(limited to 'include')

diff --git a/include/linux/mfd/rohm-bd96801.h b/include/linux/mfd/rohm-bd96801.h
new file mode 100644
index 000000000000..e2d9e10b6364
--- /dev/null
+++ b/include/linux/mfd/rohm-bd96801.h
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright (C) 2024 ROHM Semiconductors */
+
+#ifndef __MFD_BD96801_H__
+#define __MFD_BD96801_H__
+
+#define BD96801_REG_SSCG_CTRL		0x09
+#define BD96801_REG_SHD_INTB            0x20
+#define BD96801_LDO5_VOL_LVL_REG	0x2c
+#define BD96801_LDO6_VOL_LVL_REG	0x2d
+#define BD96801_LDO7_VOL_LVL_REG	0x2e
+#define BD96801_REG_BUCK_OVP		0x30
+#define BD96801_REG_BUCK_OVD		0x35
+#define BD96801_REG_LDO_OVP		0x31
+#define BD96801_REG_LDO_OVD		0x36
+#define BD96801_REG_BOOT_OVERTIME	0x3a
+#define BD96801_REG_WD_TMO		0x40
+#define BD96801_REG_WD_CONF		0x41
+#define BD96801_REG_WD_FEED		0x42
+#define BD96801_REG_WD_FAILCOUNT	0x43
+#define BD96801_REG_WD_ASK		0x46
+#define BD96801_REG_WD_STATUS		0x4a
+#define BD96801_REG_PMIC_STATE		0x4f
+#define BD96801_REG_EXT_STATE		0x50
+
+#define BD96801_STATE_STBY		0x09
+
+#define BD96801_LOCK_REG		0x04
+#define BD96801_UNLOCK			0x9d
+#define BD96801_LOCK			0x00
+
+/* IRQ register area */
+#define BD96801_REG_INT_MAIN		0x51
+
+/*
+ * The BD96801 has two physical IRQ lines, INTB and ERRB.
+ *
+ * The 'main status register' is located at 0x51.
+ * The ERRB status registers are located at 0x52 ... 0x5B
+ * INTB status registers are at range 0x5c ... 0x63
+ */
+#define BD96801_REG_INT_SYS_ERRB1	0x52
+#define BD96801_REG_INT_SYS_INTB	0x5c
+#define BD96801_REG_INT_LDO7_INTB	0x63
+
+/* MASK registers */
+#define BD96801_REG_MASK_SYS_INTB	0x73
+#define BD96801_REG_MASK_SYS_ERRB	0x69
+
+#define BD96801_MAX_REGISTER		0x7a
+
+#define BD96801_OTP_ERR_MASK		BIT(0)
+#define BD96801_DBIST_ERR_MASK		BIT(1)
+#define BD96801_EEP_ERR_MASK		BIT(2)
+#define BD96801_ABIST_ERR_MASK		BIT(3)
+#define BD96801_PRSTB_ERR_MASK		BIT(4)
+#define BD96801_DRMOS1_ERR_MASK		BIT(5)
+#define BD96801_DRMOS2_ERR_MASK		BIT(6)
+#define BD96801_SLAVE_ERR_MASK		BIT(7)
+#define BD96801_VREF_ERR_MASK		BIT(0)
+#define BD96801_TSD_ERR_MASK		BIT(1)
+#define BD96801_UVLO_ERR_MASK		BIT(2)
+#define BD96801_OVLO_ERR_MASK		BIT(3)
+#define BD96801_OSC_ERR_MASK		BIT(4)
+#define BD96801_PON_ERR_MASK		BIT(5)
+#define BD96801_POFF_ERR_MASK		BIT(6)
+#define BD96801_CMD_SHDN_ERR_MASK	BIT(7)
+#define BD96801_INT_PRSTB_WDT_ERR_MASK	BIT(0)
+#define BD96801_INT_CHIP_IF_ERR_MASK	BIT(3)
+#define BD96801_INT_SHDN_ERR_MASK	BIT(7)
+#define BD96801_OUT_PVIN_ERR_MASK	BIT(0)
+#define BD96801_OUT_OVP_ERR_MASK	BIT(1)
+#define BD96801_OUT_UVP_ERR_MASK	BIT(2)
+#define BD96801_OUT_SHDN_ERR_MASK	BIT(7)
+
+/* ERRB IRQs */
+enum {
+	/* Reg 0x52, 0x53, 0x54 - ERRB system IRQs */
+	BD96801_OTP_ERR_STAT,
+	BD96801_DBIST_ERR_STAT,
+	BD96801_EEP_ERR_STAT,
+	BD96801_ABIST_ERR_STAT,
+	BD96801_PRSTB_ERR_STAT,
+	BD96801_DRMOS1_ERR_STAT,
+	BD96801_DRMOS2_ERR_STAT,
+	BD96801_SLAVE_ERR_STAT,
+	BD96801_VREF_ERR_STAT,
+	BD96801_TSD_ERR_STAT,
+	BD96801_UVLO_ERR_STAT,
+	BD96801_OVLO_ERR_STAT,
+	BD96801_OSC_ERR_STAT,
+	BD96801_PON_ERR_STAT,
+	BD96801_POFF_ERR_STAT,
+	BD96801_CMD_SHDN_ERR_STAT,
+	BD96801_INT_PRSTB_WDT_ERR,
+	BD96801_INT_CHIP_IF_ERR,
+	BD96801_INT_SHDN_ERR_STAT,
+
+	/* Reg 0x55 BUCK1 ERR IRQs */
+	BD96801_BUCK1_PVIN_ERR_STAT,
+	BD96801_BUCK1_OVP_ERR_STAT,
+	BD96801_BUCK1_UVP_ERR_STAT,
+	BD96801_BUCK1_SHDN_ERR_STAT,
+
+	/* Reg 0x56 BUCK2 ERR IRQs */
+	BD96801_BUCK2_PVIN_ERR_STAT,
+	BD96801_BUCK2_OVP_ERR_STAT,
+	BD96801_BUCK2_UVP_ERR_STAT,
+	BD96801_BUCK2_SHDN_ERR_STAT,
+
+	/* Reg 0x57 BUCK3 ERR IRQs */
+	BD96801_BUCK3_PVIN_ERR_STAT,
+	BD96801_BUCK3_OVP_ERR_STAT,
+	BD96801_BUCK3_UVP_ERR_STAT,
+	BD96801_BUCK3_SHDN_ERR_STAT,
+
+	/* Reg 0x58 BUCK4 ERR IRQs */
+	BD96801_BUCK4_PVIN_ERR_STAT,
+	BD96801_BUCK4_OVP_ERR_STAT,
+	BD96801_BUCK4_UVP_ERR_STAT,
+	BD96801_BUCK4_SHDN_ERR_STAT,
+
+	/* Reg 0x59 LDO5 ERR IRQs */
+	BD96801_LDO5_PVIN_ERR_STAT,
+	BD96801_LDO5_OVP_ERR_STAT,
+	BD96801_LDO5_UVP_ERR_STAT,
+	BD96801_LDO5_SHDN_ERR_STAT,
+
+	/* Reg 0x5a LDO6 ERR IRQs */
+	BD96801_LDO6_PVIN_ERR_STAT,
+	BD96801_LDO6_OVP_ERR_STAT,
+	BD96801_LDO6_UVP_ERR_STAT,
+	BD96801_LDO6_SHDN_ERR_STAT,
+
+	/* Reg 0x5b LDO7 ERR IRQs */
+	BD96801_LDO7_PVIN_ERR_STAT,
+	BD96801_LDO7_OVP_ERR_STAT,
+	BD96801_LDO7_UVP_ERR_STAT,
+	BD96801_LDO7_SHDN_ERR_STAT,
+};
+
+/* INTB IRQs */
+enum {
+	/* Reg 0x5c (System INTB) */
+	BD96801_TW_STAT,
+	BD96801_WDT_ERR_STAT,
+	BD96801_I2C_ERR_STAT,
+	BD96801_CHIP_IF_ERR_STAT,
+
+	/* Reg 0x5d (BUCK1 INTB) */
+	BD96801_BUCK1_OCPH_STAT,
+	BD96801_BUCK1_OCPL_STAT,
+	BD96801_BUCK1_OCPN_STAT,
+	BD96801_BUCK1_OVD_STAT,
+	BD96801_BUCK1_UVD_STAT,
+	BD96801_BUCK1_TW_CH_STAT,
+
+	/* Reg 0x5e (BUCK2 INTB) */
+	BD96801_BUCK2_OCPH_STAT,
+	BD96801_BUCK2_OCPL_STAT,
+	BD96801_BUCK2_OCPN_STAT,
+	BD96801_BUCK2_OVD_STAT,
+	BD96801_BUCK2_UVD_STAT,
+	BD96801_BUCK2_TW_CH_STAT,
+
+	/* Reg 0x5f (BUCK3 INTB)*/
+	BD96801_BUCK3_OCPH_STAT,
+	BD96801_BUCK3_OCPL_STAT,
+	BD96801_BUCK3_OCPN_STAT,
+	BD96801_BUCK3_OVD_STAT,
+	BD96801_BUCK3_UVD_STAT,
+	BD96801_BUCK3_TW_CH_STAT,
+
+	/* Reg 0x60 (BUCK4 INTB)*/
+	BD96801_BUCK4_OCPH_STAT,
+	BD96801_BUCK4_OCPL_STAT,
+	BD96801_BUCK4_OCPN_STAT,
+	BD96801_BUCK4_OVD_STAT,
+	BD96801_BUCK4_UVD_STAT,
+	BD96801_BUCK4_TW_CH_STAT,
+
+	/* Reg 0x61 (LDO5 INTB) */
+	BD96801_LDO5_OCPH_STAT, /* bit [0] */
+	BD96801_LDO5_OVD_STAT,	/* bit [3] */
+	BD96801_LDO5_UVD_STAT,  /* bit [4] */
+
+	/* Reg 0x62 (LDO6 INTB) */
+	BD96801_LDO6_OCPH_STAT, /* bit [0] */
+	BD96801_LDO6_OVD_STAT,	/* bit [3] */
+	BD96801_LDO6_UVD_STAT,  /* bit [4] */
+
+	/* Reg 0x63 (LDO7 INTB) */
+	BD96801_LDO7_OCPH_STAT, /* bit [0] */
+	BD96801_LDO7_OVD_STAT,	/* bit [3] */
+	BD96801_LDO7_UVD_STAT,  /* bit [4] */
+};
+
+/* IRQ MASKs */
+#define BD96801_TW_STAT_MASK		BIT(0)
+#define BD96801_WDT_ERR_STAT_MASK	BIT(1)
+#define BD96801_I2C_ERR_STAT_MASK	BIT(2)
+#define BD96801_CHIP_IF_ERR_STAT_MASK	BIT(3)
+
+#define BD96801_BUCK_OCPH_STAT_MASK	BIT(0)
+#define BD96801_BUCK_OCPL_STAT_MASK	BIT(1)
+#define BD96801_BUCK_OCPN_STAT_MASK	BIT(2)
+#define BD96801_BUCK_OVD_STAT_MASK	BIT(3)
+#define BD96801_BUCK_UVD_STAT_MASK	BIT(4)
+#define BD96801_BUCK_TW_CH_STAT_MASK	BIT(5)
+
+#define BD96801_LDO_OCPH_STAT_MASK	BIT(0)
+#define BD96801_LDO_OVD_STAT_MASK	BIT(3)
+#define BD96801_LDO_UVD_STAT_MASK	BIT(4)
+
+#endif
diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h
index 4eeb22876bad..e7d4e6afe388 100644
--- a/include/linux/mfd/rohm-generic.h
+++ b/include/linux/mfd/rohm-generic.h
@@ -16,6 +16,7 @@ enum rohm_chip_type {
 	ROHM_CHIP_TYPE_BD71828,
 	ROHM_CHIP_TYPE_BD71837,
 	ROHM_CHIP_TYPE_BD71847,
+	ROHM_CHIP_TYPE_BD96801,
 	ROHM_CHIP_TYPE_AMOUNT
 };
 
-- 
cgit v1.2.3


From 67eccf151d76a9939ad8a50c6db5cb486b01df24 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Wed, 26 Jun 2024 15:46:23 +0200
Subject: HID: add source argument to HID low level functions

This allows to know who actually sent what when we process the request
to the device.
This will be useful for a BPF firewall program to allow or not requests
coming from a dedicated hidraw node client.

Link: https://patch.msgid.link/20240626-hid_hw_req_bpf-v2-2-cfd60fb6c79f@kernel.org
Acked-by: Jiri Kosina <jkosina@suse.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid.h     |  6 ++++++
 include/linux/hid_bpf.h | 16 ++++++++++------
 2 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 8e06d89698e6..dac2804b4562 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -1125,6 +1125,12 @@ int __must_check hid_hw_open(struct hid_device *hdev);
 void hid_hw_close(struct hid_device *hdev);
 void hid_hw_request(struct hid_device *hdev,
 		    struct hid_report *report, enum hid_class_request reqtype);
+int __hid_hw_raw_request(struct hid_device *hdev,
+			 unsigned char reportnum, __u8 *buf,
+			 size_t len, enum hid_report_type rtype,
+			 enum hid_class_request reqtype,
+			 __u64 source);
+int __hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len, __u64 source);
 int hid_hw_raw_request(struct hid_device *hdev,
 		       unsigned char reportnum, __u8 *buf,
 		       size_t len, enum hid_report_type rtype,
diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index 65d7e0acc8c2..a54741db0415 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -66,10 +66,12 @@ struct hid_ops {
 	int (*hid_hw_raw_request)(struct hid_device *hdev,
 				  unsigned char reportnum, __u8 *buf,
 				  size_t len, enum hid_report_type rtype,
-				  enum hid_class_request reqtype);
-	int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len);
+				  enum hid_class_request reqtype,
+				  __u64 source);
+	int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len,
+				    __u64 source);
 	int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type,
-				u8 *data, u32 size, int interrupt);
+				u8 *data, u32 size, int interrupt, u64 source);
 	struct module *owner;
 	const struct bus_type *bus_type;
 };
@@ -110,7 +112,8 @@ struct hid_bpf_ops {
 	 *
 	 * Context: Interrupt context.
 	 */
-	int (*hid_device_event)(struct hid_bpf_ctx *ctx, enum hid_report_type report_type);
+	int (*hid_device_event)(struct hid_bpf_ctx *ctx, enum hid_report_type report_type,
+				__u64 source);
 
 	/**
 	 * @hid_rdesc_fixup: called when the probe function parses the report descriptor
@@ -146,7 +149,7 @@ struct hid_bpf {
 
 #ifdef CONFIG_HID_BPF
 u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data,
-				  u32 *size, int interrupt);
+				  u32 *size, int interrupt, u64 source);
 int hid_bpf_connect_device(struct hid_device *hdev);
 void hid_bpf_disconnect_device(struct hid_device *hdev);
 void hid_bpf_destroy_device(struct hid_device *hid);
@@ -154,7 +157,8 @@ void hid_bpf_device_init(struct hid_device *hid);
 u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *size);
 #else /* CONFIG_HID_BPF */
 static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type,
-						u8 *data, u32 *size, int interrupt) { return data; }
+						u8 *data, u32 *size, int interrupt,
+						u64 source) { return data; }
 static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; }
 static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {}
 static inline void hid_bpf_destroy_device(struct hid_device *hid) {}
-- 
cgit v1.2.3


From 6cd735f0e57a6c8510ad92f5b63837a8d0cff3a7 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Wed, 26 Jun 2024 15:46:24 +0200
Subject: HID: bpf: protect HID-BPF prog_list access by a SRCU

We want to add sleepable callbacks for hid_hw_raw_request() and
hid_hw_output_report(), but we can not use a plain RCU for those.

Prepare for a SRCU so we can extend HID-BPF.

This changes a little bit how hid_bpf_device_init() behaves, as it may
now fail, so there is a tiny hid-core.c change to accommodate for this.

Link: https://patch.msgid.link/20240626-hid_hw_req_bpf-v2-3-cfd60fb6c79f@kernel.org
Acked-by: Jiri Kosina <jkosina@suse.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid_bpf.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index a54741db0415..f93845de5cac 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -5,6 +5,7 @@
 
 #include <linux/bpf.h>
 #include <linux/mutex.h>
+#include <linux/srcu.h>
 #include <uapi/linux/hid.h>
 
 struct hid_device;
@@ -145,6 +146,7 @@ struct hid_bpf {
 	struct hid_bpf_ops *rdesc_ops;
 	struct list_head prog_list;
 	struct mutex prog_list_lock;	/* protects prog_list update */
+	struct srcu_struct srcu;	/* protects prog_list read-only access */
 };
 
 #ifdef CONFIG_HID_BPF
@@ -153,7 +155,7 @@ u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type t
 int hid_bpf_connect_device(struct hid_device *hdev);
 void hid_bpf_disconnect_device(struct hid_device *hdev);
 void hid_bpf_destroy_device(struct hid_device *hid);
-void hid_bpf_device_init(struct hid_device *hid);
+int hid_bpf_device_init(struct hid_device *hid);
 u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *size);
 #else /* CONFIG_HID_BPF */
 static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type,
@@ -162,7 +164,7 @@ static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid
 static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; }
 static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {}
 static inline void hid_bpf_destroy_device(struct hid_device *hid) {}
-static inline void hid_bpf_device_init(struct hid_device *hid) {}
+static inline int hid_bpf_device_init(struct hid_device *hid) { return 0; }
 #define call_hid_bpf_rdesc_fixup(_hdev, _rdesc, _size)	\
 		((u8 *)kmemdup(_rdesc, *(_size), GFP_KERNEL))
 
-- 
cgit v1.2.3


From 8bd0488b5ea58655ad6fdcbe0408ef49b16882b1 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Wed, 26 Jun 2024 15:46:25 +0200
Subject: HID: bpf: add HID-BPF hooks for hid_hw_raw_requests

This allows to intercept and prevent or change the behavior of
hid_hw_raw_request() from a bpf program.

The intent is to solve a couple of use case:
- firewalling a HID device: a firewall can monitor who opens the hidraw
  nodes and then prevent or allow access to write operations on that
  hidraw node.
- change the behavior of a device and emulate a new HID feature request

The hook is allowed to be run as sleepable so it can itself call
hid_bpf_hw_request(), which allows to "convert" one feature request into
another or even call the feature request on a different HID device on the
same physical device.

Link: https://patch.msgid.link/20240626-hid_hw_req_bpf-v2-4-cfd60fb6c79f@kernel.org
Acked-by: Jiri Kosina <jkosina@suse.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid_bpf.h | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'include')

diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index f93845de5cac..3c01f7f8b6fc 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -130,6 +130,31 @@ struct hid_bpf_ops {
 	 */
 	int (*hid_rdesc_fixup)(struct hid_bpf_ctx *ctx);
 
+	/**
+	 * @hid_hw_request: called whenever a hid_hw_raw_request() call is emitted
+	 * on the HID device
+	 *
+	 * It has the following arguments:
+	 *
+	 * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx
+	 * ``reportnum``: the report number, as in hid_hw_raw_request()
+	 * ``rtype``: the report type (``HID_INPUT_REPORT``, ``HID_FEATURE_REPORT``,
+	 *            ``HID_OUTPUT_REPORT``)
+	 * ``reqtype``: the request
+	 * ``source``: a u64 referring to a uniq but identifiable source. If %0, the
+	 *             kernel itself emitted that call. For hidraw, ``source`` is set
+	 *             to the associated ``struct file *``.
+	 *
+	 * Return: %0 to keep processing the request by hid-core; any other value
+	 * stops hid-core from processing that event. A positive value should be
+	 * returned with the number of bytes returned in the incoming buffer; a
+	 * negative error code interrupts the processing of this call.
+	 */
+	int (*hid_hw_request)(struct hid_bpf_ctx *ctx, unsigned char reportnum,
+			       enum hid_report_type rtype, enum hid_class_request reqtype,
+			       __u64 source);
+
+
 	/* private: do not show up in the docs */
 	struct hid_device *hdev;
 };
@@ -152,6 +177,11 @@ struct hid_bpf {
 #ifdef CONFIG_HID_BPF
 u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data,
 				  u32 *size, int interrupt, u64 source);
+int dispatch_hid_bpf_raw_requests(struct hid_device *hdev,
+				  unsigned char reportnum, __u8 *buf,
+				  u32 size, enum hid_report_type rtype,
+				  enum hid_class_request reqtype,
+				  __u64 source);
 int hid_bpf_connect_device(struct hid_device *hdev);
 void hid_bpf_disconnect_device(struct hid_device *hdev);
 void hid_bpf_destroy_device(struct hid_device *hid);
@@ -161,6 +191,11 @@ u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *s
 static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type,
 						u8 *data, u32 *size, int interrupt,
 						u64 source) { return data; }
+static inline int dispatch_hid_bpf_raw_requests(struct hid_device *hdev,
+						unsigned char reportnum, u8 *buf,
+						u32 size, enum hid_report_type rtype,
+						enum hid_class_request reqtype,
+						u64 source) { return 0; }
 static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; }
 static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {}
 static inline void hid_bpf_destroy_device(struct hid_device *hid) {}
-- 
cgit v1.2.3


From 75839101ce52e319cb2154a027d14f1f0aa3be09 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Wed, 26 Jun 2024 15:46:26 +0200
Subject: HID: bpf: prevent infinite recursions with hid_hw_raw_requests hooks

When we attach a sleepable hook to hid_hw_raw_requests, we can (and in
many cases should) call ourself hid_bpf_raw_request(), to actually fetch
data from the device itself.

However, this means that we might enter an infinite loop between
hid_hw_raw_requests hooks and hid_bpf_hw_request() call.

To prevent that, if a hid_bpf_hw_request() call is emitted, we prevent
any new call of this kfunc by storing the information in the context.
This way we can always trace/monitor/filter the incoming bpf requests,
while preventing those loops to happen.

I don't think exposing "from_bpf" is very interesting because while
writing such a bpf program, you need to match at least the report number
and/or the source of the call. So a blind "if there is a
hid_hw_raw_request() call, I'm emitting another one" makes no real
sense.

Link: https://patch.msgid.link/20240626-hid_hw_req_bpf-v2-5-cfd60fb6c79f@kernel.org
Acked-by: Jiri Kosina <jkosina@suse.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid.h     | 2 +-
 include/linux/hid_bpf.h | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index dac2804b4562..24d0d7c0bd33 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -1129,7 +1129,7 @@ int __hid_hw_raw_request(struct hid_device *hdev,
 			 unsigned char reportnum, __u8 *buf,
 			 size_t len, enum hid_report_type rtype,
 			 enum hid_class_request reqtype,
-			 __u64 source);
+			 __u64 source, bool from_bpf);
 int __hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len, __u64 source);
 int hid_hw_raw_request(struct hid_device *hdev,
 		       unsigned char reportnum, __u8 *buf,
diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index 3c01f7f8b6fc..088c94b6d8ec 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -68,7 +68,7 @@ struct hid_ops {
 				  unsigned char reportnum, __u8 *buf,
 				  size_t len, enum hid_report_type rtype,
 				  enum hid_class_request reqtype,
-				  __u64 source);
+				  __u64 source, bool from_bpf);
 	int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len,
 				    __u64 source);
 	int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type,
@@ -181,7 +181,7 @@ int dispatch_hid_bpf_raw_requests(struct hid_device *hdev,
 				  unsigned char reportnum, __u8 *buf,
 				  u32 size, enum hid_report_type rtype,
 				  enum hid_class_request reqtype,
-				  __u64 source);
+				  __u64 source, bool from_bpf);
 int hid_bpf_connect_device(struct hid_device *hdev);
 void hid_bpf_disconnect_device(struct hid_device *hdev);
 void hid_bpf_destroy_device(struct hid_device *hid);
@@ -195,7 +195,7 @@ static inline int dispatch_hid_bpf_raw_requests(struct hid_device *hdev,
 						unsigned char reportnum, u8 *buf,
 						u32 size, enum hid_report_type rtype,
 						enum hid_class_request reqtype,
-						u64 source) { return 0; }
+						u64 source, bool from_bpf) { return 0; }
 static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; }
 static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {}
 static inline void hid_bpf_destroy_device(struct hid_device *hid) {}
-- 
cgit v1.2.3


From 9286675a2aed40a517be8cc4e283a04f473275b5 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Wed, 26 Jun 2024 15:46:28 +0200
Subject: HID: bpf: add HID-BPF hooks for hid_hw_output_report

Same story than hid_hw_raw_requests:

This allows to intercept and prevent or change the behavior of
hid_hw_output_report() from a bpf program.

The intent is to solve a couple of use case:
  - firewalling a HID device: a firewall can monitor who opens the hidraw
    nodes and then prevent or allow access to write operations on that
    hidraw node.
  - change the behavior of a device and emulate a new HID feature request

The hook is allowed to be run as sleepable so it can itself call
hid_hw_output_report(), which allows to "convert" one feature request into
another or even call the feature request on a different HID device on the
same physical device.

Link: https://patch.msgid.link/20240626-hid_hw_req_bpf-v2-7-cfd60fb6c79f@kernel.org
Acked-by: Jiri Kosina <jkosina@suse.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid.h     |  3 ++-
 include/linux/hid_bpf.h | 24 +++++++++++++++++++++++-
 2 files changed, 25 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 24d0d7c0bd33..1533c9dcd3a6 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -1130,7 +1130,8 @@ int __hid_hw_raw_request(struct hid_device *hdev,
 			 size_t len, enum hid_report_type rtype,
 			 enum hid_class_request reqtype,
 			 __u64 source, bool from_bpf);
-int __hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len, __u64 source);
+int __hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len, __u64 source,
+			   bool from_bpf);
 int hid_hw_raw_request(struct hid_device *hdev,
 		       unsigned char reportnum, __u8 *buf,
 		       size_t len, enum hid_report_type rtype,
diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index 088c94b6d8ec..f35508a73067 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -70,7 +70,7 @@ struct hid_ops {
 				  enum hid_class_request reqtype,
 				  __u64 source, bool from_bpf);
 	int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len,
-				    __u64 source);
+				    __u64 source, bool from_bpf);
 	int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type,
 				u8 *data, u32 size, int interrupt, u64 source);
 	struct module *owner;
@@ -154,6 +154,24 @@ struct hid_bpf_ops {
 			       enum hid_report_type rtype, enum hid_class_request reqtype,
 			       __u64 source);
 
+	/**
+	 * @hid_hw_output_report: called whenever a hid_hw_output_report() call is emitted
+	 * on the HID device
+	 *
+	 * It has the following arguments:
+	 *
+	 * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx
+	 * ``source``: a u64 referring to a uniq but identifiable source. If %0, the
+	 *             kernel itself emitted that call. For hidraw, ``source`` is set
+	 *             to the associated ``struct file *``.
+	 *
+	 * Return: %0 to keep processing the request by hid-core; any other value
+	 * stops hid-core from processing that event. A positive value should be
+	 * returned with the number of bytes written to the device; a negative error
+	 * code interrupts the processing of this call.
+	 */
+	int (*hid_hw_output_report)(struct hid_bpf_ctx *ctx, __u64 source);
+
 
 	/* private: do not show up in the docs */
 	struct hid_device *hdev;
@@ -182,6 +200,8 @@ int dispatch_hid_bpf_raw_requests(struct hid_device *hdev,
 				  u32 size, enum hid_report_type rtype,
 				  enum hid_class_request reqtype,
 				  __u64 source, bool from_bpf);
+int dispatch_hid_bpf_output_report(struct hid_device *hdev, __u8 *buf, u32 size,
+				   __u64 source, bool from_bpf);
 int hid_bpf_connect_device(struct hid_device *hdev);
 void hid_bpf_disconnect_device(struct hid_device *hdev);
 void hid_bpf_destroy_device(struct hid_device *hid);
@@ -196,6 +216,8 @@ static inline int dispatch_hid_bpf_raw_requests(struct hid_device *hdev,
 						u32 size, enum hid_report_type rtype,
 						enum hid_class_request reqtype,
 						u64 source, bool from_bpf) { return 0; }
+static inline int dispatch_hid_bpf_output_report(struct hid_device *hdev, __u8 *buf, u32 size,
+						 __u64 source, bool from_bpf) { return 0; }
 static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; }
 static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {}
 static inline void hid_bpf_destroy_device(struct hid_device *hid) {}
-- 
cgit v1.2.3


From fa03f398a8ac46f46927e0b509b302ebe0ed7e8a Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Wed, 26 Jun 2024 15:46:30 +0200
Subject: HID: bpf: make hid_bpf_input_report() sleep until the device is ready

hid_bpf_input_report() is already marked to be used in sleepable context
only. So instead of hammering with timers the device to hopefully get
an available slot where the device is not sending events, we can make
that kfunc wait for the current event to be terminated before it goes in.

This allows to work with the following pseudo code:

in struct_ops/hid_device_event:
  - schedule a bpf_wq, which calls hid_bpf_input_report()
  - once this struct_ops function terminates, hid_bpf_input_report()
    immediately starts before the next event

Link: https://patch.msgid.link/20240626-hid_hw_req_bpf-v2-9-cfd60fb6c79f@kernel.org
Acked-by: Jiri Kosina <jkosina@suse.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid_bpf.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index f35508a73067..7f04353d09e9 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -72,7 +72,8 @@ struct hid_ops {
 	int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len,
 				    __u64 source, bool from_bpf);
 	int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type,
-				u8 *data, u32 size, int interrupt, u64 source);
+				u8 *data, u32 size, int interrupt, u64 source,
+				bool lock_already_taken);
 	struct module *owner;
 	const struct bus_type *bus_type;
 };
-- 
cgit v1.2.3


From 9acbb7ba4589d4715141d4e14230a828ddc95f3d Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Wed, 26 Jun 2024 15:46:32 +0200
Subject: HID: bpf: allow hid_device_event hooks to inject input reports on
 self

This is the same logic than hid_hw_raw_request or hid_hw_output_report:
we can allow hid_bpf_try_input_report to be called from a hook on
hid_input_report if we ensure that the call can not be made twice in a
row.

There is one extra subtlety in which there is a lock in hid_input_report.
But given that we can detect if we are already in the hook, we can notify
hid_input_report to not take the lock. This is done by checking if
ctx_kern data is valid or null, and if it is equal to the dedicated
incoming data buffer.

In order to have more control on whether the lock needs to be taken or not
we introduce a new kfunc for it: hid_bpf_try_input_report()

Link: https://patch.msgid.link/20240626-hid_hw_req_bpf-v2-11-cfd60fb6c79f@kernel.org
Acked-by: Jiri Kosina <jkosina@suse.com>
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid_bpf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index 7f04353d09e9..93546ee7677a 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -72,7 +72,7 @@ struct hid_ops {
 	int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len,
 				    __u64 source, bool from_bpf);
 	int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type,
-				u8 *data, u32 size, int interrupt, u64 source,
+				u8 *data, u32 size, int interrupt, u64 source, bool from_bpf,
 				bool lock_already_taken);
 	struct module *owner;
 	const struct bus_type *bus_type;
@@ -195,7 +195,7 @@ struct hid_bpf {
 
 #ifdef CONFIG_HID_BPF
 u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data,
-				  u32 *size, int interrupt, u64 source);
+				  u32 *size, int interrupt, u64 source, bool from_bpf);
 int dispatch_hid_bpf_raw_requests(struct hid_device *hdev,
 				  unsigned char reportnum, __u8 *buf,
 				  u32 size, enum hid_report_type rtype,
@@ -211,7 +211,7 @@ u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *s
 #else /* CONFIG_HID_BPF */
 static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type,
 						u8 *data, u32 *size, int interrupt,
-						u64 source) { return data; }
+						u64 source, bool from_bpf) { return data; }
 static inline int dispatch_hid_bpf_raw_requests(struct hid_device *hdev,
 						unsigned char reportnum, u8 *buf,
 						u32 size, enum hid_report_type rtype,
-- 
cgit v1.2.3


From bab4923132feb3e439ae45962979c5d9d5c7c1f1 Mon Sep 17 00:00:00 2001
From: Yunseong Kim <yskelg@gmail.com>
Date: Tue, 25 Jun 2024 02:33:23 +0900
Subject: tracing/net_sched: NULL pointer dereference in
 perf_trace_qdisc_reset()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the TRACE_EVENT(qdisc_reset) NULL dereference occurred from

 qdisc->dev_queue->dev <NULL> ->name

This situation simulated from bunch of veths and Bluetooth disconnection
and reconnection.

During qdisc initialization, qdisc was being set to noop_queue.
In veth_init_queue, the initial tx_num was reduced back to one,
causing the qdisc reset to be called with noop, which led to the kernel
panic.

I've attached the GitHub gist link that C converted syz-execprogram
source code and 3 log of reproduced vmcore-dmesg.

 https://gist.github.com/yskelg/cc64562873ce249cdd0d5a358b77d740

Yeoreum and I use two fuzzing tool simultaneously.

One process with syz-executor : https://github.com/google/syzkaller

 $ ./syz-execprog -executor=./syz-executor -repeat=1 -sandbox=setuid \
    -enable=none -collide=false log1

The other process with perf fuzzer:
 https://github.com/deater/perf_event_tests/tree/master/fuzzer

 $ perf_event_tests/fuzzer/perf_fuzzer

I think this will happen on the kernel version.

 Linux kernel version +v6.7.10, +v6.8, +v6.9 and it could happen in v6.10.

This occurred from 51270d573a8d. I think this patch is absolutely
necessary. Previously, It was showing not intended string value of name.

I've reproduced 3 time from my fedora 40 Debug Kernel with any other module
or patched.

 version: 6.10.0-0.rc2.20240608gitdc772f8237f9.29.fc41.aarch64+debug

[ 5287.164555] veth0_vlan: left promiscuous mode
[ 5287.164929] veth1_macvtap: left promiscuous mode
[ 5287.164950] veth0_macvtap: left promiscuous mode
[ 5287.164983] veth1_vlan: left promiscuous mode
[ 5287.165008] veth0_vlan: left promiscuous mode
[ 5287.165450] veth1_macvtap: left promiscuous mode
[ 5287.165472] veth0_macvtap: left promiscuous mode
[ 5287.165502] veth1_vlan: left promiscuous mode
…
[ 5297.598240] bridge0: port 2(bridge_slave_1) entered blocking state
[ 5297.598262] bridge0: port 2(bridge_slave_1) entered forwarding state
[ 5297.598296] bridge0: port 1(bridge_slave_0) entered blocking state
[ 5297.598313] bridge0: port 1(bridge_slave_0) entered forwarding state
[ 5297.616090] 8021q: adding VLAN 0 to HW filter on device bond0
[ 5297.620405] bridge0: port 1(bridge_slave_0) entered disabled state
[ 5297.620730] bridge0: port 2(bridge_slave_1) entered disabled state
[ 5297.627247] 8021q: adding VLAN 0 to HW filter on device team0
[ 5297.629636] bridge0: port 1(bridge_slave_0) entered blocking state
…
[ 5298.002798] bridge_slave_0: left promiscuous mode
[ 5298.002869] bridge0: port 1(bridge_slave_0) entered disabled state
[ 5298.309444] bond0 (unregistering): (slave bond_slave_0): Releasing backup interface
[ 5298.315206] bond0 (unregistering): (slave bond_slave_1): Releasing backup interface
[ 5298.320207] bond0 (unregistering): Released all slaves
[ 5298.354296] hsr_slave_0: left promiscuous mode
[ 5298.360750] hsr_slave_1: left promiscuous mode
[ 5298.374889] veth1_macvtap: left promiscuous mode
[ 5298.374931] veth0_macvtap: left promiscuous mode
[ 5298.374988] veth1_vlan: left promiscuous mode
[ 5298.375024] veth0_vlan: left promiscuous mode
[ 5299.109741] team0 (unregistering): Port device team_slave_1 removed
[ 5299.185870] team0 (unregistering): Port device team_slave_0 removed
…
[ 5300.155443] Bluetooth: hci3: unexpected cc 0x0c03 length: 249 > 1
[ 5300.155724] Bluetooth: hci3: unexpected cc 0x1003 length: 249 > 9
[ 5300.155988] Bluetooth: hci3: unexpected cc 0x1001 length: 249 > 9
….
[ 5301.075531] team0: Port device team_slave_1 added
[ 5301.085515] bridge0: port 1(bridge_slave_0) entered blocking state
[ 5301.085531] bridge0: port 1(bridge_slave_0) entered disabled state
[ 5301.085588] bridge_slave_0: entered allmulticast mode
[ 5301.085800] bridge_slave_0: entered promiscuous mode
[ 5301.095617] bridge0: port 1(bridge_slave_0) entered blocking state
[ 5301.095633] bridge0: port 1(bridge_slave_0) entered disabled state
…
[ 5301.149734] bond0: (slave bond_slave_0): Enslaving as an active interface with an up link
[ 5301.173234] bond0: (slave bond_slave_0): Enslaving as an active interface with an up link
[ 5301.180517] bond0: (slave bond_slave_1): Enslaving as an active interface with an up link
[ 5301.193481] hsr_slave_0: entered promiscuous mode
[ 5301.204425] hsr_slave_1: entered promiscuous mode
[ 5301.210172] debugfs: Directory 'hsr0' with parent 'hsr' already present!
[ 5301.210185] Cannot create hsr debugfs directory
[ 5301.224061] bond0: (slave bond_slave_1): Enslaving as an active interface with an up link
[ 5301.246901] bond0: (slave bond_slave_0): Enslaving as an active interface with an up link
[ 5301.255934] team0: Port device team_slave_0 added
[ 5301.256480] team0: Port device team_slave_1 added
[ 5301.256948] team0: Port device team_slave_0 added
…
[ 5301.435928] hsr_slave_0: entered promiscuous mode
[ 5301.446029] hsr_slave_1: entered promiscuous mode
[ 5301.455872] debugfs: Directory 'hsr0' with parent 'hsr' already present!
[ 5301.455884] Cannot create hsr debugfs directory
[ 5301.502664] hsr_slave_0: entered promiscuous mode
[ 5301.513675] hsr_slave_1: entered promiscuous mode
[ 5301.526155] debugfs: Directory 'hsr0' with parent 'hsr' already present!
[ 5301.526164] Cannot create hsr debugfs directory
[ 5301.563662] hsr_slave_0: entered promiscuous mode
[ 5301.576129] hsr_slave_1: entered promiscuous mode
[ 5301.580259] debugfs: Directory 'hsr0' with parent 'hsr' already present!
[ 5301.580270] Cannot create hsr debugfs directory
[ 5301.590269] 8021q: adding VLAN 0 to HW filter on device bond0

[ 5301.595872] KASAN: null-ptr-deref in range [0x0000000000000130-0x0000000000000137]
[ 5301.595877] Mem abort info:
[ 5301.595881]   ESR = 0x0000000096000006
[ 5301.595885]   EC = 0x25: DABT (current EL), IL = 32 bits
[ 5301.595889]   SET = 0, FnV = 0
[ 5301.595893]   EA = 0, S1PTW = 0
[ 5301.595896]   FSC = 0x06: level 2 translation fault
[ 5301.595900] Data abort info:
[ 5301.595903]   ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000
[ 5301.595907]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
[ 5301.595911]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[ 5301.595915] [dfff800000000026] address between user and kernel address ranges
[ 5301.595971] Internal error: Oops: 0000000096000006 [#1] SMP
…
[ 5301.596076] CPU: 2 PID: 102769 Comm:
syz-executor.3 Kdump: loaded Tainted:
 G        W         -------  ---  6.10.0-0.rc2.20240608gitdc772f8237f9.29.fc41.aarch64+debug #1
[ 5301.596080] Hardware name: VMware, Inc. VMware20,1/VBSA,
 BIOS VMW201.00V.21805430.BA64.2305221830 05/22/2023
[ 5301.596082] pstate: 01400005 (nzcv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
[ 5301.596085] pc : strnlen+0x40/0x88
[ 5301.596114] lr : trace_event_get_offsets_qdisc_reset+0x6c/0x2b0
[ 5301.596124] sp : ffff8000beef6b40
[ 5301.596126] x29: ffff8000beef6b40 x28: dfff800000000000 x27: 0000000000000001
[ 5301.596131] x26: 6de1800082c62bd0 x25: 1ffff000110aa9e0 x24: ffff800088554f00
[ 5301.596136] x23: ffff800088554ec0 x22: 0000000000000130 x21: 0000000000000140
[ 5301.596140] x20: dfff800000000000 x19: ffff8000beef6c60 x18: ffff7000115106d8
[ 5301.596143] x17: ffff800121bad000 x16: ffff800080020000 x15: 0000000000000006
[ 5301.596147] x14: 0000000000000002 x13: ffff0001f3ed8d14 x12: ffff700017ddeda5
[ 5301.596151] x11: 1ffff00017ddeda4 x10: ffff700017ddeda4 x9 : ffff800082cc5eec
[ 5301.596155] x8 : 0000000000000004 x7 : 00000000f1f1f1f1 x6 : 00000000f2f2f200
[ 5301.596158] x5 : 00000000f3f3f3f3 x4 : ffff700017dded80 x3 : 00000000f204f1f1
[ 5301.596162] x2 : 0000000000000026 x1 : 0000000000000000 x0 : 0000000000000130
[ 5301.596166] Call trace:
[ 5301.596175]  strnlen+0x40/0x88
[ 5301.596179]  trace_event_get_offsets_qdisc_reset+0x6c/0x2b0
[ 5301.596182]  perf_trace_qdisc_reset+0xb0/0x538
[ 5301.596184]  __traceiter_qdisc_reset+0x68/0xc0
[ 5301.596188]  qdisc_reset+0x43c/0x5e8
[ 5301.596190]  netif_set_real_num_tx_queues+0x288/0x770
[ 5301.596194]  veth_init_queues+0xfc/0x130 [veth]
[ 5301.596198]  veth_newlink+0x45c/0x850 [veth]
[ 5301.596202]  rtnl_newlink_create+0x2c8/0x798
[ 5301.596205]  __rtnl_newlink+0x92c/0xb60
[ 5301.596208]  rtnl_newlink+0xd8/0x130
[ 5301.596211]  rtnetlink_rcv_msg+0x2e0/0x890
[ 5301.596214]  netlink_rcv_skb+0x1c4/0x380
[ 5301.596225]  rtnetlink_rcv+0x20/0x38
[ 5301.596227]  netlink_unicast+0x3c8/0x640
[ 5301.596231]  netlink_sendmsg+0x658/0xa60
[ 5301.596234]  __sock_sendmsg+0xd0/0x180
[ 5301.596243]  __sys_sendto+0x1c0/0x280
[ 5301.596246]  __arm64_sys_sendto+0xc8/0x150
[ 5301.596249]  invoke_syscall+0xdc/0x268
[ 5301.596256]  el0_svc_common.constprop.0+0x16c/0x240
[ 5301.596259]  do_el0_svc+0x48/0x68
[ 5301.596261]  el0_svc+0x50/0x188
[ 5301.596265]  el0t_64_sync_handler+0x120/0x130
[ 5301.596268]  el0t_64_sync+0x194/0x198
[ 5301.596272] Code: eb15001f 54000120 d343fc02 12000801 (38f46842)
[ 5301.596285] SMP: stopping secondary CPUs
[ 5301.597053] Starting crashdump kernel...
[ 5301.597057] Bye!

After applying our patch, I didn't find any kernel panic errors.

We've found a simple reproducer

 # echo 1 > /sys/kernel/debug/tracing/events/qdisc/qdisc_reset/enable

 # ip link add veth0 type veth peer name veth1

 Error: Unknown device type.

However, without our patch applied, I tested upstream 6.10.0-rc3 kernel
using the qdisc_reset event and the ip command on my qemu virtual machine.

This 2 commands makes always kernel panic.

Linux version: 6.10.0-rc3

[    0.000000] Linux version 6.10.0-rc3-00164-g44ef20baed8e-dirty
(paran@fedora) (gcc (GCC) 14.1.1 20240522 (Red Hat 14.1.1-4), GNU ld
version 2.41-34.fc40) #20 SMP PREEMPT Sat Jun 15 16:51:25 KST 2024

Kernel panic message:

[  615.236484] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP
[  615.237250] Dumping ftrace buffer:
[  615.237679]    (ftrace buffer empty)
[  615.238097] Modules linked in: veth crct10dif_ce virtio_gpu
virtio_dma_buf drm_shmem_helper drm_kms_helper zynqmp_fpga xilinx_can
xilinx_spi xilinx_selectmap xilinx_core xilinx_pr_decoupler versal_fpga
uvcvideo uvc videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videodev
videobuf2_common mc usbnet deflate zstd ubifs ubi rcar_canfd rcar_can
omap_mailbox ntb_msi_test ntb_hw_epf lattice_sysconfig_spi
lattice_sysconfig ice40_spi gpio_xilinx dwmac_altr_socfpga mdio_regmap
stmmac_platform stmmac pcs_xpcs dfl_fme_region dfl_fme_mgr dfl_fme_br
dfl_afu dfl fpga_region fpga_bridge can can_dev br_netfilter bridge stp
llc atl1c ath11k_pci mhi ath11k_ahb ath11k qmi_helpers ath10k_sdio
ath10k_pci ath10k_core ath mac80211 libarc4 cfg80211 drm fuse backlight ipv6
Jun 22 02:36:5[3   6k152.62-4sm98k4-0k]v  kCePUr:n e1l :P IUDn:a b4le6
8t oC ohmma: nidpl eN oketr nteali nptaedg i6n.g1 0re.0q-urecs3t- 0at0
1v6i4r-tgu4a4le fa2d0dbraeeds0se-dir tyd f#f2f08
  615.252376] Hardware name: linux,dummy-virt (DT)
[  615.253220] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS
BTYPE=--)
[  615.254433] pc : strnlen+0x6c/0xe0
[  615.255096] lr : trace_event_get_offsets_qdisc_reset+0x94/0x3d0
[  615.256088] sp : ffff800080b269a0
[  615.256615] x29: ffff800080b269a0 x28: ffffc070f3f98500 x27:
0000000000000001
[  615.257831] x26: 0000000000000010 x25: ffffc070f3f98540 x24:
ffffc070f619cf60
[  615.259020] x23: 0000000000000128 x22: 0000000000000138 x21:
dfff800000000000
[  615.260241] x20: ffffc070f631ad00 x19: 0000000000000128 x18:
ffffc070f448b800
[  615.261454] x17: 0000000000000000 x16: 0000000000000001 x15:
ffffc070f4ba2a90
[  615.262635] x14: ffff700010164d73 x13: 1ffff80e1e8d5eb3 x12:
1ffff00010164d72
[  615.263877] x11: ffff700010164d72 x10: dfff800000000000 x9 :
ffffc070e85d6184
[  615.265047] x8 : ffffc070e4402070 x7 : 000000000000f1f1 x6 :
000000001504a6d3
[  615.266336] x5 : ffff28ca21122140 x4 : ffffc070f5043ea8 x3 :
0000000000000000
[  615.267528] x2 : 0000000000000025 x1 : 0000000000000000 x0 :
0000000000000000
[  615.268747] Call trace:
[  615.269180]  strnlen+0x6c/0xe0
[  615.269767]  trace_event_get_offsets_qdisc_reset+0x94/0x3d0
[  615.270716]  trace_event_raw_event_qdisc_reset+0xe8/0x4e8
[  615.271667]  __traceiter_qdisc_reset+0xa0/0x140
[  615.272499]  qdisc_reset+0x554/0x848
[  615.273134]  netif_set_real_num_tx_queues+0x360/0x9a8
[  615.274050]  veth_init_queues+0x110/0x220 [veth]
[  615.275110]  veth_newlink+0x538/0xa50 [veth]
[  615.276172]  __rtnl_newlink+0x11e4/0x1bc8
[  615.276944]  rtnl_newlink+0xac/0x120
[  615.277657]  rtnetlink_rcv_msg+0x4e4/0x1370
[  615.278409]  netlink_rcv_skb+0x25c/0x4f0
[  615.279122]  rtnetlink_rcv+0x48/0x70
[  615.279769]  netlink_unicast+0x5a8/0x7b8
[  615.280462]  netlink_sendmsg+0xa70/0x1190

Yeoreum and I don't know if the patch we wrote will fix the underlying
cause, but we think that priority is to prevent kernel panic happening.
So, we're sending this patch.

Fixes: 51270d573a8d ("tracing/net_sched: Fix tracepoints that save qdisc_dev() as a string")
Link: https://lore.kernel.org/lkml/20240229143432.273b4871@gandalf.local.home/t/
Cc: netdev@vger.kernel.org
Tested-by: Yunseong Kim <yskelg@gmail.com>
Signed-off-by: Yunseong Kim <yskelg@gmail.com>
Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com>
Link: https://lore.kernel.org/r/20240624173320.24945-4-yskelg@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/trace/events/qdisc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h
index f1b5e816e7e5..ff33f41a9db7 100644
--- a/include/trace/events/qdisc.h
+++ b/include/trace/events/qdisc.h
@@ -81,7 +81,7 @@ TRACE_EVENT(qdisc_reset,
 	TP_ARGS(q),
 
 	TP_STRUCT__entry(
-		__string(	dev,		qdisc_dev(q)->name	)
+		__string(	dev,		qdisc_dev(q) ? qdisc_dev(q)->name : "(null)"	)
 		__string(	kind,		q->ops->id		)
 		__field(	u32,		parent			)
 		__field(	u32,		handle			)
-- 
cgit v1.2.3


From 6d4618ad04e1a14202410648f638b62d3f666d45 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Wed, 19 Jun 2024 20:21:47 +0200
Subject: drm/bridge: analogix_dp: remove unused platform power_on_end callback

This isn't used, but gives the impression of the power on and power off
platform calls being non-symmetrical. Remove the unused callback and
rename the power_on_start to simply power_on.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Reviewed-by: Robert Foss <rfoss@kernel.org>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Robert Foss <rfoss@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240619182200.3752465-1-l.stach@pengutronix.de
---
 include/drm/bridge/analogix_dp.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/bridge/analogix_dp.h b/include/drm/bridge/analogix_dp.h
index b0dcc07334a1..8709b6a74c0f 100644
--- a/include/drm/bridge/analogix_dp.h
+++ b/include/drm/bridge/analogix_dp.h
@@ -29,8 +29,7 @@ struct analogix_dp_plat_data {
 	struct drm_connector *connector;
 	bool skip_connector;
 
-	int (*power_on_start)(struct analogix_dp_plat_data *);
-	int (*power_on_end)(struct analogix_dp_plat_data *);
+	int (*power_on)(struct analogix_dp_plat_data *);
 	int (*power_off)(struct analogix_dp_plat_data *);
 	int (*attach)(struct analogix_dp_plat_data *, struct drm_bridge *,
 		      struct drm_connector *);
-- 
cgit v1.2.3


From e7514df007e3b034b65367a32ba19dc61aaa3980 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Wed, 19 Jun 2024 20:21:51 +0200
Subject: drm/bridge: analogix_dp: remove unused analogix_dp_remove

Now that the clock is handled dynamically through
analogix_dp_resume/suspend and it isn't statically enabled in the
driver probe routine, there is no need for the remove function anymore.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Reviewed-by: Robert Foss <rfoss@kernel.org>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Robert Foss <rfoss@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240619182200.3752465-5-l.stach@pengutronix.de
---
 include/drm/bridge/analogix_dp.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/bridge/analogix_dp.h b/include/drm/bridge/analogix_dp.h
index 8709b6a74c0f..6002c5666031 100644
--- a/include/drm/bridge/analogix_dp.h
+++ b/include/drm/bridge/analogix_dp.h
@@ -44,7 +44,6 @@ struct analogix_dp_device *
 analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data);
 int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev);
 void analogix_dp_unbind(struct analogix_dp_device *dp);
-void analogix_dp_remove(struct analogix_dp_device *dp);
 
 int analogix_dp_start_crc(struct drm_connector *connector);
 int analogix_dp_stop_crc(struct drm_connector *connector);
-- 
cgit v1.2.3


From 58de63ddd0ecc84548b8fd24c72deb3739365c78 Mon Sep 17 00:00:00 2001
From: "Jason-JH.Lin" <jason-jh.lin@mediatek.com>
Date: Tue, 25 Jun 2024 16:39:57 +0800
Subject: soc: mtk-cmdq: Add cmdq_pkt_logic_command to support math operation

Add cmdq_pkt_logic_command to support math operation.

cmdq_pkt_logic_command can append logic command to the CMDQ packet,
ask GCE to execute a arithmetic calculate instruction,
such as add, subtract, multiply, AND, OR and NOT, etc.

Note that all arithmetic instructions are unsigned calculations.
If there are any overflows, GCE will sent the invalid IRQ to notify
CMDQ driver.

Signed-off-by: Jason-JH.Lin <jason-jh.lin@mediatek.com>
Signed-off-by: Hsiao Chien Sung <shawn.sung@mediatek.com>
Link: https://lore.kernel.org/r/20240625083957.3540-1-jason-jh.lin@mediatek.com
Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
---
 include/linux/soc/mediatek/mtk-cmdq.h | 42 +++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'include')

diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h
index d4a8e34505e6..5bee6f7fc400 100644
--- a/include/linux/soc/mediatek/mtk-cmdq.h
+++ b/include/linux/soc/mediatek/mtk-cmdq.h
@@ -25,6 +25,31 @@
 
 struct cmdq_pkt;
 
+enum cmdq_logic_op {
+	CMDQ_LOGIC_ASSIGN = 0,
+	CMDQ_LOGIC_ADD = 1,
+	CMDQ_LOGIC_SUBTRACT = 2,
+	CMDQ_LOGIC_MULTIPLY = 3,
+	CMDQ_LOGIC_XOR = 8,
+	CMDQ_LOGIC_NOT = 9,
+	CMDQ_LOGIC_OR = 10,
+	CMDQ_LOGIC_AND = 11,
+	CMDQ_LOGIC_LEFT_SHIFT = 12,
+	CMDQ_LOGIC_RIGHT_SHIFT = 13,
+	CMDQ_LOGIC_MAX,
+};
+
+struct cmdq_operand {
+	/* register type */
+	bool reg;
+	union {
+		/* index */
+		u16 idx;
+		/* value */
+		u16 value;
+	};
+};
+
 struct cmdq_client_reg {
 	u8 subsys;
 	u16 offset;
@@ -272,6 +297,23 @@ int cmdq_pkt_poll(struct cmdq_pkt *pkt, u8 subsys,
 int cmdq_pkt_poll_mask(struct cmdq_pkt *pkt, u8 subsys,
 		       u16 offset, u32 value, u32 mask);
 
+/**
+ * cmdq_pkt_logic_command() - Append logic command to the CMDQ packet, ask GCE to
+ *		          execute an instruction that store the result of logic operation
+ *		          with left and right operand into result_reg_idx.
+ * @pkt:		the CMDQ packet
+ * @result_reg_idx:	SPR index that store operation result of left_operand and right_operand
+ * @left_operand:	left operand
+ * @s_op:		the logic operator enum
+ * @right_operand:	right operand
+ *
+ * Return: 0 for success; else the error code is returned
+ */
+int cmdq_pkt_logic_command(struct cmdq_pkt *pkt, u16 result_reg_idx,
+			   struct cmdq_operand *left_operand,
+			   enum cmdq_logic_op s_op,
+			   struct cmdq_operand *right_operand);
+
 /**
  * cmdq_pkt_assign() - Append logic assign command to the CMDQ packet, ask GCE
  *		       to execute an instruction that set a constant value into
-- 
cgit v1.2.3


From d1741141d03f8f5535adaf5a2d5000da3be9fe90 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Date: Wed, 26 Jun 2024 20:14:32 +0200
Subject: media: uapi: Add a pixel format for BGR48 and RGB48

Add BGR48 and RGB48 16-bit per component image formats.

Signed-off-by: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
Reviewed-by: Naushir Patuck <naush@raspberrypi.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/uapi/linux/videodev2.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index fe6b67e83751..dd6876380fe3 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -582,6 +582,8 @@ struct v4l2_pix_format {
 
 /* RGB formats (6 or 8 bytes per pixel) */
 #define V4L2_PIX_FMT_BGR48_12    v4l2_fourcc('B', '3', '1', '2') /* 48  BGR 12-bit per component */
+#define V4L2_PIX_FMT_BGR48       v4l2_fourcc('B', 'G', 'R', '6') /* 48  BGR 16-bit per component */
+#define V4L2_PIX_FMT_RGB48       v4l2_fourcc('R', 'G', 'B', '6') /* 48  RGB 16-bit per component */
 #define V4L2_PIX_FMT_ABGR64_12   v4l2_fourcc('B', '4', '1', '2') /* 64  BGRA 12-bit per component */
 
 /* Grey formats */
-- 
cgit v1.2.3


From c6c49bac8770ef95518faf20083e8b3e6150f45f Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Date: Wed, 26 Jun 2024 20:14:33 +0200
Subject: media: uapi: Add Raspberry Pi PiSP Back End uAPI

Add the Raspberry Pi PiSP Back End uAPI header.

The header defines the data type used to configure the PiSP Back End
ISP.

The detailed description of the types and of the ISP configuration
procedure is available at
https://datasheets.raspberrypi.com/camera/raspberry-pi-image-signal-processor-specification.pdf

Signed-off-by: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 .../uapi/linux/media/raspberrypi/pisp_be_config.h  | 927 +++++++++++++++++++++
 include/uapi/linux/media/raspberrypi/pisp_common.h | 199 +++++
 2 files changed, 1126 insertions(+)
 create mode 100644 include/uapi/linux/media/raspberrypi/pisp_be_config.h
 create mode 100644 include/uapi/linux/media/raspberrypi/pisp_common.h

(limited to 'include')

diff --git a/include/uapi/linux/media/raspberrypi/pisp_be_config.h b/include/uapi/linux/media/raspberrypi/pisp_be_config.h
new file mode 100644
index 000000000000..1684ae068d4f
--- /dev/null
+++ b/include/uapi/linux/media/raspberrypi/pisp_be_config.h
@@ -0,0 +1,927 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * PiSP Back End configuration definitions.
+ *
+ * Copyright (C) 2021 - Raspberry Pi Ltd
+ *
+ */
+#ifndef _UAPI_PISP_BE_CONFIG_H_
+#define _UAPI_PISP_BE_CONFIG_H_
+
+#include <linux/types.h>
+
+#include "pisp_common.h"
+
+/* byte alignment for inputs */
+#define PISP_BACK_END_INPUT_ALIGN 4u
+/* alignment for compressed inputs */
+#define PISP_BACK_END_COMPRESSED_ALIGN 8u
+/* minimum required byte alignment for outputs */
+#define PISP_BACK_END_OUTPUT_MIN_ALIGN 16u
+/* preferred byte alignment for outputs */
+#define PISP_BACK_END_OUTPUT_MAX_ALIGN 64u
+
+/* minimum allowed tile width anywhere in the pipeline */
+#define PISP_BACK_END_MIN_TILE_WIDTH 16u
+/* minimum allowed tile width anywhere in the pipeline */
+#define PISP_BACK_END_MIN_TILE_HEIGHT 16u
+
+#define PISP_BACK_END_NUM_OUTPUTS 2
+#define PISP_BACK_END_HOG_OUTPUT 1
+
+#define PISP_BACK_END_NUM_TILES 64
+
+enum pisp_be_bayer_enable {
+	PISP_BE_BAYER_ENABLE_INPUT = 0x000001,
+	PISP_BE_BAYER_ENABLE_DECOMPRESS = 0x000002,
+	PISP_BE_BAYER_ENABLE_DPC = 0x000004,
+	PISP_BE_BAYER_ENABLE_GEQ = 0x000008,
+	PISP_BE_BAYER_ENABLE_TDN_INPUT = 0x000010,
+	PISP_BE_BAYER_ENABLE_TDN_DECOMPRESS = 0x000020,
+	PISP_BE_BAYER_ENABLE_TDN = 0x000040,
+	PISP_BE_BAYER_ENABLE_TDN_COMPRESS = 0x000080,
+	PISP_BE_BAYER_ENABLE_TDN_OUTPUT = 0x000100,
+	PISP_BE_BAYER_ENABLE_SDN = 0x000200,
+	PISP_BE_BAYER_ENABLE_BLC = 0x000400,
+	PISP_BE_BAYER_ENABLE_STITCH_INPUT = 0x000800,
+	PISP_BE_BAYER_ENABLE_STITCH_DECOMPRESS = 0x001000,
+	PISP_BE_BAYER_ENABLE_STITCH = 0x002000,
+	PISP_BE_BAYER_ENABLE_STITCH_COMPRESS = 0x004000,
+	PISP_BE_BAYER_ENABLE_STITCH_OUTPUT = 0x008000,
+	PISP_BE_BAYER_ENABLE_WBG = 0x010000,
+	PISP_BE_BAYER_ENABLE_CDN = 0x020000,
+	PISP_BE_BAYER_ENABLE_LSC = 0x040000,
+	PISP_BE_BAYER_ENABLE_TONEMAP = 0x080000,
+	PISP_BE_BAYER_ENABLE_CAC = 0x100000,
+	PISP_BE_BAYER_ENABLE_DEBIN = 0x200000,
+	PISP_BE_BAYER_ENABLE_DEMOSAIC = 0x400000,
+};
+
+enum pisp_be_rgb_enable {
+	PISP_BE_RGB_ENABLE_INPUT = 0x000001,
+	PISP_BE_RGB_ENABLE_CCM = 0x000002,
+	PISP_BE_RGB_ENABLE_SAT_CONTROL = 0x000004,
+	PISP_BE_RGB_ENABLE_YCBCR = 0x000008,
+	PISP_BE_RGB_ENABLE_FALSE_COLOUR = 0x000010,
+	PISP_BE_RGB_ENABLE_SHARPEN = 0x000020,
+	/* Preferred colours would occupy 0x000040 */
+	PISP_BE_RGB_ENABLE_YCBCR_INVERSE = 0x000080,
+	PISP_BE_RGB_ENABLE_GAMMA = 0x000100,
+	PISP_BE_RGB_ENABLE_CSC0 = 0x000200,
+	PISP_BE_RGB_ENABLE_CSC1 = 0x000400,
+	PISP_BE_RGB_ENABLE_DOWNSCALE0 = 0x001000,
+	PISP_BE_RGB_ENABLE_DOWNSCALE1 = 0x002000,
+	PISP_BE_RGB_ENABLE_RESAMPLE0 = 0x008000,
+	PISP_BE_RGB_ENABLE_RESAMPLE1 = 0x010000,
+	PISP_BE_RGB_ENABLE_OUTPUT0 = 0x040000,
+	PISP_BE_RGB_ENABLE_OUTPUT1 = 0x080000,
+	PISP_BE_RGB_ENABLE_HOG = 0x200000
+};
+
+#define PISP_BE_RGB_ENABLE_CSC(i) (PISP_BE_RGB_ENABLE_CSC0 << (i))
+#define PISP_BE_RGB_ENABLE_DOWNSCALE(i) (PISP_BE_RGB_ENABLE_DOWNSCALE0 << (i))
+#define PISP_BE_RGB_ENABLE_RESAMPLE(i) (PISP_BE_RGB_ENABLE_RESAMPLE0 << (i))
+#define PISP_BE_RGB_ENABLE_OUTPUT(i) (PISP_BE_RGB_ENABLE_OUTPUT0 << (i))
+
+/*
+ * We use the enable flags to show when blocks are "dirty", but we need some
+ * extra ones too.
+ */
+enum pisp_be_dirty {
+	PISP_BE_DIRTY_GLOBAL = 0x0001,
+	PISP_BE_DIRTY_SH_FC_COMBINE = 0x0002,
+	PISP_BE_DIRTY_CROP = 0x0004
+};
+
+/**
+ * struct pisp_be_global_config - PiSP global enable bitmaps
+ * @bayer_enables:	Bayer input enable flags
+ * @rgb_enables:	RGB output enable flags
+ * @bayer_order:	Bayer input format ordering
+ * @pad:		Padding bytes
+ */
+struct pisp_be_global_config {
+	__u32 bayer_enables;
+	__u32 rgb_enables;
+	__u8 bayer_order;
+	__u8 pad[3];
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_input_buffer_config - PiSP Back End input buffer
+ * @addr:		Input buffer address
+ */
+struct pisp_be_input_buffer_config {
+	/* low 32 bits followed by high 32 bits (for each of up to 3 planes) */
+	__u32 addr[3][2];
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_dpc_config - PiSP Back End DPC config
+ *
+ * Defective Pixel Correction configuration
+ *
+ * @coeff_level:	Coefficient for the darkest neighbouring pixel value
+ * @coeff_range:	Coefficient for the range of pixels for this Bayer channel
+ * @pad:		Padding byte
+ * @flags:		DPC configuration flags
+ */
+struct pisp_be_dpc_config {
+	__u8 coeff_level;
+	__u8 coeff_range;
+	__u8 pad;
+#define PISP_BE_DPC_FLAG_FOLDBACK 1
+	__u8 flags;
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_geq_config - PiSP Back End GEQ config
+ *
+ * Green Equalisation configuration
+ *
+ * @offset:		Offset value for threshold calculation
+ * @slope_sharper:	Slope/Sharper configuration
+ * @min:		Minimum value the threshold may have
+ * @max:		Maximum value the threshold may have
+ */
+struct pisp_be_geq_config {
+	__u16 offset;
+#define PISP_BE_GEQ_SHARPER BIT(15)
+#define PISP_BE_GEQ_SLOPE ((1 << 10) - 1)
+	/* top bit is the "sharper" flag, slope value is bottom 10 bits */
+	__u16 slope_sharper;
+	__u16 min;
+	__u16 max;
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_tdn_input_buffer_config - PiSP Back End TDN input buffer
+ * @addr:		TDN input buffer address
+ */
+struct pisp_be_tdn_input_buffer_config {
+	/* low 32 bits followed by high 32 bits */
+	__u32 addr[2];
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_tdn_config - PiSP Back End TDN config
+ *
+ * Temporal Denoise configuration
+ *
+ * @black_level:	Black level value subtracted from pixels
+ * @ratio:		Multiplier for the LTA input frame
+ * @noise_constant:	Constant offset value used in noise estimation
+ * @noise_slope:	Noise estimation multiplier
+ * @threshold:		Threshold for TDN operations
+ * @reset:		Disable TDN operations
+ * @pad:		Padding byte
+ */
+struct pisp_be_tdn_config {
+	__u16 black_level;
+	__u16 ratio;
+	__u16 noise_constant;
+	__u16 noise_slope;
+	__u16 threshold;
+	__u8 reset;
+	__u8 pad;
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_tdn_output_buffer_config - PiSP Back End TDN output buffer
+ * @addr:		TDN output buffer address
+ */
+struct pisp_be_tdn_output_buffer_config {
+	/* low 32 bits followed by high 32 bits */
+	__u32 addr[2];
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_sdn_config - PiSP Back End SDN config
+ *
+ * Spatial Denoise configuration
+ *
+ * @black_level:	Black level subtracted from pixel for noise estimation
+ * @leakage:		Proportion of the original undenoised value to mix in
+ *			denoised output
+ * @pad:		Padding byte
+ * @noise_constant:	Noise constant used for noise estimation
+ * @noise_slope:	Noise slope value used for noise estimation
+ * @noise_constant2:	Second noise constant used for noise estimation
+ * @noise_slope2:	Second slope value used for noise estimation
+ */
+struct pisp_be_sdn_config {
+	__u16 black_level;
+	__u8 leakage;
+	__u8 pad;
+	__u16 noise_constant;
+	__u16 noise_slope;
+	__u16 noise_constant2;
+	__u16 noise_slope2;
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_stitch_input_buffer_config - PiSP Back End Stitch input
+ * @addr:		Stitch input buffer address
+ */
+struct pisp_be_stitch_input_buffer_config {
+	/* low 32 bits followed by high 32 bits */
+	__u32 addr[2];
+} __attribute__((packed));
+
+#define PISP_BE_STITCH_STREAMING_LONG 0x8000
+#define PISP_BE_STITCH_EXPOSURE_RATIO_MASK 0x7fff
+
+/**
+ * struct pisp_be_stitch_config - PiSP Back End Stitch config
+ *
+ * Stitch block configuration
+ *
+ * @threshold_lo:		Low threshold value
+ * @threshold_diff_power:	Low and high threshold difference
+ * @pad:			Padding bytes
+ * @exposure_ratio:		Multiplier to convert long exposure pixels into
+ *				short exposure pixels
+ * @motion_threshold_256:	Motion threshold above which short exposure
+ *				pixels are used
+ * @motion_threshold_recip:	Reciprocal of motion_threshold_256 value
+ */
+struct pisp_be_stitch_config {
+	__u16 threshold_lo;
+	__u8 threshold_diff_power;
+	__u8 pad;
+
+	/* top bit indicates whether streaming input is the long exposure */
+	__u16 exposure_ratio;
+
+	__u8 motion_threshold_256;
+	__u8 motion_threshold_recip;
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_stitch_output_buffer_config - PiSP Back End Stitch output
+ * @addr:		Stitch input buffer address
+ */
+struct pisp_be_stitch_output_buffer_config {
+	/* low 32 bits followed by high 32 bits */
+	__u32 addr[2];
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_cdn_config - PiSP Back End CDN config
+ *
+ * Colour Denoise configuration
+ *
+ * @thresh:		Constant for noise estimation
+ * @iir_strength:	Relative strength of the IIR part of the filter
+ * @g_adjust:		Proportion of the change assigned to the G channel
+ */
+struct pisp_be_cdn_config {
+	__u16 thresh;
+	__u8 iir_strength;
+	__u8 g_adjust;
+} __attribute__((packed));
+
+#define PISP_BE_LSC_LOG_GRID_SIZE 5
+#define PISP_BE_LSC_GRID_SIZE (1 << PISP_BE_LSC_LOG_GRID_SIZE)
+#define PISP_BE_LSC_STEP_PRECISION 18
+
+/**
+ * struct pisp_be_lsc_config - PiSP Back End LSC config
+ *
+ * Lens Shading Correction configuration
+ *
+ * @grid_step_x:	Reciprocal of cell size width
+ * @grid_step_y:	Reciprocal of cell size height
+ * @lut_packed:		Jointly-coded RGB gains for each LSC grid
+ */
+struct pisp_be_lsc_config {
+	/* (1<<18) / grid_cell_width */
+	__u16 grid_step_x;
+	/* (1<<18) / grid_cell_height */
+	__u16 grid_step_y;
+	/* RGB gains jointly encoded in 32 bits */
+#define PISP_BE_LSC_LUT_SIZE	(PISP_BE_LSC_GRID_SIZE + 1)
+	__u32 lut_packed[PISP_BE_LSC_LUT_SIZE][PISP_BE_LSC_LUT_SIZE];
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_lsc_extra - PiSP Back End LSC Extra config
+ * @offset_x:		Horizontal offset into the LSC table of this tile
+ * @offset_y:		Vertical offset into the LSC table of this tile
+ */
+struct pisp_be_lsc_extra {
+	__u16 offset_x;
+	__u16 offset_y;
+} __attribute__((packed));
+
+#define PISP_BE_CAC_LOG_GRID_SIZE 3
+#define PISP_BE_CAC_GRID_SIZE (1 << PISP_BE_CAC_LOG_GRID_SIZE)
+#define PISP_BE_CAC_STEP_PRECISION 20
+
+/**
+ * struct pisp_be_cac_config - PiSP Back End CAC config
+ *
+ * Chromatic Aberration Correction config
+ *
+ * @grid_step_x:	Reciprocal of cell size width
+ * @grid_step_y:	Reciprocal of cell size height
+ * @lut:		Pixel shift for the CAC grid
+ */
+struct pisp_be_cac_config {
+	/* (1<<20) / grid_cell_width */
+	__u16 grid_step_x;
+	/* (1<<20) / grid_cell_height */
+	__u16 grid_step_y;
+	/* [gridy][gridx][rb][xy] */
+#define PISP_BE_CAC_LUT_SIZE		(PISP_BE_CAC_GRID_SIZE + 1)
+	__s8 lut[PISP_BE_CAC_LUT_SIZE][PISP_BE_CAC_LUT_SIZE][2][2];
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_cac_extra - PiSP Back End CAC extra config
+ * @offset_x:		Horizontal offset into the CAC table of this tile
+ * @offset_y:		Horizontal offset into the CAC table of this tile
+ */
+struct pisp_be_cac_extra {
+	__u16 offset_x;
+	__u16 offset_y;
+} __attribute__((packed));
+
+#define PISP_BE_DEBIN_NUM_COEFFS 4
+
+/**
+ * struct pisp_be_debin_config - PiSP Back End Debin config
+ *
+ * Debinning configuration
+ *
+ * @coeffs:		Filter coefficients for debinning
+ * @h_enable:		Horizontal debinning enable
+ * @v_enable:		Vertical debinning enable
+ * @pad:		Padding bytes
+ */
+struct pisp_be_debin_config {
+	__s8 coeffs[PISP_BE_DEBIN_NUM_COEFFS];
+	__s8 h_enable;
+	__s8 v_enable;
+	__s8 pad[2];
+} __attribute__((packed));
+
+#define PISP_BE_TONEMAP_LUT_SIZE 64
+
+/**
+ * struct pisp_be_tonemap_config - PiSP Back End Tonemap config
+ *
+ * Tonemapping configuration
+ *
+ * @detail_constant:	Constant value for threshold calculation
+ * @detail_slope:	Slope value for threshold calculation
+ * @iir_strength:	Relative strength of the IIR fiter
+ * @strength:		Strength factor
+ * @lut:		Look-up table for tonemap curve
+ */
+struct pisp_be_tonemap_config {
+	__u16 detail_constant;
+	__u16 detail_slope;
+	__u16 iir_strength;
+	__u16 strength;
+	__u32 lut[PISP_BE_TONEMAP_LUT_SIZE];
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_demosaic_config - PiSP Back End Demosaic config
+ *
+ * Demosaic configuration
+ *
+ * @sharper:		Use other Bayer channels to increase sharpness
+ * @fc_mode:		Built-in false colour suppression mode
+ * @pad:		Padding bytes
+ */
+struct pisp_be_demosaic_config {
+	__u8 sharper;
+	__u8 fc_mode;
+	__u8 pad[2];
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_ccm_config - PiSP Back End CCM config
+ *
+ * Colour Correction Matrix configuration
+ *
+ * @coeffs:		Matrix coefficients
+ * @pad:		Padding bytes
+ * @offsets:		Offsets triplet
+ */
+struct pisp_be_ccm_config {
+	__s16 coeffs[9];
+	__u8 pad[2];
+	__s32 offsets[3];
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_sat_control_config - PiSP Back End SAT config
+ *
+ * Saturation Control configuration
+ *
+ * @shift_r:		Left shift for Red colour channel
+ * @shift_g:		Left shift for Green colour channel
+ * @shift_b:		Left shift for Blue colour channel
+ * @pad:		Padding byte
+ */
+struct pisp_be_sat_control_config {
+	__u8 shift_r;
+	__u8 shift_g;
+	__u8 shift_b;
+	__u8 pad;
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_false_colour_config - PiSP Back End False Colour config
+ *
+ * False Colour configuration
+ *
+ * @distance:		Distance of neighbouring pixels, either 1 or 2
+ * @pad:		Padding bytes
+ */
+struct pisp_be_false_colour_config {
+	__u8 distance;
+	__u8 pad[3];
+} __attribute__((packed));
+
+#define PISP_BE_SHARPEN_SIZE 5
+#define PISP_BE_SHARPEN_FUNC_NUM_POINTS 9
+
+/**
+ * struct pisp_be_sharpen_config - PiSP Back End Sharpening config
+ *
+ * Sharpening configuration
+ *
+ * @kernel0:		Coefficient for filter 0
+ * @pad0:		Padding byte
+ * @kernel1:		Coefficient for filter 1
+ * @pad1:		Padding byte
+ * @kernel2:		Coefficient for filter 2
+ * @pad2:		Padding byte
+ * @kernel3:		Coefficient for filter 3
+ * @pad3:		Padding byte
+ * @kernel4:		Coefficient for filter 4
+ * @pad4:		Padding byte
+ * @threshold_offset0:	Offset for filter 0 response calculation
+ * @threshold_slope0:	Slope multiplier for the filter 0 response calculation
+ * @scale0:		Scale factor for filter 0 response calculation
+ * @pad5:		Padding byte
+ * @threshold_offset1:	Offset for filter 0 response calculation
+ * @threshold_slope1:	Slope multiplier for the filter 0 response calculation
+ * @scale1:		Scale factor for filter 0 response calculation
+ * @pad6:		Padding byte
+ * @threshold_offset2:	Offset for filter 0 response calculation
+ * @threshold_slope2:	Slope multiplier for the filter 0 response calculation
+ * @scale2:		Scale factor for filter 0 response calculation
+ * @pad7:		Padding byte
+ * @threshold_offset3:	Offset for filter 0 response calculation
+ * @threshold_slope3:	Slope multiplier for the filter 0 response calculation
+ * @scale3:		Scale factor for filter 0 response calculation
+ * @pad8:		Padding byte
+ * @threshold_offset4:	Offset for filter 0 response calculation
+ * @threshold_slope4:	Slope multiplier for the filter 0 response calculation
+ * @scale4:		Scale factor for filter 0 response calculation
+ * @pad9:		Padding byte
+ * @positive_strength:	Factor to scale the positive sharpening strength
+ * @positive_pre_limit:	Maximum allowed possible positive sharpening value
+ * @positive_func:	Gain factor applied to positive sharpening response
+ * @positive_limit:	Final gain factor applied to positive sharpening
+ * @negative_strength:	Factor to scale the negative sharpening strength
+ * @negative_pre_limit:	Maximum allowed possible negative sharpening value
+ * @negative_func:	Gain factor applied to negative sharpening response
+ * @negative_limit:	Final gain factor applied to negative sharpening
+ * @enables:		Filter enable mask
+ * @white:		White output pixel filter mask
+ * @black:		Black output pixel filter mask
+ * @grey:		Grey output pixel filter mask
+ */
+struct pisp_be_sharpen_config {
+	__s8 kernel0[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE];
+	__s8 pad0[3];
+	__s8 kernel1[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE];
+	__s8 pad1[3];
+	__s8 kernel2[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE];
+	__s8 pad2[3];
+	__s8 kernel3[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE];
+	__s8 pad3[3];
+	__s8 kernel4[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE];
+	__s8 pad4[3];
+	__u16 threshold_offset0;
+	__u16 threshold_slope0;
+	__u16 scale0;
+	__u16 pad5;
+	__u16 threshold_offset1;
+	__u16 threshold_slope1;
+	__u16 scale1;
+	__u16 pad6;
+	__u16 threshold_offset2;
+	__u16 threshold_slope2;
+	__u16 scale2;
+	__u16 pad7;
+	__u16 threshold_offset3;
+	__u16 threshold_slope3;
+	__u16 scale3;
+	__u16 pad8;
+	__u16 threshold_offset4;
+	__u16 threshold_slope4;
+	__u16 scale4;
+	__u16 pad9;
+	__u16 positive_strength;
+	__u16 positive_pre_limit;
+	__u16 positive_func[PISP_BE_SHARPEN_FUNC_NUM_POINTS];
+	__u16 positive_limit;
+	__u16 negative_strength;
+	__u16 negative_pre_limit;
+	__u16 negative_func[PISP_BE_SHARPEN_FUNC_NUM_POINTS];
+	__u16 negative_limit;
+	__u8 enables;
+	__u8 white;
+	__u8 black;
+	__u8 grey;
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_sh_fc_combine_config - PiSP Back End Sharpening and
+ *					 False Colour config
+ *
+ * Sharpening and False Colour configuration
+ *
+ * @y_factor:		Control amount of desaturation of pixels being darkened
+ * @c1_factor:		Control amount of brightening of a pixel for the Cb
+ *			channel
+ * @c2_factor:		Control amount of brightening of a pixel for the Cr
+ *			channel
+ * @pad:		Padding byte
+ */
+struct pisp_be_sh_fc_combine_config {
+	__u8 y_factor;
+	__u8 c1_factor;
+	__u8 c2_factor;
+	__u8 pad;
+} __attribute__((packed));
+
+#define PISP_BE_GAMMA_LUT_SIZE 64
+
+/**
+ * struct pisp_be_gamma_config - PiSP Back End Gamma configuration
+ * @lut:		Gamma curve look-up table
+ */
+struct pisp_be_gamma_config {
+	__u32 lut[PISP_BE_GAMMA_LUT_SIZE];
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_crop_config - PiSP Back End Crop config
+ *
+ * Crop configuration
+ *
+ * @offset_x:		Number of pixels cropped from the left of the tile
+ * @offset_y:		Number of pixels cropped from the top of the tile
+ * @width:		Width of the cropped tile output
+ * @height:		Height of the cropped tile output
+ */
+struct pisp_be_crop_config {
+	__u16 offset_x, offset_y;
+	__u16 width, height;
+} __attribute__((packed));
+
+#define PISP_BE_RESAMPLE_FILTER_SIZE 96
+
+/**
+ * struct pisp_be_resample_config - PiSP Back End Resampling config
+ *
+ * Resample configuration
+ *
+ * @scale_factor_h:	Horizontal scale factor
+ * @scale_factor_v:	Vertical scale factor
+ * @coef:		Resample coefficients
+ */
+struct pisp_be_resample_config {
+	__u16 scale_factor_h, scale_factor_v;
+	__s16 coef[PISP_BE_RESAMPLE_FILTER_SIZE];
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_resample_extra - PiSP Back End Resample config
+ *
+ * Resample configuration
+ *
+ * @scaled_width:	Width in pixels of the scaled output
+ * @scaled_height:	Height in pixels of the scaled output
+ * @initial_phase_h:	Initial horizontal phase
+ * @initial_phase_v:	Initial vertical phase
+ */
+struct pisp_be_resample_extra {
+	__u16 scaled_width;
+	__u16 scaled_height;
+	__s16 initial_phase_h[3];
+	__s16 initial_phase_v[3];
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_downscale_config - PiSP Back End Downscale config
+ *
+ * Downscale configuration
+ *
+ * @scale_factor_h:	Horizontal scale factor
+ * @scale_factor_v:	Vertical scale factor
+ * @scale_recip_h:	Horizontal reciprocal factor
+ * @scale_recip_v:	Vertical reciprocal factor
+ */
+struct pisp_be_downscale_config {
+	__u16 scale_factor_h;
+	__u16 scale_factor_v;
+	__u16 scale_recip_h;
+	__u16 scale_recip_v;
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_downscale_extra - PiSP Back End Downscale Extra config
+ * @scaled_width:	Scaled image width
+ * @scaled_height:	Scaled image height
+ */
+struct pisp_be_downscale_extra {
+	__u16 scaled_width;
+	__u16 scaled_height;
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_hog_config - PiSP Back End HOG config
+ *
+ * Histogram of Oriented Gradients configuration
+ *
+ * @compute_signed:	Set 0 for unsigned gradients, 1 for signed
+ * @channel_mix:	Channels proportions to use
+ * @stride:		Stride in bytes between blocks directly below
+ */
+struct pisp_be_hog_config {
+	__u8 compute_signed;
+	__u8 channel_mix[3];
+	__u32 stride;
+} __attribute__((packed));
+
+struct pisp_be_axi_config {
+	__u8 r_qos; /* Read QoS */
+	__u8 r_cache_prot; /* Read { prot[2:0], cache[3:0] } */
+	__u8 w_qos; /* Write QoS */
+	__u8 w_cache_prot; /* Write { prot[2:0], cache[3:0] } */
+} __attribute__((packed));
+
+/**
+ * enum pisp_be_transform - PiSP Back End Transform flags
+ * @PISP_BE_TRANSFORM_NONE:	No transform
+ * @PISP_BE_TRANSFORM_HFLIP:	Horizontal flip
+ * @PISP_BE_TRANSFORM_VFLIP:	Vertical flip
+ * @PISP_BE_TRANSFORM_ROT180:	180 degress rotation
+ */
+enum pisp_be_transform {
+	PISP_BE_TRANSFORM_NONE = 0x0,
+	PISP_BE_TRANSFORM_HFLIP = 0x1,
+	PISP_BE_TRANSFORM_VFLIP = 0x2,
+	PISP_BE_TRANSFORM_ROT180 =
+		(PISP_BE_TRANSFORM_HFLIP | PISP_BE_TRANSFORM_VFLIP)
+};
+
+struct pisp_be_output_format_config {
+	struct pisp_image_format_config image;
+	__u8 transform;
+	__u8 pad[3];
+	__u16 lo;
+	__u16 hi;
+	__u16 lo2;
+	__u16 hi2;
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_output_buffer_config - PiSP Back End Output buffer
+ * @addr:		Output buffer address
+ */
+struct pisp_be_output_buffer_config {
+	/* low 32 bits followed by high 32 bits (for each of 3 planes) */
+	__u32 addr[3][2];
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_hog_buffer_config - PiSP Back End HOG buffer
+ * @addr:		HOG buffer address
+ */
+struct pisp_be_hog_buffer_config {
+	/* low 32 bits followed by high 32 bits */
+	__u32 addr[2];
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_config - RaspberryPi PiSP Back End Processing configuration
+ *
+ * @global:			Global PiSP configuration
+ * @input_format:		Input image format
+ * @decompress:			Decompress configuration
+ * @dpc:			Defective Pixel Correction configuration
+ * @geq:			Green Equalisation configuration
+ * @tdn_input_format:		Temporal Denoise input format
+ * @tdn_decompress:		Temporal Denoise decompress configuration
+ * @tdn:			Temporal Denoise configuration
+ * @tdn_compress:		Temporal Denoise compress configuration
+ * @tdn_output_format:		Temporal Denoise output format
+ * @sdn:			Spatial Denoise configuration
+ * @blc:			Black Level Correction configuration
+ * @stitch_compress:		Stitch compress configuration
+ * @stitch_output_format:	Stitch output format
+ * @stitch_input_format:	Stitch input format
+ * @stitch_decompress:		Stitch decompress configuration
+ * @stitch:			Stitch configuration
+ * @lsc:			Lens Shading Correction configuration
+ * @wbg:			White Balance Gain configuration
+ * @cdn:			Colour Denoise configuration
+ * @cac:			Colour Aberration Correction configuration
+ * @debin:			Debinning configuration
+ * @tonemap:			Tonemapping configuration
+ * @demosaic:			Demosaicing configuration
+ * @ccm:			Colour Correction Matrix configuration
+ * @sat_control:		Saturation Control configuration
+ * @ycbcr:			YCbCr colour correction configuration
+ * @sharpen:			Sharpening configuration
+ * @false_colour:		False colour correction
+ * @sh_fc_combine:		Sharpening and False Colour correction
+ * @ycbcr_inverse:		Inverse YCbCr colour correction
+ * @gamma:			Gamma curve configuration
+ * @csc:			Color Space Conversion configuration
+ * @downscale:			Downscale configuration
+ * @resample:			Resampling configuration
+ * @output_format:		Output format configuration
+ * @hog:			HOG configuration
+ */
+struct pisp_be_config {
+	struct pisp_be_global_config global;
+	struct pisp_image_format_config input_format;
+	struct pisp_decompress_config decompress;
+	struct pisp_be_dpc_config dpc;
+	struct pisp_be_geq_config geq;
+	struct pisp_image_format_config tdn_input_format;
+	struct pisp_decompress_config tdn_decompress;
+	struct pisp_be_tdn_config tdn;
+	struct pisp_compress_config tdn_compress;
+	struct pisp_image_format_config tdn_output_format;
+	struct pisp_be_sdn_config sdn;
+	struct pisp_bla_config blc;
+	struct pisp_compress_config stitch_compress;
+	struct pisp_image_format_config stitch_output_format;
+	struct pisp_image_format_config stitch_input_format;
+	struct pisp_decompress_config stitch_decompress;
+	struct pisp_be_stitch_config stitch;
+	struct pisp_be_lsc_config lsc;
+	struct pisp_wbg_config wbg;
+	struct pisp_be_cdn_config cdn;
+	struct pisp_be_cac_config cac;
+	struct pisp_be_debin_config debin;
+	struct pisp_be_tonemap_config tonemap;
+	struct pisp_be_demosaic_config demosaic;
+	struct pisp_be_ccm_config ccm;
+	struct pisp_be_sat_control_config sat_control;
+	struct pisp_be_ccm_config ycbcr;
+	struct pisp_be_sharpen_config sharpen;
+	struct pisp_be_false_colour_config false_colour;
+	struct pisp_be_sh_fc_combine_config sh_fc_combine;
+	struct pisp_be_ccm_config ycbcr_inverse;
+	struct pisp_be_gamma_config gamma;
+	struct pisp_be_ccm_config csc[PISP_BACK_END_NUM_OUTPUTS];
+	struct pisp_be_downscale_config downscale[PISP_BACK_END_NUM_OUTPUTS];
+	struct pisp_be_resample_config resample[PISP_BACK_END_NUM_OUTPUTS];
+	struct pisp_be_output_format_config
+				output_format[PISP_BACK_END_NUM_OUTPUTS];
+	struct pisp_be_hog_config hog;
+} __attribute__((packed));
+
+/**
+ * enum pisp_tile_edge - PiSP Back End Tile position
+ * @PISP_LEFT_EDGE:		Left edge tile
+ * @PISP_RIGHT_EDGE:		Right edge tile
+ * @PISP_TOP_EDGE:		Top edge tile
+ * @PISP_BOTTOM_EDGE:		Bottom edge tile
+ */
+enum pisp_tile_edge {
+	PISP_LEFT_EDGE = (1 << 0),
+	PISP_RIGHT_EDGE = (1 << 1),
+	PISP_TOP_EDGE = (1 << 2),
+	PISP_BOTTOM_EDGE = (1 << 3)
+};
+
+/**
+ * struct pisp_tile - Raspberry Pi PiSP Back End tile configuration
+ *
+ * Tile parameters: each set of tile parameters is a 160-bytes block of data
+ * which contains the tile processing parameters.
+ *
+ * @edge:			Edge tile flag
+ * @pad0:			Padding bytes
+ * @input_addr_offset:		Top-left pixel offset, in bytes
+ * @input_addr_offset2:		Top-left pixel offset, in bytes for the second/
+ *				third image planes
+ * @input_offset_x:		Horizontal offset in pixels of this tile in the
+ *				input image
+ * @input_offset_y:		Vertical offset in pixels of this tile in the
+ *				input image
+ * @input_width:		Width in pixels of this tile
+ * @input_height:		Height in pixels of the this tile
+ * @tdn_input_addr_offset:	TDN input image offset, in bytes
+ * @tdn_output_addr_offset:	TDN output image offset, in bytes
+ * @stitch_input_addr_offset:	Stitch input image offset, in bytes
+ * @stitch_output_addr_offset:	Stitch output image offset, in bytes
+ * @lsc_grid_offset_x:		Horizontal offset in the LSC table for this tile
+ * @lsc_grid_offset_y:		Vertical offset in the LSC table for this tile
+ * @cac_grid_offset_x:		Horizontal offset in the CAC table for this tile
+ * @cac_grid_offset_y:		Horizontal offset in the CAC table for this tile
+ * @crop_x_start:		Number of pixels cropped from the left of the
+ *				tile
+ * @crop_x_end:			Number of pixels cropped from the right of the
+ *				tile
+ * @crop_y_start:		Number of pixels cropped from the top of the
+ *				tile
+ * @crop_y_end:			Number of pixels cropped from the bottom of the
+ *				tile
+ * @downscale_phase_x:		Initial horizontal phase in pixels
+ * @downscale_phase_y:		Initial vertical phase in pixels
+ * @resample_in_width:		Width in pixels of the tile entering the
+ *				Resample block
+ * @resample_in_height:		Height in pixels of the tile entering the
+ *				Resample block
+ * @resample_phase_x:		Initial horizontal phase for the Resample block
+ * @resample_phase_y:		Initial vertical phase for the Resample block
+ * @output_offset_x:		Horizontal offset in pixels where the tile will
+ *				be written into the output image
+ * @output_offset_y:		Vertical offset in pixels where the tile will be
+ *				written into the output image
+ * @output_width:		Width in pixels in the output image of this tile
+ * @output_height:		Height in pixels in the output image of this tile
+ * @output_addr_offset:		Offset in bytes into the output buffer
+ * @output_addr_offset2:	Offset in bytes into the output buffer for the
+ *				second and third plane
+ * @output_hog_addr_offset:	Offset in bytes into the HOG buffer where
+ *				results of this tile are to be written
+ */
+struct pisp_tile {
+	__u8 edge; /* enum pisp_tile_edge */
+	__u8 pad0[3];
+	/* 4 bytes */
+	__u32 input_addr_offset;
+	__u32 input_addr_offset2;
+	__u16 input_offset_x;
+	__u16 input_offset_y;
+	__u16 input_width;
+	__u16 input_height;
+	/* 20 bytes */
+	__u32 tdn_input_addr_offset;
+	__u32 tdn_output_addr_offset;
+	__u32 stitch_input_addr_offset;
+	__u32 stitch_output_addr_offset;
+	/* 36 bytes */
+	__u32 lsc_grid_offset_x;
+	__u32 lsc_grid_offset_y;
+	/* 44 bytes */
+	__u32 cac_grid_offset_x;
+	__u32 cac_grid_offset_y;
+	/* 52 bytes */
+	__u16 crop_x_start[PISP_BACK_END_NUM_OUTPUTS];
+	__u16 crop_x_end[PISP_BACK_END_NUM_OUTPUTS];
+	__u16 crop_y_start[PISP_BACK_END_NUM_OUTPUTS];
+	__u16 crop_y_end[PISP_BACK_END_NUM_OUTPUTS];
+	/* 68 bytes */
+	/* Ordering is planes then branches */
+	__u16 downscale_phase_x[3 * PISP_BACK_END_NUM_OUTPUTS];
+	__u16 downscale_phase_y[3 * PISP_BACK_END_NUM_OUTPUTS];
+	/* 92 bytes */
+	__u16 resample_in_width[PISP_BACK_END_NUM_OUTPUTS];
+	__u16 resample_in_height[PISP_BACK_END_NUM_OUTPUTS];
+	/* 100 bytes */
+	/* Ordering is planes then branches */
+	__u16 resample_phase_x[3 * PISP_BACK_END_NUM_OUTPUTS];
+	__u16 resample_phase_y[3 * PISP_BACK_END_NUM_OUTPUTS];
+	/* 124 bytes */
+	__u16 output_offset_x[PISP_BACK_END_NUM_OUTPUTS];
+	__u16 output_offset_y[PISP_BACK_END_NUM_OUTPUTS];
+	__u16 output_width[PISP_BACK_END_NUM_OUTPUTS];
+	__u16 output_height[PISP_BACK_END_NUM_OUTPUTS];
+	/* 140 bytes */
+	__u32 output_addr_offset[PISP_BACK_END_NUM_OUTPUTS];
+	__u32 output_addr_offset2[PISP_BACK_END_NUM_OUTPUTS];
+	/* 156 bytes */
+	__u32 output_hog_addr_offset;
+	/* 160 bytes */
+} __attribute__((packed));
+
+/**
+ * struct pisp_be_tiles_config - Raspberry Pi PiSP Back End configuration
+ * @tiles:	Tile descriptors
+ * @num_tiles:	Number of tiles
+ * @config:	PiSP Back End configuration
+ */
+struct pisp_be_tiles_config {
+	struct pisp_tile tiles[PISP_BACK_END_NUM_TILES];
+	__u32 num_tiles;
+	struct pisp_be_config config;
+} __attribute__((packed));
+
+#endif /* _UAPI_PISP_BE_CONFIG_H_ */
diff --git a/include/uapi/linux/media/raspberrypi/pisp_common.h b/include/uapi/linux/media/raspberrypi/pisp_common.h
new file mode 100644
index 000000000000..b2522e29c976
--- /dev/null
+++ b/include/uapi/linux/media/raspberrypi/pisp_common.h
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * RP1 PiSP common definitions.
+ *
+ * Copyright (C) 2021 - Raspberry Pi Ltd.
+ *
+ */
+#ifndef _UAPI_PISP_COMMON_H_
+#define _UAPI_PISP_COMMON_H_
+
+#include <linux/types.h>
+
+struct pisp_image_format_config {
+	/* size in pixels */
+	__u16 width;
+	__u16 height;
+	/* must match struct pisp_image_format below */
+	__u32 format;
+	__s32 stride;
+	/* some planar image formats will need a second stride */
+	__s32 stride2;
+} __attribute__((packed));
+
+enum pisp_bayer_order {
+	/*
+	 * Note how bayer_order&1 tells you if G is on the even pixels of the
+	 * checkerboard or not, and bayer_order&2 tells you if R is on the even
+	 * rows or is swapped with B. Note that if the top (of the 8) bits is
+	 * set, this denotes a monochrome or greyscale image, and the lower bits
+	 * should all be ignored.
+	 */
+	PISP_BAYER_ORDER_RGGB = 0,
+	PISP_BAYER_ORDER_GBRG = 1,
+	PISP_BAYER_ORDER_BGGR = 2,
+	PISP_BAYER_ORDER_GRBG = 3,
+	PISP_BAYER_ORDER_GREYSCALE = 128
+};
+
+enum pisp_image_format {
+	/*
+	 * Precise values are mostly tbd. Generally these will be portmanteau
+	 * values comprising bit fields and flags. This format must be shared
+	 * throughout the PiSP.
+	 */
+	PISP_IMAGE_FORMAT_BPS_8 = 0x00000000,
+	PISP_IMAGE_FORMAT_BPS_10 = 0x00000001,
+	PISP_IMAGE_FORMAT_BPS_12 = 0x00000002,
+	PISP_IMAGE_FORMAT_BPS_16 = 0x00000003,
+	PISP_IMAGE_FORMAT_BPS_MASK = 0x00000003,
+
+	PISP_IMAGE_FORMAT_PLANARITY_INTERLEAVED = 0x00000000,
+	PISP_IMAGE_FORMAT_PLANARITY_SEMI_PLANAR = 0x00000010,
+	PISP_IMAGE_FORMAT_PLANARITY_PLANAR = 0x00000020,
+	PISP_IMAGE_FORMAT_PLANARITY_MASK = 0x00000030,
+
+	PISP_IMAGE_FORMAT_SAMPLING_444 = 0x00000000,
+	PISP_IMAGE_FORMAT_SAMPLING_422 = 0x00000100,
+	PISP_IMAGE_FORMAT_SAMPLING_420 = 0x00000200,
+	PISP_IMAGE_FORMAT_SAMPLING_MASK = 0x00000300,
+
+	PISP_IMAGE_FORMAT_ORDER_NORMAL = 0x00000000,
+	PISP_IMAGE_FORMAT_ORDER_SWAPPED = 0x00001000,
+
+	PISP_IMAGE_FORMAT_SHIFT_0 = 0x00000000,
+	PISP_IMAGE_FORMAT_SHIFT_1 = 0x00010000,
+	PISP_IMAGE_FORMAT_SHIFT_2 = 0x00020000,
+	PISP_IMAGE_FORMAT_SHIFT_3 = 0x00030000,
+	PISP_IMAGE_FORMAT_SHIFT_4 = 0x00040000,
+	PISP_IMAGE_FORMAT_SHIFT_5 = 0x00050000,
+	PISP_IMAGE_FORMAT_SHIFT_6 = 0x00060000,
+	PISP_IMAGE_FORMAT_SHIFT_7 = 0x00070000,
+	PISP_IMAGE_FORMAT_SHIFT_8 = 0x00080000,
+	PISP_IMAGE_FORMAT_SHIFT_MASK = 0x000f0000,
+
+	PISP_IMAGE_FORMAT_UNCOMPRESSED = 0x00000000,
+	PISP_IMAGE_FORMAT_COMPRESSION_MODE_1 = 0x01000000,
+	PISP_IMAGE_FORMAT_COMPRESSION_MODE_2 = 0x02000000,
+	PISP_IMAGE_FORMAT_COMPRESSION_MODE_3 = 0x03000000,
+	PISP_IMAGE_FORMAT_COMPRESSION_MASK = 0x03000000,
+
+	PISP_IMAGE_FORMAT_HOG_SIGNED = 0x04000000,
+	PISP_IMAGE_FORMAT_HOG_UNSIGNED = 0x08000000,
+	PISP_IMAGE_FORMAT_INTEGRAL_IMAGE = 0x10000000,
+	PISP_IMAGE_FORMAT_WALLPAPER_ROLL = 0x20000000,
+	PISP_IMAGE_FORMAT_THREE_CHANNEL = 0x40000000,
+
+	/* Lastly a few specific instantiations of the above. */
+	PISP_IMAGE_FORMAT_SINGLE_16 = PISP_IMAGE_FORMAT_BPS_16,
+	PISP_IMAGE_FORMAT_THREE_16 = PISP_IMAGE_FORMAT_BPS_16 |
+				     PISP_IMAGE_FORMAT_THREE_CHANNEL
+};
+
+#define PISP_IMAGE_FORMAT_bps_8(fmt)                                           \
+	(((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_8)
+#define PISP_IMAGE_FORMAT_bps_10(fmt)                                          \
+	(((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_10)
+#define PISP_IMAGE_FORMAT_bps_12(fmt)                                          \
+	(((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_12)
+#define PISP_IMAGE_FORMAT_bps_16(fmt)                                          \
+	(((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_16)
+#define PISP_IMAGE_FORMAT_bps(fmt)                                             \
+	(((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) ?                                \
+	       8 + (2 << (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) - 1)) : 8)
+#define PISP_IMAGE_FORMAT_shift(fmt)                                           \
+	(((fmt) & PISP_IMAGE_FORMAT_SHIFT_MASK) / PISP_IMAGE_FORMAT_SHIFT_1)
+#define PISP_IMAGE_FORMAT_three_channel(fmt)                                   \
+	((fmt) & PISP_IMAGE_FORMAT_THREE_CHANNEL)
+#define PISP_IMAGE_FORMAT_single_channel(fmt)                                  \
+	(!((fmt) & PISP_IMAGE_FORMAT_THREE_CHANNEL))
+#define PISP_IMAGE_FORMAT_compressed(fmt)                                      \
+	(((fmt) & PISP_IMAGE_FORMAT_COMPRESSION_MASK) !=                       \
+	 PISP_IMAGE_FORMAT_UNCOMPRESSED)
+#define PISP_IMAGE_FORMAT_sampling_444(fmt)                                    \
+	(((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) ==                          \
+	 PISP_IMAGE_FORMAT_SAMPLING_444)
+#define PISP_IMAGE_FORMAT_sampling_422(fmt)                                    \
+	(((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) ==                          \
+	 PISP_IMAGE_FORMAT_SAMPLING_422)
+#define PISP_IMAGE_FORMAT_sampling_420(fmt)                                    \
+	(((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) ==                          \
+	 PISP_IMAGE_FORMAT_SAMPLING_420)
+#define PISP_IMAGE_FORMAT_order_normal(fmt)                                    \
+	(!((fmt) & PISP_IMAGE_FORMAT_ORDER_SWAPPED))
+#define PISP_IMAGE_FORMAT_order_swapped(fmt)                                   \
+	((fmt) & PISP_IMAGE_FORMAT_ORDER_SWAPPED)
+#define PISP_IMAGE_FORMAT_interleaved(fmt)                                     \
+	(((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) ==                         \
+	 PISP_IMAGE_FORMAT_PLANARITY_INTERLEAVED)
+#define PISP_IMAGE_FORMAT_semiplanar(fmt)                                      \
+	(((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) ==                         \
+	 PISP_IMAGE_FORMAT_PLANARITY_SEMI_PLANAR)
+#define PISP_IMAGE_FORMAT_planar(fmt)                                          \
+	(((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) ==                         \
+	 PISP_IMAGE_FORMAT_PLANARITY_PLANAR)
+#define PISP_IMAGE_FORMAT_wallpaper(fmt)                                       \
+	((fmt) & PISP_IMAGE_FORMAT_WALLPAPER_ROLL)
+#define PISP_IMAGE_FORMAT_HOG(fmt)                                             \
+	((fmt) &                                                               \
+	 (PISP_IMAGE_FORMAT_HOG_SIGNED | PISP_IMAGE_FORMAT_HOG_UNSIGNED))
+
+#define PISP_WALLPAPER_WIDTH 128 /* in bytes */
+
+struct pisp_bla_config {
+	__u16 black_level_r;
+	__u16 black_level_gr;
+	__u16 black_level_gb;
+	__u16 black_level_b;
+	__u16 output_black_level;
+	__u8 pad[2];
+} __attribute__((packed));
+
+struct pisp_wbg_config {
+	__u16 gain_r;
+	__u16 gain_g;
+	__u16 gain_b;
+	__u8 pad[2];
+} __attribute__((packed));
+
+struct pisp_compress_config {
+	/* value subtracted from incoming data */
+	__u16 offset;
+	__u8 pad;
+	/* 1 => Companding; 2 => Delta (recommended); 3 => Combined (for HDR) */
+	__u8 mode;
+} __attribute__((packed));
+
+struct pisp_decompress_config {
+	/* value added to reconstructed data */
+	__u16 offset;
+	__u8 pad;
+	/* 1 => Companding; 2 => Delta (recommended); 3 => Combined (for HDR) */
+	__u8 mode;
+} __attribute__((packed));
+
+enum pisp_axi_flags {
+	/*
+	 * round down bursts to end at a 32-byte boundary, to align following
+	 * bursts
+	 */
+	PISP_AXI_FLAG_ALIGN = 128,
+	/* for FE writer: force WSTRB high, to pad output to 16-byte boundary */
+	PISP_AXI_FLAG_PAD = 64,
+	/* for FE writer: Use Output FIFO level to trigger "panic" */
+	PISP_AXI_FLAG_PANIC = 32,
+};
+
+struct pisp_axi_config {
+	/*
+	 * burst length minus one, which must be in the range 0:15; OR'd with
+	 * flags
+	 */
+	__u8 maxlen_flags;
+	/* { prot[2:0], cache[3:0] } fields, echoed on AXI bus */
+	__u8 cache_prot;
+	/* QoS field(s) (4x4 bits for FE writer; 4 bits for other masters) */
+	__u16 qos;
+} __attribute__((packed));
+
+#endif /* _UAPI_PISP_COMMON_H_ */
-- 
cgit v1.2.3


From 8f6c2202222faffa0959929d8cd9090254a60831 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Date: Wed, 26 Jun 2024 20:14:34 +0200
Subject: media: uapi: Add meta pixel format for PiSP BE config

Add format description for the PiSP Back End configuration parameter
buffer.

Signed-off-by: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Reviewed-by: Naushir Patuck <naush@raspberrypi.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/uapi/linux/videodev2.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index dd6876380fe3..96fc0456081e 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -843,6 +843,9 @@ struct v4l2_pix_format {
 #define V4L2_META_FMT_RK_ISP1_PARAMS	v4l2_fourcc('R', 'K', '1', 'P') /* Rockchip ISP1 3A Parameters */
 #define V4L2_META_FMT_RK_ISP1_STAT_3A	v4l2_fourcc('R', 'K', '1', 'S') /* Rockchip ISP1 3A Statistics */
 
+/* Vendor specific - used for RaspberryPi PiSP */
+#define V4L2_META_FMT_RPI_BE_CFG	v4l2_fourcc('R', 'P', 'B', 'C') /* PiSP BE configuration */
+
 #ifdef __KERNEL__
 /*
  * Line-based metadata formats. Remember to update v4l_fill_fmtdesc() when
-- 
cgit v1.2.3


From d260c1224786c870edbae09ef6ecf5f35361821e Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Date: Wed, 26 Jun 2024 20:14:35 +0200
Subject: media: uapi: Add PiSP Compressed RAW Bayer formats

Add Raspberry Pi compressed RAW Bayer formats.

The compression algorithm description is provided by Nick Hollinghurst
<nick.hollinghurst@raspberrypi.com> from Raspberry Pi.

Signed-off-by: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Reviewed-by: Naushir Patuck <naush@raspberrypi.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/uapi/linux/videodev2.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 96fc0456081e..4e91362da6da 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -816,6 +816,18 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_IPU3_SGRBG10	v4l2_fourcc('i', 'p', '3', 'G') /* IPU3 packed 10-bit GRBG bayer */
 #define V4L2_PIX_FMT_IPU3_SRGGB10	v4l2_fourcc('i', 'p', '3', 'r') /* IPU3 packed 10-bit RGGB bayer */
 
+/* Raspberry Pi PiSP compressed formats. */
+#define V4L2_PIX_FMT_PISP_COMP1_RGGB	v4l2_fourcc('P', 'C', '1', 'R') /* PiSP 8-bit mode 1 compressed RGGB bayer */
+#define V4L2_PIX_FMT_PISP_COMP1_GRBG	v4l2_fourcc('P', 'C', '1', 'G') /* PiSP 8-bit mode 1 compressed GRBG bayer */
+#define V4L2_PIX_FMT_PISP_COMP1_GBRG	v4l2_fourcc('P', 'C', '1', 'g') /* PiSP 8-bit mode 1 compressed GBRG bayer */
+#define V4L2_PIX_FMT_PISP_COMP1_BGGR	v4l2_fourcc('P', 'C', '1', 'B') /* PiSP 8-bit mode 1 compressed BGGR bayer */
+#define V4L2_PIX_FMT_PISP_COMP1_MONO	v4l2_fourcc('P', 'C', '1', 'M') /* PiSP 8-bit mode 1 compressed monochrome */
+#define V4L2_PIX_FMT_PISP_COMP2_RGGB	v4l2_fourcc('P', 'C', '2', 'R') /* PiSP 8-bit mode 2 compressed RGGB bayer */
+#define V4L2_PIX_FMT_PISP_COMP2_GRBG	v4l2_fourcc('P', 'C', '2', 'G') /* PiSP 8-bit mode 2 compressed GRBG bayer */
+#define V4L2_PIX_FMT_PISP_COMP2_GBRG	v4l2_fourcc('P', 'C', '2', 'g') /* PiSP 8-bit mode 2 compressed GBRG bayer */
+#define V4L2_PIX_FMT_PISP_COMP2_BGGR	v4l2_fourcc('P', 'C', '2', 'B') /* PiSP 8-bit mode 2 compressed BGGR bayer */
+#define V4L2_PIX_FMT_PISP_COMP2_MONO	v4l2_fourcc('P', 'C', '2', 'M') /* PiSP 8-bit mode 2 compressed monochrome */
+
 /* SDR formats - used only for Software Defined Radio devices */
 #define V4L2_SDR_FMT_CU8          v4l2_fourcc('C', 'U', '0', '8') /* IQ u8 */
 #define V4L2_SDR_FMT_CU16LE       v4l2_fourcc('C', 'U', '1', '6') /* IQ u16le */
-- 
cgit v1.2.3


From 7e1f4eb9a60d40dd17a97d9b76818682a024a127 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 4 Apr 2024 12:04:54 +0200
Subject: kallsyms: rework symbol lookup return codes

Building with W=1 in some configurations produces a false positive
warning for kallsyms:

kernel/kallsyms.c: In function '__sprint_symbol.isra':
kernel/kallsyms.c:503:17: error: 'strcpy' source argument is the same as destination [-Werror=restrict]
  503 |                 strcpy(buffer, name);
      |                 ^~~~~~~~~~~~~~~~~~~~

This originally showed up while building with -O3, but later started
happening in other configurations as well, depending on inlining
decisions. The underlying issue is that the local 'name' variable is
always initialized to the be the same as 'buffer' in the called functions
that fill the buffer, which gcc notices while inlining, though it could
see that the address check always skips the copy.

The calling conventions here are rather unusual, as all of the internal
lookup functions (bpf_address_lookup, ftrace_mod_address_lookup,
ftrace_func_address_lookup, module_address_lookup and
kallsyms_lookup_buildid) already use the provided buffer and either return
the address of that buffer to indicate success, or NULL for failure,
but the callers are written to also expect an arbitrary other buffer
to be returned.

Rework the calling conventions to return the length of the filled buffer
instead of its address, which is simpler and easier to follow as well
as avoiding the warning. Leave only the kallsyms_lookup() calling conventions
unchanged, since that is called from 16 different functions and
adapting this would be a much bigger change.

Link: https://lore.kernel.org/lkml/20200107214042.855757-1-arnd@arndb.de/
Link: https://lore.kernel.org/lkml/20240326130647.7bfb1d92@gandalf.local.home/
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/filter.h | 14 +++++++-------
 include/linux/ftrace.h |  6 +++---
 include/linux/module.h | 14 +++++++-------
 3 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0f12cf01070e..5669da513cd7 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1208,18 +1208,18 @@ static inline bool bpf_jit_kallsyms_enabled(void)
 	return false;
 }
 
-const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
+int __bpf_address_lookup(unsigned long addr, unsigned long *size,
 				 unsigned long *off, char *sym);
 bool is_bpf_text_address(unsigned long addr);
 int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 		    char *sym);
 struct bpf_prog *bpf_prog_ksym_find(unsigned long addr);
 
-static inline const char *
+static inline int
 bpf_address_lookup(unsigned long addr, unsigned long *size,
 		   unsigned long *off, char **modname, char *sym)
 {
-	const char *ret = __bpf_address_lookup(addr, size, off, sym);
+	int ret = __bpf_address_lookup(addr, size, off, sym);
 
 	if (ret && modname)
 		*modname = NULL;
@@ -1263,11 +1263,11 @@ static inline bool bpf_jit_kallsyms_enabled(void)
 	return false;
 }
 
-static inline const char *
+static inline int
 __bpf_address_lookup(unsigned long addr, unsigned long *size,
 		     unsigned long *off, char *sym)
 {
-	return NULL;
+	return 0;
 }
 
 static inline bool is_bpf_text_address(unsigned long addr)
@@ -1286,11 +1286,11 @@ static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
 	return NULL;
 }
 
-static inline const char *
+static inline int
 bpf_address_lookup(unsigned long addr, unsigned long *size,
 		   unsigned long *off, char **modname, char *sym)
 {
-	return NULL;
+	return 0;
 }
 
 static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 800995c425e0..b792274189a3 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -86,15 +86,15 @@ struct ftrace_hash;
 
 #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \
 	defined(CONFIG_DYNAMIC_FTRACE)
-const char *
+int
 ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
 		   unsigned long *off, char **modname, char *sym);
 #else
-static inline const char *
+static inline int
 ftrace_mod_address_lookup(unsigned long addr, unsigned long *size,
 		   unsigned long *off, char **modname, char *sym)
 {
-	return NULL;
+	return 0;
 }
 #endif
 
diff --git a/include/linux/module.h b/include/linux/module.h
index ffa1c603163c..330ffb59efe5 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -931,11 +931,11 @@ int module_kallsyms_on_each_symbol(const char *modname,
  * least KSYM_NAME_LEN long: a pointer to namebuf is returned if
  * found, otherwise NULL.
  */
-const char *module_address_lookup(unsigned long addr,
-				  unsigned long *symbolsize,
-				  unsigned long *offset,
-				  char **modname, const unsigned char **modbuildid,
-				  char *namebuf);
+int module_address_lookup(unsigned long addr,
+			  unsigned long *symbolsize,
+			  unsigned long *offset,
+			  char **modname, const unsigned char **modbuildid,
+			  char *namebuf);
 int lookup_module_symbol_name(unsigned long addr, char *symname);
 int lookup_module_symbol_attrs(unsigned long addr,
 			       unsigned long *size,
@@ -964,14 +964,14 @@ static inline int module_kallsyms_on_each_symbol(const char *modname,
 }
 
 /* For kallsyms to ask for address resolution.  NULL means not found. */
-static inline const char *module_address_lookup(unsigned long addr,
+static inline int module_address_lookup(unsigned long addr,
 						unsigned long *symbolsize,
 						unsigned long *offset,
 						char **modname,
 						const unsigned char **modbuildid,
 						char *namebuf)
 {
-	return NULL;
+	return 0;
 }
 
 static inline int lookup_module_symbol_name(unsigned long addr, char *symname)
-- 
cgit v1.2.3


From 1bc6d4452d5c91beb09e37a98a590808e1997b79 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Tue, 30 Apr 2024 13:54:46 +0200
Subject: fs: new helper vfs_empty_path()

Make it possible to quickly check whether AT_EMPTY_PATH is valid.
Note, after some discussion we decided to also allow NULL to be passed
instead of requiring the empty string.

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 942cb11dba96..5ff362277834 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3631,4 +3631,21 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
 extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
 			   int advice);
 
+static inline bool vfs_empty_path(int dfd, const char __user *path)
+{
+	char c;
+
+	if (dfd < 0)
+		return false;
+
+	/* We now allow NULL to be used for empty path. */
+	if (!path)
+		return true;
+
+	if (unlikely(get_user(c, path)))
+		return false;
+
+	return !c;
+}
+
 #endif /* _LINUX_FS_H */
-- 
cgit v1.2.3


From f378ec4eec8b7e607dbc0112913fd3d5d84eb1b8 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Thu, 27 Jun 2024 18:11:52 +0200
Subject: vfs: rename parent_ino to d_parent_ino and make it use RCU

The routine is used by procfs through dir_emit_dots.

The combined RCU and lock fallback implementation is too big for an
inline. Given that the routine takes a dentry argument fs/dcache.c seems
like the place to put it in.

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Link: https://lore.kernel.org/r/20240627161152.802567-1-mjguzik@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/dcache.h |  2 ++
 include/linux/fs.h     | 16 +---------------
 2 files changed, 3 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index bf53e3894aae..ea58843942b9 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -278,6 +278,8 @@ static inline unsigned d_count(const struct dentry *dentry)
 	return dentry->d_lockref.count;
 }
 
+ino_t d_parent_ino(struct dentry *dentry);
+
 /*
  * helper function for dentry_operations.d_dname() members
  */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5ff362277834..2fa06a4d197a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3454,20 +3454,6 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
 	return 0;
 }
 
-static inline ino_t parent_ino(struct dentry *dentry)
-{
-	ino_t res;
-
-	/*
-	 * Don't strictly need d_lock here? If the parent ino could change
-	 * then surely we'd have a deeper race in the caller?
-	 */
-	spin_lock(&dentry->d_lock);
-	res = dentry->d_parent->d_inode->i_ino;
-	spin_unlock(&dentry->d_lock);
-	return res;
-}
-
 /* Transaction based IO helpers */
 
 /*
@@ -3592,7 +3578,7 @@ static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx)
 static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx)
 {
 	return ctx->actor(ctx, "..", 2, ctx->pos,
-			  parent_ino(file->f_path.dentry), DT_DIR);
+			  d_parent_ino(file->f_path.dentry), DT_DIR);
 }
 static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
 {
-- 
cgit v1.2.3


From 63db4a1f795a19e4e12f036a12a5f61c48b03e5c Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Thu, 27 Jun 2024 16:07:35 +0000
Subject: block: Delete blk_queue_flag_test_and_set()

Since commit 70200574cc22 ("block: remove QUEUE_FLAG_DISCARD"),
blk_queue_flag_test_and_set() has not been used, so delete it.

Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20240627160735.842189-1-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a53e3434e1a2..53c41ef4222c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -609,7 +609,6 @@ struct request_queue {
 
 void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
 void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
-bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_dying(q)	test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
-- 
cgit v1.2.3


From dd6d7f8574d7f8b6a0bf1aeef0b285d2706b8c2a Mon Sep 17 00:00:00 2001
From: Akiva Goldberger <agoldberger@nvidia.com>
Date: Thu, 27 Jun 2024 21:23:49 +0300
Subject: RDMA: Pass entire uverbs attr bundle to create cq function

Changes the create_cq verb signature by sending the entire uverbs attr
bundle as a parameter. This allows drivers to send driver specific attrs
through ioctl for the create_cq verb and access them in their driver
specific code.

Also adds a new enum value for driver specific ioctl attributes for
methods already supporting UHW.

Link: https://lore.kernel.org/r/ed147343987c0d43fd391c1b2f85e2f425747387.1719512393.git.leon@kernel.org
Signed-off-by: Akiva Goldberger <agoldberger@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/rdma/ib_verbs.h                | 2 +-
 include/uapi/rdma/ib_user_ioctl_cmds.h | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5a193008f99c..825043512b2d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2465,7 +2465,7 @@ struct ib_device_ops {
 			int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
 	int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata);
 	int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr,
-			 struct ib_udata *udata);
+			 struct uverbs_attr_bundle *attrs);
 	int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
 	int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
 	int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index dafc7ebe545b..ec719053aab9 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -37,9 +37,6 @@
 #define UVERBS_ID_NS_MASK 0xF000
 #define UVERBS_ID_NS_SHIFT 12
 
-#define UVERBS_UDATA_DRIVER_DATA_NS	1
-#define UVERBS_UDATA_DRIVER_DATA_FLAG	(1UL << UVERBS_ID_NS_SHIFT)
-
 enum uverbs_default_objects {
 	UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */
 	UVERBS_OBJECT_PD,
@@ -61,8 +58,10 @@ enum uverbs_default_objects {
 };
 
 enum {
-	UVERBS_ATTR_UHW_IN = UVERBS_UDATA_DRIVER_DATA_FLAG,
+	UVERBS_ID_DRIVER_NS = 1UL << UVERBS_ID_NS_SHIFT,
+	UVERBS_ATTR_UHW_IN = UVERBS_ID_DRIVER_NS,
 	UVERBS_ATTR_UHW_OUT,
+	UVERBS_ID_DRIVER_NS_WITH_UHW,
 };
 
 enum uverbs_methods_device {
-- 
cgit v1.2.3


From 589b844f1bf04850d9fabcaa2e943325dc6768b4 Mon Sep 17 00:00:00 2001
From: Akiva Goldberger <agoldberger@nvidia.com>
Date: Thu, 27 Jun 2024 21:23:50 +0300
Subject: RDMA/mlx5: Send UAR page index as ioctl attribute

Add UAR page index as a driver ioctl attribute to increase the number of
supported indices, previously limited to 16 bits by mlx5_ib_create_cq
struct.

Link: https://lore.kernel.org/r/0e18b34d7ec3b1ae02d694b0d545aed7413c0ef7.1719512393.git.leon@kernel.org
Signed-off-by: Akiva Goldberger <agoldberger@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/rdma/mlx5_user_ioctl_cmds.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 595edad03dfe..5b74d6534899 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -270,6 +270,10 @@ enum mlx5_ib_device_query_context_attrs {
 	MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX = (1U << UVERBS_ID_NS_SHIFT),
 };
 
+enum mlx5_ib_create_cq_attrs {
+	MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX = UVERBS_ID_DRIVER_NS_WITH_UHW,
+};
+
 #define MLX5_IB_DW_MATCH_PARAM 0xA0
 
 struct mlx5_ib_match_params {
-- 
cgit v1.2.3


From 12eba993b94c9186e44a01f46e38b3ab3c635f2d Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Fri, 17 May 2024 20:40:01 +0800
Subject: ext4: make ext4_es_insert_delayed_block() insert multi-blocks

Rename ext4_es_insert_delayed_block() to ext4_es_insert_delayed_extent()
and pass length parameter to make it insert multiple delalloc blocks at
a time. For the case of bigalloc, split the allocated parameter to
lclu_allocated and end_allocated. lclu_allocated indicates the
allocation state of the cluster which is containing the lblk,
end_allocated indicates the allocation state of the extent end, clusters
in the middle of delay allocated extent must be unallocated.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20240517124005.347221-7-yi.zhang@huaweicloud.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/trace/events/ext4.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index a697f4b77162..6b41ac61310f 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2478,11 +2478,11 @@ TRACE_EVENT(ext4_es_shrink,
 		  __entry->scan_time, __entry->nr_skipped, __entry->retried)
 );
 
-TRACE_EVENT(ext4_es_insert_delayed_block,
+TRACE_EVENT(ext4_es_insert_delayed_extent,
 	TP_PROTO(struct inode *inode, struct extent_status *es,
-		 bool allocated),
+		 bool lclu_allocated, bool end_allocated),
 
-	TP_ARGS(inode, es, allocated),
+	TP_ARGS(inode, es, lclu_allocated, end_allocated),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,		dev		)
@@ -2491,7 +2491,8 @@ TRACE_EVENT(ext4_es_insert_delayed_block,
 		__field(	ext4_lblk_t,	len		)
 		__field(	ext4_fsblk_t,	pblk		)
 		__field(	char,		status		)
-		__field(	bool,		allocated	)
+		__field(	bool,		lclu_allocated	)
+		__field(	bool,		end_allocated	)
 	),
 
 	TP_fast_assign(
@@ -2501,16 +2502,17 @@ TRACE_EVENT(ext4_es_insert_delayed_block,
 		__entry->len		= es->es_len;
 		__entry->pblk		= ext4_es_show_pblock(es);
 		__entry->status		= ext4_es_status(es);
-		__entry->allocated	= allocated;
+		__entry->lclu_allocated	= lclu_allocated;
+		__entry->end_allocated	= end_allocated;
 	),
 
 	TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s "
-		  "allocated %d",
+		  "allocated %d %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->lblk, __entry->len,
 		  __entry->pblk, show_extent_status(__entry->status),
-		  __entry->allocated)
+		  __entry->lclu_allocated, __entry->end_allocated)
 );
 
 /* fsmap traces */
-- 
cgit v1.2.3


From 0d66b23d79c750276f791411d81a524549a64852 Mon Sep 17 00:00:00 2001
From: Zhang Yi <yi.zhang@huawei.com>
Date: Fri, 17 May 2024 20:40:02 +0800
Subject: ext4: make ext4_da_reserve_space() reserve multi-clusters

Add 'nr_resv' parameter to ext4_da_reserve_space(), which indicates the
number of clusters wants to reserve, make it reserve multiple clusters
at a time.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20240517124005.347221-8-yi.zhang@huaweicloud.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/trace/events/ext4.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 6b41ac61310f..cc5e9b7b2b44 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -1246,14 +1246,15 @@ TRACE_EVENT(ext4_da_update_reserve_space,
 );
 
 TRACE_EVENT(ext4_da_reserve_space,
-	TP_PROTO(struct inode *inode),
+	TP_PROTO(struct inode *inode, int nr_resv),
 
-	TP_ARGS(inode),
+	TP_ARGS(inode, nr_resv),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	__u64,	i_blocks		)
+		__field(	int,	reserve_blocks		)
 		__field(	int,	reserved_data_blocks	)
 		__field(	__u16,  mode			)
 	),
@@ -1262,16 +1263,17 @@ TRACE_EVENT(ext4_da_reserve_space,
 		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->i_blocks = inode->i_blocks;
+		__entry->reserve_blocks = nr_resv;
 		__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
 		__entry->mode	= inode->i_mode;
 	),
 
-	TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu "
+	TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu reserve_blocks %d"
 		  "reserved_data_blocks %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->mode, __entry->i_blocks,
-		  __entry->reserved_data_blocks)
+		  __entry->reserve_blocks, __entry->reserved_data_blocks)
 );
 
 TRACE_EVENT(ext4_da_release_space,
-- 
cgit v1.2.3


From d3dcb084c70727be4a2f61bd94796e66147cfa35 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Wed, 26 Jun 2024 05:06:17 +0200
Subject: net: phy: phy_device: Fix PHY LED blinking code comment

Fix copy-paste error in the code comment. The code refers to
LED blinking configuration, not brightness configuration. It
was likely copied from comment above this one which does
refer to brightness configuration.

Fixes: 4e901018432e ("net: phy: phy_device: Call into the PHY driver to set LED blinking")
Signed-off-by: Marek Vasut <marex@denx.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/20240626030638.512069-1-marex@denx.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index e6e83304558e..3be430cf3132 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1122,7 +1122,7 @@ struct phy_driver {
 				  u8 index, enum led_brightness value);
 
 	/**
-	 * @led_blink_set: Set a PHY LED brightness.  Index indicates
+	 * @led_blink_set: Set a PHY LED blinking.  Index indicates
 	 * which of the PHYs led should be configured to blink. Delays
 	 * are in milliseconds and if both are zero then a sensible
 	 * default should be chosen.  The call should adjust the
-- 
cgit v1.2.3


From 502099acb8cbf08acb6b43ff2b8da43d2bf85166 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Thu, 27 Jun 2024 18:08:47 +0100
Subject: firewire: core: Fix spelling mistakes in tracepoint messages

There are two spelling mistakes in the tracepoint message text. Fix them.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20240627170847.125531-1-colin.i.king@gmail.com
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index aa00c9f33551..bac9d98e88e5 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -854,7 +854,7 @@ DECLARE_EVENT_CLASS(isoc_single_completions_template,
 		memcpy(__get_dynamic_array(header), header, __get_dynamic_array_len(header));
 	),
 	TP_printk(
-		"context=0x%llx card_index=%u timestap=0x%04x cause=%s header=%s",
+		"context=0x%llx card_index=%u timestamp=0x%04x cause=%s header=%s",
 		__entry->context,
 		__entry->card_index,
 		__entry->timestamp,
@@ -892,7 +892,7 @@ TRACE_EVENT(isoc_inbound_multiple_completions,
 		__entry->cause = cause;
 	),
 	TP_printk(
-		"context=0x%llx card_index=%u comleted=%u cause=%s",
+		"context=0x%llx card_index=%u completed=%u cause=%s",
 		__entry->context,
 		__entry->card_index,
 		__entry->completed,
-- 
cgit v1.2.3


From ff2c570ef7eaa9ded58e7a02dd7a68874a897508 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 7 Jun 2024 16:55:35 +0200
Subject: path: add cleanup helper

Add a simple cleanup helper so we can cleanup struct path easily.
No need for any extra machinery. Avoid DEFINE_FREE() as it causes a
local copy of struct path to be used. Just rely on path_put() directly
called from a cleanup helper.

Link: https://lore.kernel.org/r/20240607-vfs-listmount-reverse-v1-2-7877a2bfa5e5@kernel.org
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/path.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/path.h b/include/linux/path.h
index 475225a03d0d..ca073e70decd 100644
--- a/include/linux/path.h
+++ b/include/linux/path.h
@@ -24,4 +24,13 @@ static inline void path_put_init(struct path *path)
 	*path = (struct path) { };
 }
 
+/*
+ * Cleanup macro for use with __free(path_put). Avoids dereference and
+ * copying @path unlike DEFINE_FREE(). path_put() will handle the empty
+ * path correctly just ensure @path is initialized:
+ *
+ * struct path path __free(path_put) = {};
+ */
+#define __free_path_put path_put
+
 #endif  /* _LINUX_PATH_H */
-- 
cgit v1.2.3


From d04bccd8c19d601232ed3e3c9e248c0040167d47 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 7 Jun 2024 16:55:37 +0200
Subject: listmount: allow listing in reverse order

util-linux is about to implement listmount() and statmount() support.
Karel requested the ability to scan the mount table in backwards order
because that's what libmount currently does in order to get the latest
mount first. We currently don't support this in listmount(). Add a new
LISTMOUNT_REVERSE flag to allow listing mounts in reverse order. For
example, listing all child mounts of /sys without LISTMOUNT_REVERSE
gives:

    /sys/kernel/security @ mnt_id: 4294968369
    /sys/fs/cgroup @ mnt_id: 4294968370
    /sys/firmware/efi/efivars @ mnt_id: 4294968371
    /sys/fs/bpf @ mnt_id: 4294968372
    /sys/kernel/tracing @ mnt_id: 4294968373
    /sys/kernel/debug @ mnt_id: 4294968374
    /sys/fs/fuse/connections @ mnt_id: 4294968375
    /sys/kernel/config @ mnt_id: 4294968376

whereas with LISTMOUNT_REVERSE it gives:

    /sys/kernel/config @ mnt_id: 4294968376
    /sys/fs/fuse/connections @ mnt_id: 4294968375
    /sys/kernel/debug @ mnt_id: 4294968374
    /sys/kernel/tracing @ mnt_id: 4294968373
    /sys/fs/bpf @ mnt_id: 4294968372
    /sys/firmware/efi/efivars @ mnt_id: 4294968371
    /sys/fs/cgroup @ mnt_id: 4294968370
    /sys/kernel/security @ mnt_id: 4294968369

Link: https://lore.kernel.org/r/20240607-vfs-listmount-reverse-v1-4-7877a2bfa5e5@kernel.org
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/mount.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index ad5478dbad00..88d78de1519f 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -207,5 +207,6 @@ struct mnt_id_req {
  * Special @mnt_id values that can be passed to listmount
  */
 #define LSMT_ROOT		0xffffffffffffffff	/* root mount */
+#define LISTMOUNT_REVERSE	(1 << 0) /* List later mounts first */
 
 #endif /* _UAPI_LINUX_MOUNT_H */
-- 
cgit v1.2.3


From 09b31295f833031c88419550172703d45c5401e3 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Mon, 24 Jun 2024 11:49:47 -0400
Subject: fs: export the mount ns id via statmount

In order to allow users to iterate through children mount namespaces via
listmount we need a way for them to know what the ns id for the mount.
Add a new field to statmount called mnt_ns_id which will carry the ns id
for the given mount entry.

Co-developed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Link: https://lore.kernel.org/r/6dabf437331fb7415d886f7c64b21cb2a50b1c66.1719243756.git.josef@toxicpanda.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/mount.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index 88d78de1519f..a07508aee518 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -172,7 +172,8 @@ struct statmount {
 	__u64 propagate_from;	/* Propagation from in current namespace */
 	__u32 mnt_root;		/* [str] Root of mount relative to root of fs */
 	__u32 mnt_point;	/* [str] Mountpoint relative to current root */
-	__u64 __spare2[50];
+	__u64 mnt_ns_id;	/* ID of the mount namespace */
+	__u64 __spare2[49];
 	char str[];		/* Variable size part containing strings */
 };
 
@@ -202,6 +203,7 @@ struct mnt_id_req {
 #define STATMOUNT_MNT_ROOT		0x00000008U	/* Want/got mnt_root  */
 #define STATMOUNT_MNT_POINT		0x00000010U	/* Want/got mnt_point */
 #define STATMOUNT_FS_TYPE		0x00000020U	/* Want/got fs_type */
+#define STATMOUNT_MNT_NS_ID		0x00000040U	/* Want/got mnt_ns_id */
 
 /*
  * Special @mnt_id values that can be passed to listmount
-- 
cgit v1.2.3


From 0a3deb11858ae8a0b3849b5fda45512ad383f0e1 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Mon, 24 Jun 2024 11:49:48 -0400
Subject: fs: Allow listmount() in foreign mount namespace

Expand struct mnt_id_req to add an optional mnt_ns_id field.  When this
field is populated, listmount() will be performed on the specified mount
namespace, provided the currently application has CAP_SYS_ADMIN in its
user namespace and the mount namespace is a child of the current
namespace.

Co-developed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Link: https://lore.kernel.org/r/49930bdce29a8367a213eb14c1e68e7e49284f86.1719243756.git.josef@toxicpanda.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/mount.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index a07508aee518..ee1559cd6764 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -189,10 +189,12 @@ struct mnt_id_req {
 	__u32 spare;
 	__u64 mnt_id;
 	__u64 param;
+	__u64 mnt_ns_id;
 };
 
 /* List of all mnt_id_req versions. */
 #define MNT_ID_REQ_SIZE_VER0	24 /* sizeof first published struct */
+#define MNT_ID_REQ_SIZE_VER1	32 /* sizeof second published struct */
 
 /*
  * @mask bits for statmount(2)
-- 
cgit v1.2.3


From e8e43a1fcc5c07575f37e40f8a2cd78aee46f9a0 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Mon, 24 Jun 2024 11:49:50 -0400
Subject: fs: add an ioctl to get the mnt ns id from nsfs

In order to utilize the listmount() and statmount() extensions that
allow us to call them on different namespaces we need a way to get the
mnt namespace id from user space.  Add an ioctl to nsfs that will allow
us to extract the mnt namespace id in order to make these new extensions
usable.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Link: https://lore.kernel.org/r/180449959d5a756af7306d6bda55f41b9d53e3cb.1719243756.git.josef@toxicpanda.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/nsfs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
index a0c8552b64ee..56e8b1639b98 100644
--- a/include/uapi/linux/nsfs.h
+++ b/include/uapi/linux/nsfs.h
@@ -15,5 +15,7 @@
 #define NS_GET_NSTYPE		_IO(NSIO, 0x3)
 /* Get owner UID (in the caller's user namespace) for a user namespace */
 #define NS_GET_OWNER_UID	_IO(NSIO, 0x4)
+/* Get the id for a mount namespace */
+#define NS_GET_MNTNS_ID		_IO(NSIO, 0x5)
 
 #endif /* __LINUX_NSFS_H */
-- 
cgit v1.2.3


From 8c62617295d3c4cd03f1a02c3b9bf9d4e6d6e0c6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 27 Jun 2024 13:25:27 +0200
Subject: wifi: mac80211: remove DEAUTH_NEED_MGD_TX_PREP

This flag is annoying because it puts a lot of logic into mac80211
that could just as well be in the driver (only iwlmvm uses it) and
the implementation is also broken for MLO.

Remove the flag in favour of calling drv_mgd_prepare_tx() without
any conditions even for the deauth-while-assoc case. The drivers
that implement it can take the appropriate actions, which for the
only user of DEAUTH_NEED_MGD_TX_PREP (iwlmvm) is a bit more tricky
than the implementation in mac80211 is anyway, and all others have
no need and can just exit if info->was_assoc is set.

Reviewed-by: Miriam Rachel Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20240627132527.94924bcc9c9e.I328a219e45f2e2724cd52e75bb9feee3bf21a463@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 9c96e8ae9ef7..bd0f8aefa797 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2767,14 +2767,6 @@ struct ieee80211_txq {
  * @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on
  *	TDLS links.
  *
- * @IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP: The driver requires the
- *	mgd_prepare_tx() callback to be called before transmission of a
- *	deauthentication frame in case the association was completed but no
- *	beacon was heard. This is required in multi-channel scenarios, where the
- *	virtual interface might not be given air time for the transmission of
- *	the frame, as it is not synced with the AP/P2P GO yet, and thus the
- *	deauthentication frame might not be transmitted.
- *
  * @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't
  *	support QoS NDP for AP probing - that's most likely a driver bug.
  *
@@ -2874,7 +2866,6 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_REPORTS_LOW_ACK,
 	IEEE80211_HW_SUPPORTS_TX_FRAG,
 	IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA,
-	IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP,
 	IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP,
 	IEEE80211_HW_BUFF_MMPDU_TXQ,
 	IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW,
@@ -3787,13 +3778,15 @@ enum ieee80211_reconfig_type {
  * @success: whether the frame exchange was successful, only
  *	used with the mgd_complete_tx() method, and then only
  *	valid for auth and (re)assoc.
+ * @was_assoc: set if this call is due to deauth/disassoc
+ *	while just having been associated
  * @link_id: the link id on which the frame will be TX'ed.
  *	Only used with the mgd_prepare_tx() method.
  */
 struct ieee80211_prep_tx_info {
 	u16 duration;
 	u16 subtype;
-	u8 success:1;
+	u8 success:1, was_assoc:1;
 	int link_id;
 };
 
@@ -4242,12 +4235,9 @@ struct ieee80211_prep_tx_info {
  *	yet it need not necessarily be given airtime, in particular since any
  *	transmission to a P2P GO needs to be synchronized against the GO's
  *	powersave state. mac80211 will call this function before transmitting a
- *	management frame prior to having successfully associated to allow the
- *	driver to give it channel time for the transmission, to get a response
- *	and to be able to synchronize with the GO.
- *	For drivers that set %IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, mac80211
- *	would also call this function before transmitting a deauthentication
- *	frame in case that no beacon was heard from the AP/P2P GO.
+ *	management frame prior to transmitting that frame to allow the driver
+ *	to give it channel time for the transmission, to get a response and be
+ *	able to synchronize with the GO.
  *	The callback will be called before each transmission and upon return
  *	mac80211 will transmit the frame right away.
  *	Additional information is passed in the &struct ieee80211_prep_tx_info
-- 
cgit v1.2.3


From 816c6bec09ed5b90a58a1e12d5a606c5b6e23f47 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 27 Jun 2024 10:42:56 +0200
Subject: wifi: mac80211: fix BSS_CHANGED_UNSOL_BCAST_PROBE_RESP

Fix the definition of BSS_CHANGED_UNSOL_BCAST_PROBE_RESP so that
not all higher bits get set, 1<<31 is a signed variable, so when
we do

  u64 changed = BSS_CHANGED_UNSOL_BCAST_PROBE_RESP;

we get sign expansion, so the value is 0xffff'ffff'8000'0000 and
that's clearly not desired. Use BIT_ULL() to make it unsigned as
well as the right type for the change flags.

Fixes: 178e9d6adc43 ("wifi: mac80211: fix unsolicited broadcast probe config")
Reviewed-by: Miriam Rachel Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20240627104257.06174d291db2.Iba0d642916eb78a61f8ab2cc5ca9280783d9c1db@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index cafc664ee531..45ad37adbe32 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -395,7 +395,7 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_HE_OBSS_PD		= 1<<28,
 	BSS_CHANGED_HE_BSS_COLOR	= 1<<29,
 	BSS_CHANGED_FILS_DISCOVERY      = 1<<30,
-	BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31,
+	BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = BIT_ULL(31),
 	BSS_CHANGED_MLD_VALID_LINKS	= BIT_ULL(33),
 	BSS_CHANGED_MLD_TTLM		= BIT_ULL(34),
 
-- 
cgit v1.2.3


From c6269149cbf7053272d918101981869438ff7c1e Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Thu, 27 Jun 2024 16:11:39 +0200
Subject: file: add take_fd() cleanup helper

Add a helper that returns the file descriptor and ensures that the old
variable contains a negative value. This makes it easy to rely on
CLASS(get_unused_fd).

Link: https://lore.kernel.org/r/20240627-work-pidfs-v1-1-7e9ab6cc3bb1@kernel.org
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/cleanup.h | 13 ++++++++-----
 include/linux/file.h    | 20 ++++++++++++++++++++
 2 files changed, 28 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index c2d09bc4f976..80c4181e194a 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -63,17 +63,20 @@
 
 #define __free(_name)	__cleanup(__free_##_name)
 
-#define __get_and_null_ptr(p) \
-	({ __auto_type __ptr = &(p); \
-	   __auto_type __val = *__ptr; \
-	   *__ptr = NULL;  __val; })
+#define __get_and_null(p, nullvalue)   \
+	({                                  \
+		__auto_type __ptr = &(p);   \
+		__auto_type __val = *__ptr; \
+		*__ptr = nullvalue;         \
+		__val;                      \
+	})
 
 static inline __must_check
 const volatile void * __must_check_fn(const volatile void *val)
 { return val; }
 
 #define no_free_ptr(p) \
-	((typeof(p)) __must_check_fn(__get_and_null_ptr(p)))
+	((typeof(p)) __must_check_fn(__get_and_null(p, NULL)))
 
 #define return_ptr(p)	return no_free_ptr(p)
 
diff --git a/include/linux/file.h b/include/linux/file.h
index 45d0f4800abd..237931f20739 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -97,6 +97,26 @@ extern void put_unused_fd(unsigned int fd);
 DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T),
 	     get_unused_fd_flags(flags), unsigned flags)
 
+/*
+ * take_fd() will take care to set @fd to -EBADF ensuring that
+ * CLASS(get_unused_fd) won't call put_unused_fd(). This makes it
+ * easier to rely on CLASS(get_unused_fd):
+ *
+ * struct file *f;
+ *
+ * CLASS(get_unused_fd, fd)(O_CLOEXEC);
+ * if (fd < 0)
+ *         return fd;
+ *
+ * f = dentry_open(&path, O_RDONLY, current_cred());
+ * if (IS_ERR(f))
+ *         return PTR_ERR(fd);
+ *
+ * fd_install(fd, f);
+ * return take_fd(fd);
+ */
+#define take_fd(fd) __get_and_null(fd, -EBADF)
+
 extern void fd_install(unsigned int fd, struct file *file);
 
 int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags);
-- 
cgit v1.2.3


From d057c108155ab8d02b603c7ee5da7df615c2f32f Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Thu, 27 Jun 2024 16:11:40 +0200
Subject: nsproxy: add a cleanup helper for nsproxy

Add a simple cleanup helper for nsproxy.

Link: https://lore.kernel.org/r/20240627-work-pidfs-v1-2-7e9ab6cc3bb1@kernel.org
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/nsproxy.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 5601d14e2886..e6bec522b139 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -112,4 +112,6 @@ static inline void get_nsproxy(struct nsproxy *ns)
 	refcount_inc(&ns->count);
 }
 
+DEFINE_FREE(put_nsproxy, struct nsproxy *, if (_T) put_nsproxy(_T))
+
 #endif
-- 
cgit v1.2.3


From 85e4daaeb70bc68ec920e0c268eb428113d31b21 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Fri, 28 Jun 2024 10:05:19 +0200
Subject: nsproxy: add helper to go from arbitrary namespace to ns_common

They all contains struct ns_common ns and if there ever is one where
that isn't the case we'll catch it here at build time.

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/nsproxy.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index e6bec522b139..dab6a1734a22 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -42,6 +42,17 @@ struct nsproxy {
 };
 extern struct nsproxy init_nsproxy;
 
+#define to_ns_common(__ns)                              \
+	_Generic((__ns),                                \
+		struct cgroup_namespace *: &(__ns->ns), \
+		struct ipc_namespace *:    &(__ns->ns), \
+		struct net *:              &(__ns->ns), \
+		struct pid_namespace *:    &(__ns->ns), \
+		struct mnt_namespace *:    &(__ns->ns), \
+		struct time_namespace *:   &(__ns->ns), \
+		struct user_namespace *:   &(__ns->ns), \
+		struct uts_namespace *:    &(__ns->ns))
+
 /*
  * A structure to encompass all bits needed to install
  * a partial or complete new set of namespaces.
-- 
cgit v1.2.3


From 5b08bd408534bfb3a7cf5778da5b27d4e4fffe12 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Thu, 27 Jun 2024 16:11:42 +0200
Subject: pidfs: allow retrieval of namespace file descriptors

For users that hold a reference to a pidfd procfs might not even be
available nor is it desirable to parse through procfs just for the sake
of getting namespace file descriptors for a process.

Make it possible to directly retrieve namespace file descriptors from a
pidfd. Pidfds already can be used with setns() to change a set of
namespaces atomically.

Link: https://lore.kernel.org/r/20240627-work-pidfs-v1-4-7e9ab6cc3bb1@kernel.org
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/pidfd.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 72ec000a97cd..565fc0629fff 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -5,6 +5,7 @@
 
 #include <linux/types.h>
 #include <linux/fcntl.h>
+#include <linux/ioctl.h>
 
 /* Flags for pidfd_open().  */
 #define PIDFD_NONBLOCK	O_NONBLOCK
@@ -15,4 +16,17 @@
 #define PIDFD_SIGNAL_THREAD_GROUP	(1UL << 1)
 #define PIDFD_SIGNAL_PROCESS_GROUP	(1UL << 2)
 
+#define PIDFS_IOCTL_MAGIC 0xFF
+
+#define PIDFD_GET_CGROUP_NAMESPACE            _IO(PIDFS_IOCTL_MAGIC, 1)
+#define PIDFD_GET_IPC_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 2)
+#define PIDFD_GET_MNT_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 3)
+#define PIDFD_GET_NET_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 4)
+#define PIDFD_GET_PID_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 5)
+#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE  _IO(PIDFS_IOCTL_MAGIC, 6)
+#define PIDFD_GET_TIME_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 7)
+#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
+#define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9)
+#define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10)
+
 #endif /* _UAPI_LINUX_PIDFD_H */
-- 
cgit v1.2.3


From 69540b7987ef05d1dff36981d4cc7c31e9716b36 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 27 Jun 2024 17:08:48 +0300
Subject: ethtool: Add ethtool operation to write to a transceiver module
 EEPROM

Ethtool can already retrieve information from a transceiver module
EEPROM by invoking the ethtool_ops::get_module_eeprom_by_page operation.
Add a corresponding operation that allows ethtool to write to a
transceiver module EEPROM.

The new write operation is purely an in-kernel API and is not exposed to
user space.

The purpose of this operation is not to enable arbitrary read / write
access, but to allow the kernel to write to specific addresses as part
of transceiver module firmware flashing. In the future, more
functionality can be implemented on top of these read / write
operations.

Adjust the comments of the 'ethtool_module_eeprom' structure as it is
no longer used only for read access.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 959196af7f5a..c7f6f2bc9cac 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -506,17 +506,16 @@ struct ethtool_ts_stats {
 #define ETH_MODULE_MAX_I2C_ADDRESS	0x7f
 
 /**
- * struct ethtool_module_eeprom - EEPROM dump from specified page
- * @offset: Offset within the specified EEPROM page to begin read, in bytes.
- * @length: Number of bytes to read.
- * @page: Page number to read from.
- * @bank: Page bank number to read from, if applicable by EEPROM spec.
+ * struct ethtool_module_eeprom - plug-in module EEPROM read / write parameters
+ * @offset: When @offset is 0-127, it is used as an address to the Lower Memory
+ *	(@page must be 0). Otherwise, it is used as an address to the
+ *	Upper Memory.
+ * @length: Number of bytes to read / write.
+ * @page: Page number.
+ * @bank: Bank number, if supported by EEPROM spec.
  * @i2c_address: I2C address of a page. Value less than 0x7f expected. Most
  *	EEPROMs use 0x50 or 0x51.
  * @data: Pointer to buffer with EEPROM data of @length size.
- *
- * This can be used to manage pages during EEPROM dump in ethtool and pass
- * required information to the driver.
  */
 struct ethtool_module_eeprom {
 	u32	offset;
@@ -824,6 +823,8 @@ struct ethtool_rxfh_param {
  * @get_module_eeprom_by_page: Get a region of plug-in module EEPROM data from
  *	specified page. Returns a negative error code or the amount of bytes
  *	read.
+ * @set_module_eeprom_by_page: Write to a region of plug-in module EEPROM,
+ *	from kernel space only. Returns a negative error code or zero.
  * @get_eth_phy_stats: Query some of the IEEE 802.3 PHY statistics.
  * @get_eth_mac_stats: Query some of the IEEE 802.3 MAC statistics.
  * @get_eth_ctrl_stats: Query some of the IEEE 802.3 MAC Ctrl statistics.
@@ -958,6 +959,9 @@ struct ethtool_ops {
 	int	(*get_module_eeprom_by_page)(struct net_device *dev,
 					     const struct ethtool_module_eeprom *page,
 					     struct netlink_ext_ack *extack);
+	int	(*set_module_eeprom_by_page)(struct net_device *dev,
+					     const struct ethtool_module_eeprom *page,
+					     struct netlink_ext_ack *extack);
 	void	(*get_eth_phy_stats)(struct net_device *dev,
 				     struct ethtool_eth_phy_stats *phy_stats);
 	void	(*get_eth_mac_stats)(struct net_device *dev,
-- 
cgit v1.2.3


From 46fb3ba95b93d1887e6dfa02a535e0526062de95 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Thu, 27 Jun 2024 17:08:50 +0300
Subject: ethtool: Add an interface for flashing transceiver modules' firmware

CMIS compliant modules such as QSFP-DD might be running a firmware that
can be updated in a vendor-neutral way by exchanging messages between
the host and the module as described in section 7.3.1 of revision 5.2 of
the CMIS standard.

Add a pair of new ethtool messages that allow:

* User space to trigger firmware update of transceiver modules

* The kernel to notify user space about the progress of the process

The user interface is designed to be asynchronous in order to avoid
RTNL being held for too long and to allow several modules to be
updated simultaneously. The interface is designed with CMIS compliant
modules in mind, but kept generic enough to accommodate future use
cases, if these arise.

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h         | 18 ++++++++++++++++++
 include/uapi/linux/ethtool_netlink.h | 19 +++++++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 8733a3117902..e011384c915c 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -877,6 +877,24 @@ enum ethtool_mm_verify_status {
 	ETHTOOL_MM_VERIFY_STATUS_DISABLED,
 };
 
+/**
+ * enum ethtool_module_fw_flash_status - plug-in module firmware flashing status
+ * @ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED: The firmware flashing process has
+ *	started.
+ * @ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS: The firmware flashing process
+ *	is in progress.
+ * @ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED: The firmware flashing process was
+ *	completed successfully.
+ * @ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR: The firmware flashing process was
+ *	stopped due to an error.
+ */
+enum ethtool_module_fw_flash_status {
+	ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED = 1,
+	ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS,
+	ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED,
+	ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR,
+};
+
 /**
  * struct ethtool_gstrings - string set for data tagging
  * @cmd: Command number = %ETHTOOL_GSTRINGS
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index d15856c7e001..840dabdc9d88 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -57,6 +57,7 @@ enum {
 	ETHTOOL_MSG_PLCA_GET_STATUS,
 	ETHTOOL_MSG_MM_GET,
 	ETHTOOL_MSG_MM_SET,
+	ETHTOOL_MSG_MODULE_FW_FLASH_ACT,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_USER_CNT,
@@ -109,6 +110,7 @@ enum {
 	ETHTOOL_MSG_PLCA_NTF,
 	ETHTOOL_MSG_MM_GET_REPLY,
 	ETHTOOL_MSG_MM_NTF,
+	ETHTOOL_MSG_MODULE_FW_FLASH_NTF,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_KERNEL_CNT,
@@ -1018,6 +1020,23 @@ enum {
 	ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1)
 };
 
+/* MODULE_FW_FLASH */
+
+enum {
+	ETHTOOL_A_MODULE_FW_FLASH_UNSPEC,
+	ETHTOOL_A_MODULE_FW_FLASH_HEADER,		/* nest - _A_HEADER_* */
+	ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME,		/* string */
+	ETHTOOL_A_MODULE_FW_FLASH_PASSWORD,		/* u32 */
+	ETHTOOL_A_MODULE_FW_FLASH_STATUS,		/* u32 */
+	ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG,		/* string */
+	ETHTOOL_A_MODULE_FW_FLASH_DONE,			/* uint */
+	ETHTOOL_A_MODULE_FW_FLASH_TOTAL,		/* uint */
+
+	/* add new constants above here */
+	__ETHTOOL_A_MODULE_FW_FLASH_CNT,
+	ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1)
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
-- 
cgit v1.2.3


From 31e0aa99dc02b2b038a270b0670fc8201b69ec8a Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Thu, 27 Jun 2024 17:08:52 +0300
Subject: ethtool: Veto some operations during firmware flashing process

Some operations cannot be performed during the firmware flashing
process.

For example:

- Port must be down during the whole flashing process to avoid packet loss
  while committing reset for example.

- Writing to EEPROM interrupts the flashing process, so operations like
  ethtool dump, module reset, get and set power mode should be vetoed.

- Split port firmware flashing should be vetoed.

In order to veto those scenarios, add a flag in 'struct net_device' that
indicates when a firmware flash is taking place on the module and use it
to prevent interruptions during the process.

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cc18acd3c58b..1e3401093c13 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1990,6 +1990,8 @@ enum netdev_reg_state {
  *
  *	@threaded:	napi threaded mode is enabled
  *
+ *	@module_fw_flash_in_progress:	Module firmware flashing is in progress.
+ *
  *	@net_notifier_list:	List of per-net netdev notifier block
  *				that follow this device when it is moved
  *				to another network namespace.
@@ -2374,7 +2376,7 @@ struct net_device {
 	bool			proto_down;
 	bool			threaded;
 	unsigned		wol_enabled:1;
-
+	unsigned		module_fw_flash_in_progress:1;
 	struct list_head	net_notifier_list;
 
 #if IS_ENABLED(CONFIG_MACSEC)
-- 
cgit v1.2.3


From e4f91936993ce4d0954688ec3cb80b3471b9eda5 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Thu, 27 Jun 2024 17:08:53 +0300
Subject: net: sfp: Add more extended compliance codes

SFF-8024 is used to define various constants re-used in several SFF
SFP-related specifications.

Add SFF-8024 extended compliance code definitions for CMIS compliant
modules and use them in the next patch to determine the firmware flashing
work.

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sfp.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index a45da7eef9a2..b14be59550e3 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -284,6 +284,12 @@ enum {
 	SFF8024_ID_QSFP_8438		= 0x0c,
 	SFF8024_ID_QSFP_8436_8636	= 0x0d,
 	SFF8024_ID_QSFP28_8636		= 0x11,
+	SFF8024_ID_QSFP_DD		= 0x18,
+	SFF8024_ID_OSFP			= 0x19,
+	SFF8024_ID_DSFP			= 0x1B,
+	SFF8024_ID_QSFP_PLUS_CMIS	= 0x1E,
+	SFF8024_ID_SFP_DD_CMIS		= 0x1F,
+	SFF8024_ID_SFP_PLUS_CMIS	= 0x20,
 
 	SFF8024_ENCODING_UNSPEC		= 0x00,
 	SFF8024_ENCODING_8B10B		= 0x01,
-- 
cgit v1.2.3


From 048a403648fcef8bd9f4f1a290c57b626ad16296 Mon Sep 17 00:00:00 2001
From: Daniel Jurgens <danielj@nvidia.com>
Date: Thu, 27 Jun 2024 21:02:34 +0300
Subject: net/mlx5: IFC updates for changing max EQs

Expose new capability to support changing the number of EQs available
to other functions.

Fixes: 93197c7c509d ("mlx5/core: Support max_io_eqs for a function")
Signed-off-by: Daniel Jurgens <danielj@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: William Tu <witu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/mlx5_ifc.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 5df52e15f7d6..d45bfb7cf81d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2029,7 +2029,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
 	u8	   pcc_ifa2[0x1];
 	u8	   reserved_at_3f1[0xf];
 
-	u8	   reserved_at_400[0x400];
+	u8	   reserved_at_400[0x40];
+
+	u8	   reserved_at_440[0x8];
+	u8	   max_num_eqs_24b[0x18];
+	u8	   reserved_at_460[0x3a0];
 };
 
 enum mlx5_ifc_flow_destination_type {
-- 
cgit v1.2.3


From f9af549d1fd31487bbbc666b5b158cfc940ccc17 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Mon, 24 Jun 2024 15:40:52 -0400
Subject: fs: export mount options via statmount()

statmount() can export arbitrary strings, so utilize the __spare1 slot
for a mnt_opts string pointer, and then support asking for and setting
the mount options during statmount().  This calls into the helper for
showing mount options, which already uses a seq_file, so fits in nicely
with our existing mechanism for exporting strings via statmount().

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Link: https://lore.kernel.org/r/3aa6bf8bd5d0a21df9ebd63813af8ab532c18276.1719257716.git.josef@toxicpanda.com
Reviewed-by: Jeff Layton <jlayton@kernel.org>
[brauner: only call sb->s_op->show_options()]
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/uapi/linux/mount.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index ee1559cd6764..225bc366ffcb 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -154,7 +154,7 @@ struct mount_attr {
  */
 struct statmount {
 	__u32 size;		/* Total size, including strings */
-	__u32 __spare1;
+	__u32 mnt_opts;		/* [str] Mount options of the mount */
 	__u64 mask;		/* What results were written */
 	__u32 sb_dev_major;	/* Device ID */
 	__u32 sb_dev_minor;
@@ -206,6 +206,7 @@ struct mnt_id_req {
 #define STATMOUNT_MNT_POINT		0x00000010U	/* Want/got mnt_point */
 #define STATMOUNT_FS_TYPE		0x00000020U	/* Want/got fs_type */
 #define STATMOUNT_MNT_NS_ID		0x00000040U	/* Want/got mnt_ns_id */
+#define STATMOUNT_MNT_OPTS		0x00000080U	/* Want/got mnt_opts */
 
 /*
  * Special @mnt_id values that can be passed to listmount
-- 
cgit v1.2.3


From 205fdba5d0ffe1ad8de61763d74323e88b640d41 Mon Sep 17 00:00:00 2001
From: James Ogletree <jogletre@opensource.cirrus.com>
Date: Thu, 20 Jun 2024 16:17:41 +0000
Subject: firmware: cs_dsp: Add write sequence interface

A write sequence is a sequence of register addresses
and values executed by some Cirrus DSPs following
certain power state transitions.

Add support for Cirrus drivers to update or add to a
write sequence present in firmware.

Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: James Ogletree <jogletre@opensource.cirrus.com>
Reviewed-by: Jeff LaBundy <jeff@labundy.com>
Link: https://lore.kernel.org/r/20240620161745.2312359-2-jogletre@opensource.cirrus.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/firmware/cirrus/cs_dsp.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h
index 82687e07a7c2..c28a6b9c3b2d 100644
--- a/include/linux/firmware/cirrus/cs_dsp.h
+++ b/include/linux/firmware/cirrus/cs_dsp.h
@@ -42,6 +42,16 @@
 #define CS_DSP_ACKED_CTL_MIN_VALUE           0
 #define CS_DSP_ACKED_CTL_MAX_VALUE           0xFFFFFF
 
+/*
+ * Write sequence operation codes
+ */
+#define CS_DSP_WSEQ_FULL	0x00
+#define CS_DSP_WSEQ_ADDR8	0x02
+#define CS_DSP_WSEQ_L16		0x04
+#define CS_DSP_WSEQ_H16		0x05
+#define CS_DSP_WSEQ_UNLOCK	0xFD
+#define CS_DSP_WSEQ_END		0xFF
+
 /**
  * struct cs_dsp_region - Describes a logical memory region in DSP address space
  * @type:	Memory region type
@@ -258,6 +268,23 @@ struct cs_dsp_alg_region *cs_dsp_find_alg_region(struct cs_dsp *dsp,
 
 const char *cs_dsp_mem_region_name(unsigned int type);
 
+/**
+ * struct cs_dsp_wseq - Describes a write sequence
+ * @ctl:	Write sequence cs_dsp control
+ * @ops:	Operations contained within
+ */
+struct cs_dsp_wseq {
+	struct cs_dsp_coeff_ctl *ctl;
+	struct list_head ops;
+};
+
+int cs_dsp_wseq_init(struct cs_dsp *dsp, struct cs_dsp_wseq *wseqs, unsigned int num_wseqs);
+int cs_dsp_wseq_write(struct cs_dsp *dsp, struct cs_dsp_wseq *wseq, u32 addr, u32 data,
+		      u8 op_code, bool update);
+int cs_dsp_wseq_multi_write(struct cs_dsp *dsp, struct cs_dsp_wseq *wseq,
+			    const struct reg_sequence *reg_seq, int num_regs,
+			    u8 op_code, bool update);
+
 /**
  * struct cs_dsp_chunk - Describes a buffer holding data formatted for the DSP
  * @data:	Pointer to underlying buffer memory
-- 
cgit v1.2.3


From cb626376cbd00cd69329371519a8e9568baef715 Mon Sep 17 00:00:00 2001
From: James Ogletree <jogletre@opensource.cirrus.com>
Date: Thu, 20 Jun 2024 16:17:43 +0000
Subject: mfd: cs40l50: Add support for CS40L50 core driver

Introduce support for Cirrus Logic Device CS40L50: a
haptic driver with waveform memory, integrated DSP,
and closed-loop algorithms.

The MFD component registers and initializes the device.

Signed-off-by: James Ogletree <jogletre@opensource.cirrus.com>
Reviewed-by: Jeff LaBundy <jeff@labundy.com>
Link: https://lore.kernel.org/r/20240620161745.2312359-4-jogletre@opensource.cirrus.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/cs40l50.h | 137 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 include/linux/mfd/cs40l50.h

(limited to 'include')

diff --git a/include/linux/mfd/cs40l50.h b/include/linux/mfd/cs40l50.h
new file mode 100644
index 000000000000..e5dc49860944
--- /dev/null
+++ b/include/linux/mfd/cs40l50.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * CS40L50 Advanced Haptic Driver with waveform memory,
+ * integrated DSP, and closed-loop algorithms
+ *
+ * Copyright 2024 Cirrus Logic, Inc.
+ *
+ * Author: James Ogletree <james.ogletree@cirrus.com>
+ */
+
+#ifndef __MFD_CS40L50_H__
+#define __MFD_CS40L50_H__
+
+#include <linux/firmware/cirrus/cs_dsp.h>
+#include <linux/gpio/consumer.h>
+#include <linux/pm.h>
+#include <linux/regmap.h>
+
+/* Power Supply Configuration */
+#define CS40L50_BLOCK_ENABLES2		0x201C
+#define CS40L50_ERR_RLS			0x2034
+#define CS40L50_BST_LPMODE_SEL		0x3810
+#define CS40L50_DCM_LOW_POWER		0x1
+#define CS40L50_OVERTEMP_WARN		0x4000010
+
+/* Interrupts */
+#define CS40L50_IRQ1_INT_1		0xE010
+#define CS40L50_IRQ1_BASE		CS40L50_IRQ1_INT_1
+#define CS40L50_IRQ1_INT_2		0xE014
+#define CS40L50_IRQ1_INT_8		0xE02C
+#define CS40L50_IRQ1_INT_9		0xE030
+#define CS40L50_IRQ1_INT_10		0xE034
+#define CS40L50_IRQ1_INT_18		0xE054
+#define CS40L50_IRQ1_MASK_1		0xE090
+#define CS40L50_IRQ1_MASK_2		0xE094
+#define CS40L50_IRQ1_MASK_20		0xE0DC
+#define CS40L50_IRQ1_INT_1_OFFSET	(CS40L50_IRQ1_INT_1 - CS40L50_IRQ1_BASE)
+#define CS40L50_IRQ1_INT_2_OFFSET	(CS40L50_IRQ1_INT_2 - CS40L50_IRQ1_BASE)
+#define CS40L50_IRQ1_INT_8_OFFSET	(CS40L50_IRQ1_INT_8 - CS40L50_IRQ1_BASE)
+#define CS40L50_IRQ1_INT_9_OFFSET	(CS40L50_IRQ1_INT_9 - CS40L50_IRQ1_BASE)
+#define CS40L50_IRQ1_INT_10_OFFSET	(CS40L50_IRQ1_INT_10 - CS40L50_IRQ1_BASE)
+#define CS40L50_IRQ1_INT_18_OFFSET	(CS40L50_IRQ1_INT_18 - CS40L50_IRQ1_BASE)
+#define CS40L50_IRQ_MASK_2_OVERRIDE	0xFFDF7FFF
+#define CS40L50_IRQ_MASK_20_OVERRIDE	0x15C01000
+#define CS40L50_AMP_SHORT_MASK		BIT(31)
+#define CS40L50_DSP_QUEUE_MASK		BIT(21)
+#define CS40L50_TEMP_ERR_MASK		BIT(31)
+#define CS40L50_BST_UVP_MASK		BIT(6)
+#define CS40L50_BST_SHORT_MASK		BIT(7)
+#define CS40L50_BST_ILIMIT_MASK		BIT(18)
+#define CS40L50_UVLO_VDDBATT_MASK	BIT(16)
+#define CS40L50_GLOBAL_ERROR_MASK	BIT(15)
+
+enum cs40l50_irq_list {
+	CS40L50_DSP_QUEUE_IRQ,
+	CS40L50_GLOBAL_ERROR_IRQ,
+	CS40L50_UVLO_VDDBATT_IRQ,
+	CS40L50_BST_ILIMIT_IRQ,
+	CS40L50_BST_SHORT_IRQ,
+	CS40L50_BST_UVP_IRQ,
+	CS40L50_TEMP_ERR_IRQ,
+	CS40L50_AMP_SHORT_IRQ,
+};
+
+/* DSP */
+#define CS40L50_XMEM_PACKED_0		0x2000000
+#define CS40L50_XMEM_UNPACKED24_0	0x2800000
+#define CS40L50_SYS_INFO_ID		0x25E0000
+#define CS40L50_DSP_QUEUE_WT		0x28042C8
+#define CS40L50_DSP_QUEUE_RD		0x28042CC
+#define CS40L50_NUM_WAVES		0x2805C18
+#define CS40L50_CORE_BASE		0x2B80000
+#define CS40L50_YMEM_PACKED_0		0x2C00000
+#define CS40L50_YMEM_UNPACKED24_0	0x3400000
+#define CS40L50_PMEM_0			0x3800000
+#define CS40L50_DSP_POLL_US		1000
+#define CS40L50_DSP_TIMEOUT_COUNT	100
+#define CS40L50_RESET_PULSE_US		2200
+#define CS40L50_CP_READY_US		3100
+#define CS40L50_AUTOSUSPEND_MS		2000
+#define CS40L50_PM_ALGO			0x9F206
+#define CS40L50_GLOBAL_ERR_RLS_SET	BIT(11)
+#define CS40L50_GLOBAL_ERR_RLS_CLEAR	0
+
+enum cs40l50_wseqs {
+	CS40L50_PWR_ON,
+	CS40L50_STANDBY,
+	CS40L50_ACTIVE,
+	CS40L50_NUM_WSEQS,
+};
+
+/* DSP Queue */
+#define CS40L50_DSP_QUEUE_BASE		0x11004
+#define CS40L50_DSP_QUEUE_END		0x1101C
+#define CS40L50_DSP_QUEUE		0x11020
+#define CS40L50_PREVENT_HIBER		0x2000003
+#define CS40L50_ALLOW_HIBER		0x2000004
+#define CS40L50_SHUTDOWN		0x2000005
+#define CS40L50_SYSTEM_RESET		0x2000007
+#define CS40L50_START_I2S		0x3000002
+#define CS40L50_OWT_PUSH		0x3000008
+#define CS40L50_STOP_PLAYBACK		0x5000000
+#define CS40L50_OWT_DELETE		0xD000000
+
+/* Firmware files */
+#define CS40L50_FW			"cs40l50.wmfw"
+#define CS40L50_WT			"cs40l50.bin"
+
+/* Device */
+#define CS40L50_DEVID			0x0
+#define CS40L50_REVID			0x4
+#define CS40L50_DEVID_A			0x40A50
+#define CS40L50_REVID_B0		0xB0
+
+struct cs40l50 {
+	struct device *dev;
+	struct regmap *regmap;
+	struct mutex lock;
+	struct cs_dsp dsp;
+	struct gpio_desc *reset_gpio;
+	struct regmap_irq_chip_data *irq_data;
+	const struct firmware *fw;
+	const struct firmware *bin;
+	struct cs_dsp_wseq wseqs[CS40L50_NUM_WSEQS];
+	int irq;
+	u32 devid;
+	u32 revid;
+};
+
+int cs40l50_dsp_write(struct device *dev, struct regmap *regmap, u32 val);
+int cs40l50_probe(struct cs40l50 *cs40l50);
+int cs40l50_remove(struct cs40l50 *cs40l50);
+
+extern const struct regmap_config cs40l50_regmap;
+extern const struct dev_pm_ops cs40l50_pm_ops;
+
+#endif /* __MFD_CS40L50_H__ */
-- 
cgit v1.2.3


From 860f8e3beac0b800bbe20f23c5f3440b1c470b8f Mon Sep 17 00:00:00 2001
From: Karel Balej <balejk@matfyz.cz>
Date: Fri, 31 May 2024 19:34:57 +0200
Subject: mfd: Add driver for Marvell 88PM886 PMIC

Marvell 88PM886 is a PMIC which provides various functions such as
onkey, battery, charger and regulators. It is found for instance in the
samsung,coreprimevelte smartphone with which this was tested. Implement
basic support to allow for the use of regulators and onkey.

Signed-off-by: Karel Balej <balejk@matfyz.cz>
Link: https://lore.kernel.org/r/20240531175109.15599-3-balejk@matfyz.cz
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/88pm886.h | 69 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 include/linux/mfd/88pm886.h

(limited to 'include')

diff --git a/include/linux/mfd/88pm886.h b/include/linux/mfd/88pm886.h
new file mode 100644
index 000000000000..133aa302e492
--- /dev/null
+++ b/include/linux/mfd/88pm886.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __MFD_88PM886_H
+#define __MFD_88PM886_H
+
+#include <linux/i2c.h>
+#include <linux/regmap.h>
+
+#define PM886_A1_CHIP_ID		0xa1
+
+#define PM886_IRQ_ONKEY			0
+
+#define PM886_PAGE_OFFSET_REGULATORS	1
+
+#define PM886_REG_ID			0x00
+
+#define PM886_REG_STATUS1		0x01
+#define PM886_ONKEY_STS1		BIT(0)
+
+#define PM886_REG_INT_STATUS1		0x05
+
+#define PM886_REG_INT_ENA_1		0x0a
+#define PM886_INT_ENA1_ONKEY		BIT(0)
+
+#define PM886_REG_MISC_CONFIG1		0x14
+#define PM886_SW_PDOWN			BIT(5)
+
+#define PM886_REG_MISC_CONFIG2		0x15
+#define PM886_INT_INV			BIT(0)
+#define PM886_INT_CLEAR			BIT(1)
+#define PM886_INT_RC			0x00
+#define PM886_INT_WC			BIT(1)
+#define PM886_INT_MASK_MODE		BIT(2)
+
+#define PM886_REG_RTC_SPARE6		0xef
+
+#define PM886_REG_BUCK_EN		0x08
+#define PM886_REG_LDO_EN1		0x09
+#define PM886_REG_LDO_EN2		0x0a
+#define PM886_REG_LDO1_VOUT		0x20
+#define PM886_REG_LDO2_VOUT		0x26
+#define PM886_REG_LDO3_VOUT		0x2c
+#define PM886_REG_LDO4_VOUT		0x32
+#define PM886_REG_LDO5_VOUT		0x38
+#define PM886_REG_LDO6_VOUT		0x3e
+#define PM886_REG_LDO7_VOUT		0x44
+#define PM886_REG_LDO8_VOUT		0x4a
+#define PM886_REG_LDO9_VOUT		0x50
+#define PM886_REG_LDO10_VOUT		0x56
+#define PM886_REG_LDO11_VOUT		0x5c
+#define PM886_REG_LDO12_VOUT		0x62
+#define PM886_REG_LDO13_VOUT		0x68
+#define PM886_REG_LDO14_VOUT		0x6e
+#define PM886_REG_LDO15_VOUT		0x74
+#define PM886_REG_LDO16_VOUT		0x7a
+#define PM886_REG_BUCK1_VOUT		0xa5
+#define PM886_REG_BUCK2_VOUT		0xb3
+#define PM886_REG_BUCK3_VOUT		0xc1
+#define PM886_REG_BUCK4_VOUT		0xcf
+#define PM886_REG_BUCK5_VOUT		0xdd
+
+#define PM886_LDO_VSEL_MASK		0x0f
+#define PM886_BUCK_VSEL_MASK		0x7f
+
+struct pm886_chip {
+	struct i2c_client *client;
+	unsigned int chip_id;
+	struct regmap *regmap;
+};
+#endif /* __MFD_88PM886_H */
-- 
cgit v1.2.3


From d19b46340b3c0ea66bef0f6c58876cc085813ba8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 26 Jun 2024 06:59:38 +0200
Subject: block: remove bio_integrity_process

Move the bvec interation into the generate/verify helpers to avoid a bit
of argument passing churn.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240626045950.189758-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-integrity.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index c58634924782..804f856ed3e5 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -14,15 +14,6 @@ enum blk_integrity_flags {
 	BLK_INTEGRITY_STACKED		= 1 << 4,
 };
 
-struct blk_integrity_iter {
-	void			*prot_buf;
-	void			*data_buf;
-	sector_t		seed;
-	unsigned int		data_size;
-	unsigned short		interval;
-	const char		*disk_name;
-};
-
 const char *blk_integrity_profile_name(struct blk_integrity *bi);
 bool queue_limits_stack_integrity(struct queue_limits *t,
 		struct queue_limits *b);
-- 
cgit v1.2.3


From aa6ff4eb7c10d9a6532db3ea9e78124bf14e70ae Mon Sep 17 00:00:00 2001
From: Dongliang Cui <dongliang.cui@unisoc.com>
Date: Fri, 14 Jun 2024 15:49:36 +0800
Subject: block: Add ioprio to block_rq tracepoint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sometimes we need to track the processing order of requests with
ioprio set. So the ioprio of request can be useful information.

Example：

block_rq_insert: 8,0 RA 16384 () 6500840 + 32 be,0,6 [binder:815_3]
block_rq_issue: 8,0 RA 16384 () 6500840 + 32 be,0,6 [binder:815_3]
block_rq_complete: 8,0 RA () 6500840 + 32 be,0,6 [0]

Signed-off-by: Dongliang Cui <dongliang.cui@unisoc.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20240614074936.113659-1-dongliang.cui@unisoc.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/trace/events/block.h | 41 ++++++++++++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 0e128ad51460..1527d5d45e01 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -9,9 +9,17 @@
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/tracepoint.h>
+#include <uapi/linux/ioprio.h>
 
 #define RWBS_LEN	8
 
+#define IOPRIO_CLASS_STRINGS \
+	{ IOPRIO_CLASS_NONE,	"none" }, \
+	{ IOPRIO_CLASS_RT,	"rt" }, \
+	{ IOPRIO_CLASS_BE,	"be" }, \
+	{ IOPRIO_CLASS_IDLE,	"idle" }, \
+	{ IOPRIO_CLASS_INVALID,	"invalid"}
+
 #ifdef CONFIG_BUFFER_HEAD
 DECLARE_EVENT_CLASS(block_buffer,
 
@@ -82,6 +90,7 @@ TRACE_EVENT(block_rq_requeue,
 		__field(  dev_t,	dev			)
 		__field(  sector_t,	sector			)
 		__field(  unsigned int,	nr_sector		)
+		__field(  unsigned short, ioprio		)
 		__array(  char,		rwbs,	RWBS_LEN	)
 		__dynamic_array( char,	cmd,	1		)
 	),
@@ -90,16 +99,20 @@ TRACE_EVENT(block_rq_requeue,
 		__entry->dev	   = rq->q->disk ? disk_devt(rq->q->disk) : 0;
 		__entry->sector    = blk_rq_trace_sector(rq);
 		__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
+		__entry->ioprio    = rq->ioprio;
 
 		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
 		__get_str(cmd)[0] = '\0';
 	),
 
-	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+	TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->rwbs, __get_str(cmd),
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, 0)
+		  (unsigned long long)__entry->sector, __entry->nr_sector,
+		  __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio),
+				   IOPRIO_CLASS_STRINGS),
+		  IOPRIO_PRIO_HINT(__entry->ioprio),
+		  IOPRIO_PRIO_LEVEL(__entry->ioprio),  0)
 );
 
 DECLARE_EVENT_CLASS(block_rq_completion,
@@ -113,6 +126,7 @@ DECLARE_EVENT_CLASS(block_rq_completion,
 		__field(  sector_t,	sector			)
 		__field(  unsigned int,	nr_sector		)
 		__field(  int	,	error			)
+		__field(  unsigned short, ioprio		)
 		__array(  char,		rwbs,	RWBS_LEN	)
 		__dynamic_array( char,	cmd,	1		)
 	),
@@ -122,16 +136,20 @@ DECLARE_EVENT_CLASS(block_rq_completion,
 		__entry->sector    = blk_rq_pos(rq);
 		__entry->nr_sector = nr_bytes >> 9;
 		__entry->error     = blk_status_to_errno(error);
+		__entry->ioprio    = rq->ioprio;
 
 		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
 		__get_str(cmd)[0] = '\0';
 	),
 
-	TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+	TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->rwbs, __get_str(cmd),
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->error)
+		  (unsigned long long)__entry->sector, __entry->nr_sector,
+		  __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio),
+				   IOPRIO_CLASS_STRINGS),
+		  IOPRIO_PRIO_HINT(__entry->ioprio),
+		  IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->error)
 );
 
 /**
@@ -180,6 +198,7 @@ DECLARE_EVENT_CLASS(block_rq,
 		__field(  sector_t,	sector			)
 		__field(  unsigned int,	nr_sector		)
 		__field(  unsigned int,	bytes			)
+		__field(  unsigned short, ioprio		)
 		__array(  char,		rwbs,	RWBS_LEN	)
 		__array(  char,         comm,   TASK_COMM_LEN   )
 		__dynamic_array( char,	cmd,	1		)
@@ -190,17 +209,21 @@ DECLARE_EVENT_CLASS(block_rq,
 		__entry->sector    = blk_rq_trace_sector(rq);
 		__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
 		__entry->bytes     = blk_rq_bytes(rq);
+		__entry->ioprio	   = rq->ioprio;
 
 		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
 		__get_str(cmd)[0] = '\0';
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
-	TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+	TP_printk("%d,%d %s %u (%s) %llu + %u %s,%u,%u [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->rwbs, __entry->bytes, __get_str(cmd),
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm)
+		  (unsigned long long)__entry->sector, __entry->nr_sector,
+		  __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio),
+				   IOPRIO_CLASS_STRINGS),
+		  IOPRIO_PRIO_HINT(__entry->ioprio),
+		  IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->comm)
 );
 
 /**
-- 
cgit v1.2.3


From c1385c1f0ba3b80bd12f26c440612175088c664c Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Wed, 29 May 2024 14:34:28 +0100
Subject: ACPI: processor: Simplify initial onlining to use same path for cold
 and hotplug

Separate code paths, combined with a flag set in acpi_processor.c to
indicate a struct acpi_processor was for a hotplugged CPU ensured that
per CPU data was only set up the first time that a CPU was initialized.
This appears to be unnecessary as the paths can be combined by letting
the online logic also handle any CPUs online at the time of driver load.

Motivation for this change, beyond simplification, is that ARM64
virtual CPU HP uses the same code paths for hotplug and cold path in
acpi_processor.c so had no easy way to set the flag for hotplug only.
Removing this necessity will enable ARM64 vCPU HP to reuse the existing
code paths.

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Hanjun Guo <guohanjun@huawei.com>
Tested-by: Miguel Luis <miguel.luis@oracle.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Miguel Luis <miguel.luis@oracle.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/20240529133446.28446-2-Jonathan.Cameron@huawei.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/acpi/processor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 3f34ebb27525..e6f6074eadbf 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -217,7 +217,7 @@ struct acpi_processor_flags {
 	u8 has_lpi:1;
 	u8 power_setup_done:1;
 	u8 bm_rld_set:1;
-	u8 need_hotplug_init:1;
+	u8 previously_online:1;
 };
 
 struct acpi_processor {
-- 
cgit v1.2.3


From 36b921637e900c77f5f9bd5b45db522124068a96 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Wed, 29 May 2024 14:34:34 +0100
Subject: ACPI: processor: Add acpi_get_processor_handle() helper

If CONFIG_ACPI_PROCESSOR provide a helper to retrieve the
acpi_handle for a given CPU allowing access to methods
in DSDT.

Tested-by: Miguel Luis <miguel.luis@oracle.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/20240529133446.28446-8-Jonathan.Cameron@huawei.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/linux/acpi.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 28c3fb2bef0d..fd45bfab66b8 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -304,6 +304,8 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
 int acpi_unmap_cpu(int cpu);
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
+acpi_handle acpi_get_processor_handle(int cpu);
+
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
 int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr);
 #endif
@@ -1076,6 +1078,11 @@ static inline bool acpi_sleep_state_supported(u8 sleep_state)
 	return false;
 }
 
+static inline acpi_handle acpi_get_processor_handle(int cpu)
+{
+	return NULL;
+}
+
 #endif	/* !CONFIG_ACPI */
 
 extern void arch_post_acpi_subsys_init(void);
-- 
cgit v1.2.3


From 3b9d0a78aeda194994892f7c42f6ca5feb45eadd Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 29 May 2024 14:34:37 +0100
Subject: ACPI: Add post_eject to struct acpi_scan_handler for cpu hotplug

struct acpi_scan_handler has a detach callback that is used to remove
a driver when a bus is changed. When interacting with an eject-request,
the detach callback is called before _EJ0.

This means the ACPI processor driver can't use _STA to determine if a
CPU has been made not-present, or some of the other _STA bits have been
changed. acpi_processor_remove() needs to know the value of _STA after
_EJ0 has been called.

Add a post_eject callback to struct acpi_scan_handler. This is called
after acpi_scan_hot_remove() has successfully called _EJ0. Because
acpi_scan_check_and_detach() also clears the handler pointer,
it needs to be told if the caller will go on to call
acpi_bus_post_eject(), so that acpi_device_clear_enumerated()
and clearing the handler pointer can be deferred.
An extra flag is added to flags field introduced in the previous
patch to achieve this.

Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Tested-by: Miguel Luis <miguel.luis@oracle.com>
Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
Tested-by: Jianyong Wu <jianyong.wu@arm.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/20240529133446.28446-11-Jonathan.Cameron@huawei.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/acpi/acpi_bus.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 1a4dfd7a1c4a..05ca49478537 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -134,6 +134,7 @@ struct acpi_scan_handler {
 	bool (*match)(const char *idstr, const struct acpi_device_id **matchid);
 	int (*attach)(struct acpi_device *dev, const struct acpi_device_id *id);
 	void (*detach)(struct acpi_device *dev);
+	void (*post_eject)(struct acpi_device *dev);
 	void (*bind)(struct device *phys_dev);
 	void (*unbind)(struct device *phys_dev);
 	struct acpi_hotplug_profile hotplug;
-- 
cgit v1.2.3


From d633da5d3ab1a0eb26a2213d65da1e189e82f8ab Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 29 May 2024 14:34:41 +0100
Subject: irqchip/gic-v3: Add support for ACPI's disabled but 'online capable'
 CPUs

To support virtual CPU hotplug, ACPI has added an 'online capable' bit
to the MADT GICC entries. This indicates a disabled CPU entry may not
be possible to online via PSCI until firmware has set enabled bit in
_STA.

This means that a "usable" GIC redistributor is one that is marked as
either enabled, or online capable. The meaning of the
acpi_gicc_is_usable() would become less clear than just checking the
pair of flags at call sites. As such, drop that helper function.
The test in gic_acpi_match_gicc() remains as testing just the
enabled bit so the count of enabled distributors is correct.

What about the redistributor in the GICC entry? ACPI doesn't want to say.
Assume the worst: When a redistributor is described in the GICC entry,
but the entry is marked as disabled at boot, assume the redistributor
is inaccessible.

The GICv3 driver doesn't support late online of redistributors, so this
means the corresponding CPU can't be brought online either.
Rather than modifying cpu masks that may already have been used,
register a new cpuhp callback to fail this case. This must run earlier
than the main gic_starting_cpu() so that this case can be rejected
before the section of cpuhp that runs on the CPU that is coming up as
that is not allowed to fail. This solution keeps the handling of this
broken firmware corner case local to the GIC driver. As precise ordering
of this callback doesn't need to be controlled as long as it is
in that initial prepare phase, use CPUHP_BP_PREPARE_DYN.

Systems that want CPU hotplug in a VM can ensure their redistributors
are always-on, and describe them that way with a GICR entry in the MADT.

Suggested-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Tested-by: Miguel Luis <miguel.luis@oracle.com>
Co-developed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20240529133446.28446-15-Jonathan.Cameron@huawei.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/linux/acpi.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index fd45bfab66b8..9f8c9d29b035 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -237,11 +237,6 @@ acpi_table_parse_cedt(enum acpi_cedt_type id,
 int acpi_parse_mcfg (struct acpi_table_header *header);
 void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
 
-static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc)
-{
-	return gicc->flags & ACPI_MADT_ENABLED;
-}
-
 #if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
 void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa);
 #else
-- 
cgit v1.2.3


From 4e1a7df4548003fc081360b0f4edce3f7a991bfc Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 29 May 2024 14:34:46 +0100
Subject: cpumask: Add enabled cpumask for present CPUs that can be brought
 online

The 'offline' file in sysfs shows all offline CPUs, including those
that aren't present. User-space is expected to remove not-present CPUs
from this list to learn which CPUs could be brought online.

CPUs can be present but not-enabled. These CPUs can't be brought online
until the firmware policy changes, which comes with an ACPI notification
that will register the CPUs.

With only the offline and present files, user-space is unable to
determine which CPUs it can try to bring online. Add a new CPU mask
that shows this based on all the registered CPUs.

Signed-off-by: James Morse <james.morse@arm.com>
Tested-by: Miguel Luis <miguel.luis@oracle.com>
Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
Tested-by: Jianyong Wu <jianyong.wu@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/20240529133446.28446-20-Jonathan.Cameron@huawei.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/linux/cpumask.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 23686bed441d..954d4adc8f81 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -93,6 +93,7 @@ static inline void set_nr_cpu_ids(unsigned int nr)
  *
  *     cpu_possible_mask- has bit 'cpu' set iff cpu is populatable
  *     cpu_present_mask - has bit 'cpu' set iff cpu is populated
+ *     cpu_enabled_mask  - has bit 'cpu' set iff cpu can be brought online
  *     cpu_online_mask  - has bit 'cpu' set iff cpu available to scheduler
  *     cpu_active_mask  - has bit 'cpu' set iff cpu available to migration
  *
@@ -125,11 +126,13 @@ static inline void set_nr_cpu_ids(unsigned int nr)
 
 extern struct cpumask __cpu_possible_mask;
 extern struct cpumask __cpu_online_mask;
+extern struct cpumask __cpu_enabled_mask;
 extern struct cpumask __cpu_present_mask;
 extern struct cpumask __cpu_active_mask;
 extern struct cpumask __cpu_dying_mask;
 #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask)
 #define cpu_online_mask   ((const struct cpumask *)&__cpu_online_mask)
+#define cpu_enabled_mask   ((const struct cpumask *)&__cpu_enabled_mask)
 #define cpu_present_mask  ((const struct cpumask *)&__cpu_present_mask)
 #define cpu_active_mask   ((const struct cpumask *)&__cpu_active_mask)
 #define cpu_dying_mask    ((const struct cpumask *)&__cpu_dying_mask)
@@ -1075,6 +1078,7 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
 #else
 #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
 #define for_each_online_cpu(cpu)   for_each_cpu((cpu), cpu_online_mask)
+#define for_each_enabled_cpu(cpu)   for_each_cpu((cpu), cpu_enabled_mask)
 #define for_each_present_cpu(cpu)  for_each_cpu((cpu), cpu_present_mask)
 #endif
 
@@ -1092,6 +1096,15 @@ set_cpu_possible(unsigned int cpu, bool possible)
 		cpumask_clear_cpu(cpu, &__cpu_possible_mask);
 }
 
+static inline void
+set_cpu_enabled(unsigned int cpu, bool can_be_onlined)
+{
+	if (can_be_onlined)
+		cpumask_set_cpu(cpu, &__cpu_enabled_mask);
+	else
+		cpumask_clear_cpu(cpu, &__cpu_enabled_mask);
+}
+
 static inline void
 set_cpu_present(unsigned int cpu, bool present)
 {
@@ -1173,6 +1186,7 @@ static __always_inline unsigned int num_online_cpus(void)
 	return raw_atomic_read(&__num_online_cpus);
 }
 #define num_possible_cpus()	cpumask_weight(cpu_possible_mask)
+#define num_enabled_cpus()	cpumask_weight(cpu_enabled_mask)
 #define num_present_cpus()	cpumask_weight(cpu_present_mask)
 #define num_active_cpus()	cpumask_weight(cpu_active_mask)
 
@@ -1181,6 +1195,11 @@ static inline bool cpu_online(unsigned int cpu)
 	return cpumask_test_cpu(cpu, cpu_online_mask);
 }
 
+static inline bool cpu_enabled(unsigned int cpu)
+{
+	return cpumask_test_cpu(cpu, cpu_enabled_mask);
+}
+
 static inline bool cpu_possible(unsigned int cpu)
 {
 	return cpumask_test_cpu(cpu, cpu_possible_mask);
@@ -1205,6 +1224,7 @@ static inline bool cpu_dying(unsigned int cpu)
 
 #define num_online_cpus()	1U
 #define num_possible_cpus()	1U
+#define num_enabled_cpus()	1U
 #define num_present_cpus()	1U
 #define num_active_cpus()	1U
 
@@ -1218,6 +1238,11 @@ static inline bool cpu_possible(unsigned int cpu)
 	return cpu == 0;
 }
 
+static inline bool cpu_enabled(unsigned int cpu)
+{
+	return cpu == 0;
+}
+
 static inline bool cpu_present(unsigned int cpu)
 {
 	return cpu == 0;
-- 
cgit v1.2.3


From 791ed23f735b0fb1b984772edddf1571195780d8 Mon Sep 17 00:00:00 2001
From: Vladimir Lypak <vladimir.lypak@gmail.com>
Date: Fri, 28 Jun 2024 16:01:44 +0200
Subject: dt-bindings: interconnect: qcom: Add Qualcomm MSM8953 NoC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the device-tree bindings for interconnect providers
used on MSM8953 platform.

Signed-off-by: Vladimir Lypak <vladimir.lypak@gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Barnabás Czémán <barnabas.czeman@mainlining.org>
Link: https://lore.kernel.org/r/20240628-msm8953-interconnect-v3-1-a70d582182dc@mainlining.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 include/dt-bindings/interconnect/qcom,msm8953.h | 93 +++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 include/dt-bindings/interconnect/qcom,msm8953.h

(limited to 'include')

diff --git a/include/dt-bindings/interconnect/qcom,msm8953.h b/include/dt-bindings/interconnect/qcom,msm8953.h
new file mode 100644
index 000000000000..12564c434af7
--- /dev/null
+++ b/include/dt-bindings/interconnect/qcom,msm8953.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Qualcomm MSM8953 interconnect IDs
+ */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_MSM8953_H
+#define __DT_BINDINGS_INTERCONNECT_QCOM_MSM8953_H
+
+/* BIMC fabric */
+#define MAS_APPS_PROC		0
+#define MAS_OXILI		1
+#define MAS_SNOC_BIMC_0		2
+#define MAS_SNOC_BIMC_2		3
+#define MAS_SNOC_BIMC_1		4
+#define MAS_TCU_0		5
+#define SLV_EBI			6
+#define SLV_BIMC_SNOC		7
+
+/* PCNOC fabric */
+#define MAS_SPDM		0
+#define MAS_BLSP_1		1
+#define MAS_BLSP_2		2
+#define MAS_USB3		3
+#define MAS_CRYPTO		4
+#define MAS_SDCC_1		5
+#define MAS_SDCC_2		6
+#define MAS_SNOC_PCNOC		7
+#define PCNOC_M_0		8
+#define PCNOC_M_1		9
+#define PCNOC_INT_1		10
+#define PCNOC_INT_2		11
+#define PCNOC_S_0		12
+#define PCNOC_S_1		13
+#define PCNOC_S_2		14
+#define PCNOC_S_3		15
+#define PCNOC_S_4		16
+#define PCNOC_S_6		17
+#define PCNOC_S_7		18
+#define PCNOC_S_8		19
+#define PCNOC_S_9		20
+#define SLV_SPDM		21
+#define SLV_PDM			22
+#define SLV_TCSR		23
+#define SLV_SNOC_CFG		24
+#define SLV_TLMM		25
+#define SLV_MESSAGE_RAM		26
+#define SLV_BLSP_1		27
+#define SLV_BLSP_2		28
+#define SLV_PRNG		29
+#define SLV_CAMERA_SS_CFG	30
+#define SLV_DISP_SS_CFG		31
+#define SLV_VENUS_CFG		32
+#define SLV_GPU_CFG		33
+#define SLV_SDCC_1		34
+#define SLV_SDCC_2		35
+#define SLV_CRYPTO_0_CFG	36
+#define SLV_PMIC_ARB		37
+#define SLV_USB3		38
+#define SLV_IPA_CFG		39
+#define SLV_TCU			40
+#define SLV_PCNOC_SNOC		41
+
+/* SNOC fabric */
+#define MAS_QDSS_BAM		0
+#define MAS_BIMC_SNOC		1
+#define MAS_PCNOC_SNOC		2
+#define MAS_IPA			3
+#define MAS_QDSS_ETR		4
+#define QDSS_INT		5
+#define SNOC_INT_0		6
+#define SNOC_INT_1		7
+#define SNOC_INT_2		8
+#define SLV_KPSS_AHB		9
+#define SLV_WCSS		10
+#define SLV_SNOC_BIMC_1		11
+#define SLV_IMEM		12
+#define SLV_SNOC_PCNOC		13
+#define SLV_QDSS_STM		14
+#define SLV_CATS_1		15
+#define SLV_LPASS		16
+
+/* SNOC-MM fabric */
+#define MAS_JPEG		0
+#define MAS_MDP			1
+#define MAS_VENUS		2
+#define MAS_VFE0		3
+#define MAS_VFE1		4
+#define MAS_CPP			5
+#define SLV_SNOC_BIMC_0		6
+#define SLV_SNOC_BIMC_2		7
+#define SLV_CATS_0		8
+
+#endif /* __DT_BINDINGS_INTERCONNECT_QCOM_MSM8953_H */
-- 
cgit v1.2.3


From ed2a2ef16a6b9197a0e452308bf6acee6e01f709 Mon Sep 17 00:00:00 2001
From: Sven Peter <sven@svenpeter.dev>
Date: Wed, 15 May 2024 18:02:58 +0000
Subject: Bluetooth: Add quirk to ignore reserved PHY bits in LE Extended Adv
 Report

Some Broadcom controllers found on Apple Silicon machines abuse the
reserved bits inside the PHY fields of LE Extended Advertising Report
events for additional flags. Add a quirk to drop these and correctly
extract the Primary/Secondary_PHY field.

The following excerpt from a btmon trace shows a report received with
"Reserved" for "Primary PHY" on a 4388 controller:

> HCI Event: LE Meta Event (0x3e) plen 26
      LE Extended Advertising Report (0x0d)
        Num reports: 1
        Entry 0
          Event type: 0x2515
            Props: 0x0015
              Connectable
              Directed
              Use legacy advertising PDUs
            Data status: Complete
            Reserved (0x2500)
         Legacy PDU Type: Reserved (0x2515)
          Address type: Random (0x01)
          Address: 00:00:00:00:00:00 (Static)
          Primary PHY: Reserved
          Secondary PHY: No packets
          SID: no ADI field (0xff)
          TX power: 127 dBm
          RSSI: -60 dBm (0xc4)
          Periodic advertising interval: 0.00 msec (0x0000)
          Direct address type: Public (0x00)
          Direct address: 00:00:00:00:00:00 (Apple, Inc.)
          Data length: 0x00

Cc: stable@vger.kernel.org
Fixes: 2e7ed5f5e69b ("Bluetooth: hci_sync: Use advertised PHYs on hci_le_ext_create_conn_sync")
Reported-by: Janne Grunau <j@jannau.net>
Closes: https://lore.kernel.org/all/Zjz0atzRhFykROM9@robin
Tested-by: Janne Grunau <j@jannau.net>
Signed-off-by: Sven Peter <sven@svenpeter.dev>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index fe932ca3bc8c..e372a88e8c3f 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -324,6 +324,17 @@ enum {
 	 * claim to support it.
 	 */
 	HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE,
+
+	/*
+	 * When this quirk is set, the reserved bits of Primary/Secondary_PHY
+	 * inside the LE Extended Advertising Report events are discarded.
+	 * This is required for some Apple/Broadcom controllers which
+	 * abuse these reserved bits for unrelated flags.
+	 *
+	 * This quirk can be set before hci_register_dev is called or
+	 * during the hdev->setup vendor callback.
+	 */
+	HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY,
 };
 
 /* HCI device flags */
-- 
cgit v1.2.3


From f1a8f402f13f94263cf349216c257b2985100927 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Mon, 24 Jun 2024 09:42:09 -0400
Subject: Bluetooth: L2CAP: Fix deadlock

This fixes the following deadlock introduced by 39a92a55be13
("bluetooth/l2cap: sync sock recv cb and release")

============================================
WARNING: possible recursive locking detected
6.10.0-rc3-g4029dba6b6f1 #6823 Not tainted
--------------------------------------------
kworker/u5:0/35 is trying to acquire lock:
ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
l2cap_sock_recv_cb+0x44/0x1e0

but task is already holding lock:
ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
l2cap_get_chan_by_scid+0xaf/0xd0

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&chan->lock#2/1);
  lock(&chan->lock#2/1);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

3 locks held by kworker/u5:0/35:
 #0: ffff888002b8a940 ((wq_completion)hci0#2){+.+.}-{0:0}, at:
process_one_work+0x750/0x930
 #1: ffff888002c67dd0 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0},
at: process_one_work+0x44e/0x930
 #2: ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at:
l2cap_get_chan_by_scid+0xaf/0xd0

To fix the original problem this introduces l2cap_chan_lock at
l2cap_conless_channel to ensure that l2cap_sock_recv_cb is called with
chan->lock held.

Fixes: 89e856e124f9 ("bluetooth/l2cap: sync sock recv cb and release")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci_sync.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 6a9d063e9f47..534c3386e714 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -38,6 +38,8 @@ int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
 int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
 			     const void *param, u8 event, u32 timeout,
 			     struct sock *sk);
+int hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen,
+			const void *param, u32 timeout);
 
 void hci_cmd_sync_init(struct hci_dev *hdev);
 void hci_cmd_sync_clear(struct hci_dev *hdev);
-- 
cgit v1.2.3


From 43c0226c9ba56ffebdef9a858dbcf6eb0d376291 Mon Sep 17 00:00:00 2001
From: Dhruva Gole <d-gole@ti.com>
Date: Thu, 27 Jun 2024 11:31:17 +0530
Subject: cpufreq: make cpufreq_boost_enabled() return bool

Since this function is supposed to return boost_enabled which is anyway
a bool type make sure that it's return value is also marked as bool.
This helps maintain better consistency in data types being used.

Signed-off-by: Dhruva Gole <d-gole@ti.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://patch.msgid.link/20240627060117.1809477-1-d-gole@ti.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 20f7e98ee8af..6f57de7de433 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -785,7 +785,7 @@ ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf);
 
 #ifdef CONFIG_CPU_FREQ
 int cpufreq_boost_trigger_state(int state);
-int cpufreq_boost_enabled(void);
+bool cpufreq_boost_enabled(void);
 int cpufreq_enable_boost_support(void);
 bool policy_has_boost_freq(struct cpufreq_policy *policy);
 
@@ -1164,9 +1164,9 @@ static inline int cpufreq_boost_trigger_state(int state)
 {
 	return 0;
 }
-static inline int cpufreq_boost_enabled(void)
+static inline bool cpufreq_boost_enabled(void)
 {
-	return 0;
+	return false;
 }
 
 static inline int cpufreq_enable_boost_support(void)
-- 
cgit v1.2.3


From 0214b27fc949a7bcaf57e71a7f9f534d6481d08b Mon Sep 17 00:00:00 2001
From: Sean Anderson <sean.anderson@linux.dev>
Date: Mon, 24 Jun 2024 13:46:00 -0400
Subject: iio: Add iio_read_channel_label to inkern API

It can be convenient for other in-kernel drivers to reuse IIO channel
labels. Export the iio_read_channel_label function to allow this. The
signature is different depending on where we are calling it from, so
the meat is moved to do_iio_read_channel_label.

Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://patch.msgid.link/20240624174601.1527244-2-sean.anderson@linux.dev
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/consumer.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h
index e9910b41d48e..333d1d8ccb37 100644
--- a/include/linux/iio/consumer.h
+++ b/include/linux/iio/consumer.h
@@ -441,4 +441,14 @@ ssize_t iio_read_channel_ext_info(struct iio_channel *chan,
 ssize_t iio_write_channel_ext_info(struct iio_channel *chan, const char *attr,
 				   const char *buf, size_t len);
 
+/**
+ * iio_read_channel_label() - read label for a given channel
+ * @chan:		The channel being queried.
+ * @buf:		Where to store the attribute value. Assumed to hold
+ *			at least PAGE_SIZE bytes.
+ *
+ * Returns the number of bytes written to buf, or an error code.
+ */
+ssize_t iio_read_channel_label(struct iio_channel *chan, char *buf);
+
 #endif
-- 
cgit v1.2.3


From 5476394aa9f27d670dd2bac426fdb6ac12b12cb3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 27 Jun 2024 13:14:01 +0200
Subject: block: simplify queue_logical_block_size

queue_logical_block_size is never called with a 0 queue, and the
logical_block_size field in queue_limits is always initialized for
a live queue.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20240627111407.476276-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 53c41ef4222c..4d0d4b83bc74 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1227,12 +1227,7 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev)
 
 static inline unsigned queue_logical_block_size(const struct request_queue *q)
 {
-	int retval = 512;
-
-	if (q && q->limits.logical_block_size)
-		retval = q->limits.logical_block_size;
-
-	return retval;
+	return q->limits.logical_block_size;
 }
 
 static inline unsigned int bdev_logical_block_size(struct block_device *bdev)
-- 
cgit v1.2.3


From 3ebbd9f6de7ec6d538639ebb657246f629ace81e Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree.xilinx@gmail.com>
Date: Thu, 27 Jun 2024 16:33:46 +0100
Subject: net: move ethtool-related netdev state into its own struct

net_dev->ethtool is a pointer to new struct ethtool_netdev_state, which
 currently contains only the wol_enabled field.

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Link: https://patch.msgid.link/293a562278371de7534ed1eb17531838ca090633.1719502239.git.ecree.xilinx@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h   | 8 ++++++++
 include/linux/netdevice.h | 8 +++++---
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index c7f6f2bc9cac..374639e661d1 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -1004,6 +1004,14 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
 				       const struct ethtool_link_ksettings *cmd,
 				       u32 *dev_speed, u8 *dev_duplex);
 
+/**
+ * struct ethtool_netdev_state - per-netdevice state for ethtool features
+ * @wol_enabled:	Wake-on-LAN is enabled
+ */
+struct ethtool_netdev_state {
+	unsigned		wol_enabled:1;
+};
+
 struct phy_device;
 struct phy_tdr_config;
 struct phy_plca_cfg;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1e3401093c13..3c719f0d5f5a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -80,6 +80,7 @@ struct xdp_buff;
 struct xdp_frame;
 struct xdp_metadata_ops;
 struct xdp_md;
+struct ethtool_netdev_state;
 
 typedef u32 xdp_features_t;
 
@@ -1986,8 +1987,6 @@ enum netdev_reg_state {
  *			switch driver and used to set the phys state of the
  *			switch port.
  *
- *	@wol_enabled:	Wake-on-LAN is enabled
- *
  *	@threaded:	napi threaded mode is enabled
  *
  *	@module_fw_flash_in_progress:	Module firmware flashing is in progress.
@@ -2001,6 +2000,7 @@ enum netdev_reg_state {
  *	@udp_tunnel_nic_info:	static structure describing the UDP tunnel
  *				offload capabilities of the device
  *	@udp_tunnel_nic:	UDP tunnel offload state
+ *	@ethtool:	ethtool related state
  *	@xdp_state:		stores info on attached XDP BPF programs
  *
  *	@nested_level:	Used as a parameter of spin_lock_nested() of
@@ -2375,7 +2375,7 @@ struct net_device {
 	struct lock_class_key	*qdisc_tx_busylock;
 	bool			proto_down;
 	bool			threaded;
-	unsigned		wol_enabled:1;
+
 	unsigned		module_fw_flash_in_progress:1;
 	struct list_head	net_notifier_list;
 
@@ -2386,6 +2386,8 @@ struct net_device {
 	const struct udp_tunnel_nic_info	*udp_tunnel_nic_info;
 	struct udp_tunnel_nic	*udp_tunnel_nic;
 
+	struct ethtool_netdev_state *ethtool;
+
 	/* protected by rtnl_lock */
 	struct bpf_xdp_entity	xdp_state[__MAX_XDP_MODE];
 
-- 
cgit v1.2.3


From 6ad2962f8adfd53fca52dce7f830783e95d99ce7 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree.xilinx@gmail.com>
Date: Thu, 27 Jun 2024 16:33:47 +0100
Subject: net: ethtool: attach an XArray of custom RSS contexts to a netdevice

Each context stores the RXFH settings (indir, key, and hfunc) as well
 as optionally some driver private data.
Delete any still-existing contexts at netdev unregister time.

Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Link: https://patch.msgid.link/cbd1c402cec38f2e03124f2ab65b4ae4e08bd90d.1719502240.git.ecree.xilinx@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 374639e661d1..c741d6403364 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -159,6 +159,46 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
 	return index % n_rx_rings;
 }
 
+/**
+ * struct ethtool_rxfh_context - a custom RSS context configuration
+ * @indir_size: Number of u32 entries in indirection table
+ * @key_size: Size of hash key, in bytes
+ * @priv_size: Size of driver private data, in bytes
+ * @hfunc: RSS hash function identifier.  One of the %ETH_RSS_HASH_*
+ * @input_xfrm: Defines how the input data is transformed. Valid values are one
+ *	of %RXH_XFRM_*.
+ * @indir_configured: indir has been specified (at create time or subsequently)
+ * @key_configured: hkey has been specified (at create time or subsequently)
+ */
+struct ethtool_rxfh_context {
+	u32 indir_size;
+	u32 key_size;
+	u16 priv_size;
+	u8 hfunc;
+	u8 input_xfrm;
+	u8 indir_configured:1;
+	u8 key_configured:1;
+	/* private: driver private data, indirection table, and hash key are
+	 * stored sequentially in @data area.  Use below helpers to access.
+	 */
+	u8 data[] __aligned(sizeof(void *));
+};
+
+static inline void *ethtool_rxfh_context_priv(struct ethtool_rxfh_context *ctx)
+{
+	return ctx->data;
+}
+
+static inline u32 *ethtool_rxfh_context_indir(struct ethtool_rxfh_context *ctx)
+{
+	return (u32 *)(ctx->data + ALIGN(ctx->priv_size, sizeof(u32)));
+}
+
+static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx)
+{
+	return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size);
+}
+
 /* declare a link mode bitmap */
 #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name)		\
 	DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS)
@@ -1006,9 +1046,11 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
 
 /**
  * struct ethtool_netdev_state - per-netdevice state for ethtool features
+ * @rss_ctx:		XArray of custom RSS contexts
  * @wol_enabled:	Wake-on-LAN is enabled
  */
 struct ethtool_netdev_state {
+	struct xarray		rss_ctx;
 	unsigned		wol_enabled:1;
 };
 
-- 
cgit v1.2.3


From eac9122f0c41b832065e01977c34946ec8e76c24 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree.xilinx@gmail.com>
Date: Thu, 27 Jun 2024 16:33:48 +0100
Subject: net: ethtool: record custom RSS contexts in the XArray

Since drivers are still choosing the context IDs, we have to force the
 XArray to use the ID they've chosen rather than picking one ourselves,
 and handle the case where they give us an ID that's already in use.

Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Link: https://patch.msgid.link/801f5faa4cec87c65b2c6e27fb220c944bce593a.1719502240.git.ecree.xilinx@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index c741d6403364..43a2a143034f 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -199,6 +199,17 @@ static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx)
 	return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size);
 }
 
+static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size,
+					       u16 priv_size)
+{
+	size_t indir_bytes = array_size(indir_size, sizeof(u32));
+	size_t flex_len;
+
+	flex_len = size_add(size_add(indir_bytes, key_size),
+			    ALIGN(priv_size, sizeof(u32)));
+	return struct_size_t(struct ethtool_rxfh_context, data, flex_len);
+}
+
 /* declare a link mode bitmap */
 #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name)		\
 	DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS)
@@ -710,6 +721,8 @@ struct ethtool_rxfh_param {
  *	contexts.
  * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor
  *	RSS.
+ * @rxfh_priv_size: size of the driver private data area the core should
+ *	allocate for an RSS context (in &struct ethtool_rxfh_context).
  * @supported_coalesce_params: supported types of interrupt coalescing.
  * @supported_ring_params: supported ring params.
  * @get_drvinfo: Report driver/device information. Modern drivers no
@@ -895,6 +908,7 @@ struct ethtool_ops {
 	u32     cap_link_lanes_supported:1;
 	u32     cap_rss_ctx_supported:1;
 	u32	cap_rss_sym_xor_supported:1;
+	u16	rxfh_priv_size;
 	u32	supported_coalesce_params;
 	u32	supported_ring_params;
 	void	(*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *);
-- 
cgit v1.2.3


From 847a8ab186767be6ee95643f9739fa9d0f839589 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree.xilinx@gmail.com>
Date: Thu, 27 Jun 2024 16:33:49 +0100
Subject: net: ethtool: let the core choose RSS context IDs

Add a new API to create/modify/remove RSS contexts, that passes in the
 newly-chosen context ID (not as a pointer) rather than leaving the
 driver to choose it on create.  Also pass in the ctx, allowing drivers
 to easily use its private data area to store their hardware-specific
 state.
Keep the existing .set_rxfh API for now as a fallback, but deprecate it
 for custom contexts (rss_context != 0).

Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Link: https://patch.msgid.link/45f1fe61df2163c091ec394c9f52000c8b16cc3b.1719502240.git.ecree.xilinx@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 43a2a143034f..4292a25b2427 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -723,6 +723,10 @@ struct ethtool_rxfh_param {
  *	RSS.
  * @rxfh_priv_size: size of the driver private data area the core should
  *	allocate for an RSS context (in &struct ethtool_rxfh_context).
+ * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID.  If this
+ *	is zero then the core may choose any (nonzero) ID, otherwise the core
+ *	will only use IDs strictly less than this value, as the @rss_context
+ *	argument to @create_rxfh_context and friends.
  * @supported_coalesce_params: supported types of interrupt coalescing.
  * @supported_ring_params: supported ring params.
  * @get_drvinfo: Report driver/device information. Modern drivers no
@@ -819,6 +823,32 @@ struct ethtool_rxfh_param {
  *	will remain unchanged.
  *	Returns a negative error code or zero. An error code must be returned
  *	if at least one unsupported change was requested.
+ * @create_rxfh_context: Create a new RSS context with the specified RX flow
+ *	hash indirection table, hash key, and hash function.
+ *	The &struct ethtool_rxfh_context for this context is passed in @ctx;
+ *	note that the indir table, hkey and hfunc are not yet populated as
+ *	of this call.  The driver does not need to update these; the core
+ *	will do so if this op succeeds.
+ *	However, if @rxfh.indir is set to %NULL, the driver must update the
+ *	indir table in @ctx with the (default or inherited) table actually in
+ *	use; similarly, if @rxfh.key is %NULL, @rxfh.hfunc is
+ *	%ETH_RSS_HASH_NO_CHANGE, or @rxfh.input_xfrm is %RXH_XFRM_NO_CHANGE,
+ *	the driver should update the corresponding information in @ctx.
+ *	If the driver provides this method, it must also provide
+ *	@modify_rxfh_context and @remove_rxfh_context.
+ *	Returns a negative error code or zero.
+ * @modify_rxfh_context: Reconfigure the specified RSS context.  Allows setting
+ *	the contents of the RX flow hash indirection table, hash key, and/or
+ *	hash function associated with the given context.
+ *	Parameters which are set to %NULL or zero will remain unchanged.
+ *	The &struct ethtool_rxfh_context for this context is passed in @ctx;
+ *	note that it will still contain the *old* settings.  The driver does
+ *	not need to update these; the core will do so if this op succeeds.
+ *	Returns a negative error code or zero. An error code must be returned
+ *	if at least one unsupported change was requested.
+ * @remove_rxfh_context: Remove the specified RSS context.
+ *	The &struct ethtool_rxfh_context for this context is passed in @ctx.
+ *	Returns a negative error code or zero.
  * @get_channels: Get number of channels.
  * @set_channels: Set number of channels.  Returns a negative error code or
  *	zero.
@@ -909,6 +939,7 @@ struct ethtool_ops {
 	u32     cap_rss_ctx_supported:1;
 	u32	cap_rss_sym_xor_supported:1;
 	u16	rxfh_priv_size;
+	u32	rxfh_max_context_id;
 	u32	supported_coalesce_params;
 	u32	supported_ring_params;
 	void	(*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *);
@@ -971,6 +1002,15 @@ struct ethtool_ops {
 	int	(*get_rxfh)(struct net_device *, struct ethtool_rxfh_param *);
 	int	(*set_rxfh)(struct net_device *, struct ethtool_rxfh_param *,
 			    struct netlink_ext_ack *extack);
+	int	(*create_rxfh_context)(struct net_device *,
+				       struct ethtool_rxfh_context *ctx,
+				       const struct ethtool_rxfh_param *rxfh);
+	int	(*modify_rxfh_context)(struct net_device *,
+				       struct ethtool_rxfh_context *ctx,
+				       const struct ethtool_rxfh_param *rxfh);
+	int	(*remove_rxfh_context)(struct net_device *,
+				       struct ethtool_rxfh_context *ctx,
+				       u32 rss_context);
 	void	(*get_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*set_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*get_dump_flag)(struct net_device *, struct ethtool_dump *);
-- 
cgit v1.2.3


From 30a32cdf6b130356805b3193a6208de25cbb2015 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree.xilinx@gmail.com>
Date: Thu, 27 Jun 2024 16:33:50 +0100
Subject: net: ethtool: add an extack parameter to new rxfh_context APIs

Currently passed as NULL, but will allow drivers to report back errors
 when ethnl support for these ops is added.

Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Link: https://patch.msgid.link/6e0012347d175fdd1280363d7bfa76a2f2777e17.1719502240.git.ecree.xilinx@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 4292a25b2427..9cdbc8e3ed5c 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -1004,13 +1004,16 @@ struct ethtool_ops {
 			    struct netlink_ext_ack *extack);
 	int	(*create_rxfh_context)(struct net_device *,
 				       struct ethtool_rxfh_context *ctx,
-				       const struct ethtool_rxfh_param *rxfh);
+				       const struct ethtool_rxfh_param *rxfh,
+				       struct netlink_ext_ack *extack);
 	int	(*modify_rxfh_context)(struct net_device *,
 				       struct ethtool_rxfh_context *ctx,
-				       const struct ethtool_rxfh_param *rxfh);
+				       const struct ethtool_rxfh_param *rxfh,
+				       struct netlink_ext_ack *extack);
 	int	(*remove_rxfh_context)(struct net_device *,
 				       struct ethtool_rxfh_context *ctx,
-				       u32 rss_context);
+				       u32 rss_context,
+				       struct netlink_ext_ack *extack);
 	void	(*get_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*set_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*get_dump_flag)(struct net_device *, struct ethtool_dump *);
-- 
cgit v1.2.3


From 87925151191b64d9623e63ccf11e517eacc99d7d Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree.xilinx@gmail.com>
Date: Thu, 27 Jun 2024 16:33:51 +0100
Subject: net: ethtool: add a mutex protecting RSS contexts

While this is not needed to serialise the ethtool entry points (which
 are all under RTNL), drivers may have cause to asynchronously access
 dev->ethtool->rss_ctx; taking dev->ethtool->rss_lock allows them to
 do this safely without needing to take the RTNL.

Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Link: https://patch.msgid.link/7f9c15eb7525bf87af62c275dde3a8570ee8bf0a.1719502240.git.ecree.xilinx@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 9cdbc8e3ed5c..f74bb0cf8ed1 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -1104,10 +1104,13 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
 /**
  * struct ethtool_netdev_state - per-netdevice state for ethtool features
  * @rss_ctx:		XArray of custom RSS contexts
+ * @rss_lock:		Protects entries in @rss_ctx.  May be taken from
+ *			within RTNL.
  * @wol_enabled:	Wake-on-LAN is enabled
  */
 struct ethtool_netdev_state {
 	struct xarray		rss_ctx;
+	struct mutex		rss_lock;
 	unsigned		wol_enabled:1;
 };
 
-- 
cgit v1.2.3


From b8c7dd15ceb87e5f37ec1ed7b56c279d98f3eb53 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Tue, 11 Jun 2024 17:12:22 -0700
Subject: kernel-wide: fix spelling mistakes like "assocative" -> "associative"

There were several instances of the string "assocat" in the kernel, which
should have been spelled "associat", with the various endings of -ive,
-ed, -ion, and sometimes beginnging with dis-.

Add to the spelling dictionary the corrections so that future instances
will be caught by checkpatch, and fix the instances found.

Originally noticed by accident with a 'git grep socat'.

Link: https://lkml.kernel.org/r/20240612001247.356867-1-jesse.brandeburg@intel.com
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/nvme-fc-driver.h  | 2 +-
 include/linux/soc/apple/rtkit.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 4109f1bd6128..1177dde77104 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -620,7 +620,7 @@ enum {
  *
  * Structure used between LLDD and nvmet-fc layer to represent the exchange
  * context for a FC-NVME FCP I/O operation (e.g. a nvme sqe, the sqe-related
- * memory transfers, and its assocated cqe transfer).
+ * memory transfers, and its associated cqe transfer).
  *
  * The structure is allocated by the LLDD whenever a FCP CMD IU is received
  * from the FC link. The address of the structure is passed to the nvmet-fc
diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h
index 8c9ca857ccf6..c06d17599ae7 100644
--- a/include/linux/soc/apple/rtkit.h
+++ b/include/linux/soc/apple/rtkit.h
@@ -69,7 +69,7 @@ struct apple_rtkit;
  * Initializes the internal state required to handle RTKit. This
  * should usually be called within _probe.
  *
- * @dev:         Pointer to the device node this coprocessor is assocated with
+ * @dev:         Pointer to the device node this coprocessor is associated with
  * @cookie:      opaque cookie passed to all functions defined in rtkit_ops
  * @mbox_name:   mailbox name used to communicate with the co-processor
  * @mbox_idx:    mailbox index to be used if mbox_name is NULL
@@ -83,7 +83,7 @@ struct apple_rtkit *devm_apple_rtkit_init(struct device *dev, void *cookie,
  * Non-devm version of devm_apple_rtkit_init. Must be freed with
  * apple_rtkit_free.
  *
- * @dev:         Pointer to the device node this coprocessor is assocated with
+ * @dev:         Pointer to the device node this coprocessor is associated with
  * @cookie:      opaque cookie passed to all functions defined in rtkit_ops
  * @mbox_name:   mailbox name used to communicate with the co-processor
  * @mbox_idx:    mailbox index to be used if mbox_name is NULL
-- 
cgit v1.2.3


From 7c812814e8c34a41bff6fe49987760ffaf8702af Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 24 Jun 2024 18:39:49 +0300
Subject: compiler.h: simplify data_race() macro

-Wdeclaration-after-statement used since forever required statement
expressions to inject __kcsan_disable_current(), __kcsan_enable_current()
to mark data race. Now that it is gone, make macro expansion simpler.

__unqual_scalar_typeof() is wordy macro by itself.
"expr" is expanded twice.

Link: https://lkml.kernel.org/r/fb62163f-ba21-4661-be5b-bb5124abc87d@p183
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/compiler.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 8c252e073bd8..2ea6120f3f7a 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -200,10 +200,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
  */
 #define data_race(expr)							\
 ({									\
-	__unqual_scalar_typeof(({ expr; })) __v = ({			\
-		__kcsan_disable_current();				\
-		expr;							\
-	});								\
+	__kcsan_disable_current();					\
+	__auto_type __v = (expr);					\
 	__kcsan_enable_current();					\
 	__v;								\
 })
-- 
cgit v1.2.3


From 32c1935280f11ef03efdb069aa46c0bc631eab7d Mon Sep 17 00:00:00 2001
From: Nils Rothaug <nils.rothaug@gmx.de>
Date: Sun, 23 Jun 2024 12:46:43 +0200
Subject: media: tuner-simple: Add support for Tena TNF931D-DFDR1

Tuner ranges were determined by USB capturing the vendor driver of a
MyGica UTV3 video capture card.

Signed-off-by: Nils Rothaug <nils.rothaug@gmx.de>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/media/tuner.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/media/tuner.h b/include/media/tuner.h
index a7796e0a3659..c5fd6faabfd6 100644
--- a/include/media/tuner.h
+++ b/include/media/tuner.h
@@ -133,6 +133,7 @@
 #define TUNER_SONY_BTF_PK467Z		90	/* NTSC_JP */
 #define TUNER_SONY_BTF_PB463Z		91	/* NTSC */
 #define TUNER_SI2157			92
+#define TUNER_TENA_TNF_931D_DFDR1	93
 
 /* tv card specific */
 #define TDA9887_PRESENT			(1<<0)
-- 
cgit v1.2.3


From e31604d5922ef76a58bd3f9ae719486887eb266e Mon Sep 17 00:00:00 2001
From: Nils Rothaug <nils.rothaug@gmx.de>
Date: Sun, 23 Jun 2024 12:46:44 +0200
Subject: media: rc: add keymap for MyGica UTV3 remote

Add keymap for the simple IR (RC-5) remote that comes with the
MyGica UTV3 Analog USB2.0 TV Box video capture card.

Signed-off-by: Nils Rothaug <nils.rothaug@gmx.de>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/media/rc-map.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index 4676545ffd8f..4867eb2f931e 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -290,6 +290,7 @@ struct rc_map *rc_map_get(const char *name);
 #define RC_MAP_MSI_DIGIVOX_III           "rc-msi-digivox-iii"
 #define RC_MAP_MSI_TVANYWHERE            "rc-msi-tvanywhere"
 #define RC_MAP_MSI_TVANYWHERE_PLUS       "rc-msi-tvanywhere-plus"
+#define RC_MAP_MYGICA_UTV3               "rc-mygica-utv3"
 #define RC_MAP_NEBULA                    "rc-nebula"
 #define RC_MAP_NEC_TERRATEC_CINERGY_XS   "rc-nec-terratec-cinergy-xs"
 #define RC_MAP_NORWOOD                   "rc-norwood"
-- 
cgit v1.2.3


From 3e26d9f08fbe0b73e951a5e810fdb7a332b7e37f Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Thu, 20 Jun 2024 14:27:22 +0200
Subject: iio: core: Add new DMABUF interface infrastructure

Add the necessary infrastructure to the IIO core to support a new
optional DMABUF based interface.

With this new interface, DMABUF objects (externally created) can be
attached to a IIO buffer, and subsequently used for data transfer.

A userspace application can then use this interface to share DMABUF
objects between several interfaces, allowing it to transfer data in a
zero-copy fashion, for instance between IIO and the USB stack.

The userspace application can also memory-map the DMABUF objects, and
access the sample data directly. The advantage of doing this vs. the
read() interface is that it avoids an extra copy of the data between the
kernel and userspace. This is particularly userful for high-speed
devices which produce several megabytes or even gigabytes of data per
second.

As part of the interface, 3 new IOCTLs have been added:

IIO_BUFFER_DMABUF_ATTACH_IOCTL(int fd):
 Attach the DMABUF object identified by the given file descriptor to the
 buffer.

IIO_BUFFER_DMABUF_DETACH_IOCTL(int fd):
 Detach the DMABUF object identified by the given file descriptor from
 the buffer. Note that closing the IIO buffer's file descriptor will
 automatically detach all previously attached DMABUF objects.

IIO_BUFFER_DMABUF_ENQUEUE_IOCTL(struct iio_dmabuf *):
 Request a data transfer to/from the given DMABUF object. Its file
 descriptor, as well as the transfer size and flags are provided in the
 "iio_dmabuf" structure.

These three IOCTLs have to be performed on the IIO buffer's file
descriptor, obtained using the IIO_BUFFER_GET_FD_IOCTL() ioctl.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Co-developed-by: Nuno Sa <nuno.sa@analog.com>
Signed-off-by: Nuno Sa <nuno.sa@analog.com>
Link: https://patch.msgid.link/20240620122726.41232-4-paul@crapouillou.net
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/buffer_impl.h | 33 +++++++++++++++++++++++++++++++++
 include/uapi/linux/iio/buffer.h | 22 ++++++++++++++++++++++
 2 files changed, 55 insertions(+)

(limited to 'include')

diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h
index 89c3fd7c29ca..e72552e026f3 100644
--- a/include/linux/iio/buffer_impl.h
+++ b/include/linux/iio/buffer_impl.h
@@ -9,8 +9,12 @@
 #include <uapi/linux/iio/buffer.h>
 #include <linux/iio/buffer.h>
 
+struct dma_buf_attachment;
+struct dma_fence;
 struct iio_dev;
+struct iio_dma_buffer_block;
 struct iio_buffer;
+struct sg_table;
 
 /**
  * INDIO_BUFFER_FLAG_FIXED_WATERMARK - Watermark level of the buffer can not be
@@ -39,6 +43,16 @@ struct iio_buffer;
  *                      device stops sampling. Calles are balanced with @enable.
  * @release:		called when the last reference to the buffer is dropped,
  *			should free all resources allocated by the buffer.
+ * @attach_dmabuf:	called from userspace via ioctl to attach one external
+ *			DMABUF.
+ * @detach_dmabuf:	called from userspace via ioctl to detach one previously
+ *			attached DMABUF.
+ * @enqueue_dmabuf:	called from userspace via ioctl to queue this DMABUF
+ *			object to this buffer. Requires a valid DMABUF fd, that
+ *			was previouly attached to this buffer.
+ * @lock_queue:		called when the core needs to lock the buffer queue;
+ *                      it is used when enqueueing DMABUF objects.
+ * @unlock_queue:       used to unlock a previously locked buffer queue
  * @modes:		Supported operating modes by this buffer type
  * @flags:		A bitmask combination of INDIO_BUFFER_FLAG_*
  *
@@ -68,6 +82,17 @@ struct iio_buffer_access_funcs {
 
 	void (*release)(struct iio_buffer *buffer);
 
+	struct iio_dma_buffer_block * (*attach_dmabuf)(struct iio_buffer *buffer,
+						       struct dma_buf_attachment *attach);
+	void (*detach_dmabuf)(struct iio_buffer *buffer,
+			      struct iio_dma_buffer_block *block);
+	int (*enqueue_dmabuf)(struct iio_buffer *buffer,
+			      struct iio_dma_buffer_block *block,
+			      struct dma_fence *fence, struct sg_table *sgt,
+			      size_t size, bool cyclic);
+	void (*lock_queue)(struct iio_buffer *buffer);
+	void (*unlock_queue)(struct iio_buffer *buffer);
+
 	unsigned int modes;
 	unsigned int flags;
 };
@@ -136,6 +161,12 @@ struct iio_buffer {
 
 	/* @ref: Reference count of the buffer. */
 	struct kref ref;
+
+	/* @dmabufs: List of DMABUF attachments */
+	struct list_head dmabufs; /* P: dmabufs_mutex */
+
+	/* @dmabufs_mutex: Protects dmabufs */
+	struct mutex dmabufs_mutex;
 };
 
 /**
@@ -159,6 +190,8 @@ void iio_buffer_init(struct iio_buffer *buffer);
 struct iio_buffer *iio_buffer_get(struct iio_buffer *buffer);
 void iio_buffer_put(struct iio_buffer *buffer);
 
+void iio_buffer_signal_dmabuf_done(struct dma_fence *fence, int ret);
+
 #else /* CONFIG_IIO_BUFFER */
 
 static inline void iio_buffer_get(struct iio_buffer *buffer) {}
diff --git a/include/uapi/linux/iio/buffer.h b/include/uapi/linux/iio/buffer.h
index 13939032b3f6..c666aa95e532 100644
--- a/include/uapi/linux/iio/buffer.h
+++ b/include/uapi/linux/iio/buffer.h
@@ -5,6 +5,28 @@
 #ifndef _UAPI_IIO_BUFFER_H_
 #define _UAPI_IIO_BUFFER_H_
 
+#include <linux/types.h>
+
+/* Flags for iio_dmabuf.flags */
+#define IIO_BUFFER_DMABUF_CYCLIC		(1 << 0)
+#define IIO_BUFFER_DMABUF_SUPPORTED_FLAGS	0x00000001
+
+/**
+ * struct iio_dmabuf - Descriptor for a single IIO DMABUF object
+ * @fd:		file descriptor of the DMABUF object
+ * @flags:	one or more IIO_BUFFER_DMABUF_* flags
+ * @bytes_used:	number of bytes used in this DMABUF for the data transfer.
+ *		Should generally be set to the DMABUF's size.
+ */
+struct iio_dmabuf {
+	__u32 fd;
+	__u32 flags;
+	__u64 bytes_used;
+};
+
 #define IIO_BUFFER_GET_FD_IOCTL			_IOWR('i', 0x91, int)
+#define IIO_BUFFER_DMABUF_ATTACH_IOCTL		_IOW('i', 0x92, int)
+#define IIO_BUFFER_DMABUF_DETACH_IOCTL		_IOW('i', 0x93, int)
+#define IIO_BUFFER_DMABUF_ENQUEUE_IOCTL		_IOW('i', 0x94, struct iio_dmabuf)
 
 #endif /* _UAPI_IIO_BUFFER_H_ */
-- 
cgit v1.2.3


From d85318900c1c06a251ad3d86fba6bbab116a95d5 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Thu, 20 Jun 2024 14:27:23 +0200
Subject: iio: buffer-dma: Enable support for DMABUFs

Implement iio_dma_buffer_attach_dmabuf(), iio_dma_buffer_detach_dmabuf()
and iio_dma_buffer_transfer_dmabuf(), which can then be used by the IIO
DMA buffer implementations.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Co-developed-by: Nuno Sa <nuno.sa@analog.com>
Signed-off-by: Nuno Sa <nuno.sa@analog.com>
Link: https://patch.msgid.link/20240620122726.41232-5-paul@crapouillou.net
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/buffer-dma.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'include')

diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h
index 6e27e47077d5..5eb66a399002 100644
--- a/include/linux/iio/buffer-dma.h
+++ b/include/linux/iio/buffer-dma.h
@@ -7,6 +7,7 @@
 #ifndef __INDUSTRIALIO_DMA_BUFFER_H__
 #define __INDUSTRIALIO_DMA_BUFFER_H__
 
+#include <linux/atomic.h>
 #include <linux/list.h>
 #include <linux/kref.h>
 #include <linux/spinlock.h>
@@ -16,6 +17,9 @@
 struct iio_dma_buffer_queue;
 struct iio_dma_buffer_ops;
 struct device;
+struct dma_buf_attachment;
+struct dma_fence;
+struct sg_table;
 
 /**
  * enum iio_block_state - State of a struct iio_dma_buffer_block
@@ -41,6 +45,10 @@ enum iio_block_state {
  * @queue: Parent DMA buffer queue
  * @kref: kref used to manage the lifetime of block
  * @state: Current state of the block
+ * @cyclic: True if this is a cyclic buffer
+ * @fileio: True if this buffer is used for fileio mode
+ * @sg_table: DMA table for the transfer when transferring a DMABUF
+ * @fence: DMA fence to be signaled when a DMABUF transfer is complete
  */
 struct iio_dma_buffer_block {
 	/* May only be accessed by the owner of the block */
@@ -63,6 +71,12 @@ struct iio_dma_buffer_block {
 	 * queue->list_lock if the block is not owned by the core.
 	 */
 	enum iio_block_state state;
+
+	bool cyclic;
+	bool fileio;
+
+	struct sg_table *sg_table;
+	struct dma_fence *fence;
 };
 
 /**
@@ -72,6 +86,7 @@ struct iio_dma_buffer_block {
  * @pos: Read offset in the active block
  * @block_size: Size of each block
  * @next_dequeue: index of next block that will be dequeued
+ * @enabled: Whether the buffer is operating in fileio mode
  */
 struct iio_dma_buffer_queue_fileio {
 	struct iio_dma_buffer_block *blocks[2];
@@ -80,6 +95,7 @@ struct iio_dma_buffer_queue_fileio {
 	size_t block_size;
 
 	unsigned int next_dequeue;
+	bool enabled;
 };
 
 /**
@@ -95,6 +111,7 @@ struct iio_dma_buffer_queue_fileio {
  *   the DMA controller
  * @incoming: List of buffers on the incoming queue
  * @active: Whether the buffer is currently active
+ * @num_dmabufs: Total number of DMABUFs attached to this queue
  * @fileio: FileIO state
  */
 struct iio_dma_buffer_queue {
@@ -107,6 +124,7 @@ struct iio_dma_buffer_queue {
 	struct list_head incoming;
 
 	bool active;
+	atomic_t num_dmabufs;
 
 	struct iio_dma_buffer_queue_fileio fileio;
 };
@@ -144,4 +162,17 @@ int iio_dma_buffer_init(struct iio_dma_buffer_queue *queue,
 void iio_dma_buffer_exit(struct iio_dma_buffer_queue *queue);
 void iio_dma_buffer_release(struct iio_dma_buffer_queue *queue);
 
+struct iio_dma_buffer_block *
+iio_dma_buffer_attach_dmabuf(struct iio_buffer *buffer,
+			     struct dma_buf_attachment *attach);
+void iio_dma_buffer_detach_dmabuf(struct iio_buffer *buffer,
+				  struct iio_dma_buffer_block *block);
+int iio_dma_buffer_enqueue_dmabuf(struct iio_buffer *buffer,
+				  struct iio_dma_buffer_block *block,
+				  struct dma_fence *fence,
+				  struct sg_table *sgt,
+				  size_t size, bool cyclic);
+void iio_dma_buffer_lock_queue(struct iio_buffer *buffer);
+void iio_dma_buffer_unlock_queue(struct iio_buffer *buffer);
+
 #endif
-- 
cgit v1.2.3


From dbbe7eaf0e4795bf003ac06872aaf52b6b6b1310 Mon Sep 17 00:00:00 2001
From: Nuno Sa <nuno.sa@analog.com>
Date: Thu, 6 Jun 2024 09:22:37 +0200
Subject: dev_printk: add new dev_err_probe() helpers

This is similar to dev_err_probe() but for cases where an ERR_PTR() or
ERR_CAST() is to be returned simplifying patterns like:

	dev_err_probe(dev, ret, ...);
	return ERR_PTR(ret)
or
	dev_err_probe(dev, PTR_ERR(ptr), ...);
	return ERR_CAST(ptr)

Signed-off-by: Nuno Sa <nuno.sa@analog.com>
Link: https://patch.msgid.link/20240606-dev-add_dev_errp_probe-v3-1-51bb229edd79@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/dev_printk.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h
index ae80a303c216..ca32b5bb28eb 100644
--- a/include/linux/dev_printk.h
+++ b/include/linux/dev_printk.h
@@ -277,4 +277,12 @@ do {									\
 
 __printf(3, 4) int dev_err_probe(const struct device *dev, int err, const char *fmt, ...);
 
+/* Simple helper for dev_err_probe() when ERR_PTR() is to be returned. */
+#define dev_err_ptr_probe(dev, ___err, fmt, ...) \
+	ERR_PTR(dev_err_probe(dev, ___err, fmt, ##__VA_ARGS__))
+
+/* Simple helper for dev_err_probe() when ERR_CAST() is to be returned. */
+#define dev_err_cast_probe(dev, ___err_ptr, fmt, ...) \
+	ERR_PTR(dev_err_probe(dev, PTR_ERR(___err_ptr), fmt, ##__VA_ARGS__))
+
 #endif /* _DEVICE_PRINTK_H_ */
-- 
cgit v1.2.3


From f6549f538fe0b2c389e1a7037f4e21039e25137a Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Sat, 29 Jun 2024 14:42:12 +0200
Subject: ata,scsi: libata-core: Do not leak memory for ata_port struct members

libsas is currently not freeing all the struct ata_port struct members,
e.g. ncq_sense_buf for a driver supporting Command Duration Limits (CDL).

Add a function, ata_port_free(), that is used to free a ata_port,
including its struct members. It makes sense to keep the code related to
freeing a ata_port in its own function, which will also free all the
struct members of struct ata_port.

Fixes: 18bd7718b5c4 ("scsi: ata: libata: Handle completion of CDL commands using policy 0xD")
Reviewed-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20240629124210.181537-8-cassel@kernel.org
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
 include/linux/libata.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 13fb41d25da6..7d3bd7c9664a 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1249,6 +1249,7 @@ extern int ata_slave_link_init(struct ata_port *ap);
 extern struct ata_port *ata_sas_port_alloc(struct ata_host *,
 					   struct ata_port_info *, struct Scsi_Host *);
 extern void ata_port_probe(struct ata_port *ap);
+extern void ata_port_free(struct ata_port *ap);
 extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap);
 extern void ata_sas_tport_delete(struct ata_port *ap);
 int ata_sas_device_configure(struct scsi_device *sdev, struct queue_limits *lim,
-- 
cgit v1.2.3


From 61842868de13aa7fd7391c626e889f4d6f1450bf Mon Sep 17 00:00:00 2001
From: Jose Ignacio Tornos Martinez <jtornosm@redhat.com>
Date: Fri, 10 May 2024 10:57:22 +0200
Subject: module: create weak dependecies

It has been seen that for some network mac drivers (i.e. lan78xx) the
related module for the phy is loaded dynamically depending on the current
hardware. In this case, the associated phy is read using mdio bus and then
the associated phy module is loaded during runtime (kernel function
phy_request_driver_module). However, no software dependency is defined, so
the user tools will no be able to get this dependency. For example, if
dracut is used and the hardware is present, lan78xx will be included but no
phy module will be added, and in the next restart the device will not work
from boot because no related phy will be found during initramfs stage.

In order to solve this, we could define a normal 'pre' software dependency
in lan78xx module with all the possible phy modules (there may be some),
but proceeding in that way, all the possible phy modules would be loaded
while only one is necessary.

The idea is to create a new type of dependency, that we are going to call
'weak' to be used only by the user tools that need to detect this situation.
In that way, for example, dracut could check the 'weak' dependency of the
modules involved in order to install these dependencies in initramfs too.
That is, for the commented lan78xx module, defining the 'weak' dependency
with the possible phy modules list, only the necessary phy would be loaded
on demand keeping the same behavior, but all the possible phy modules would
be available from initramfs.

The 'weak' dependency support has been included in kmod:
https://github.com/kmod-project/kmod/commit/05828b4a6e9327a63ef94df544a042b5e9ce4fe7
But, take into account that this can only be used if depmod is new enough.
If it isn't, depmod will have the same behavior as always (keeping backward
compatibility) and the information for the 'weak' dependency will not be
provided.

Signed-off-by: Jose Ignacio Tornos Martinez <jtornosm@redhat.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 include/linux/module.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/module.h b/include/linux/module.h
index ffa1c603163c..ae19bf7e3131 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -173,6 +173,12 @@ extern void cleanup_module(void);
  */
 #define MODULE_SOFTDEP(_softdep) MODULE_INFO(softdep, _softdep)
 
+/*
+ * Weak module dependencies. See man modprobe.d for details.
+ * Example: MODULE_WEAKDEP("module-foo")
+ */
+#define MODULE_WEAKDEP(_weakdep) MODULE_INFO(weakdep, _weakdep)
+
 /*
  * MODULE_FILE is used for generating modules.builtin
  * So, make it no-op when this is being built as a module
-- 
cgit v1.2.3


From 5b8feca8dee443055049c8ccfdd97f64a0e1d2b4 Mon Sep 17 00:00:00 2001
From: Daisuke Nojiri <dnojiri@chromium.org>
Date: Thu, 27 Jun 2024 16:53:08 -0700
Subject: dt-bindings: input: cros-ec-keyboard: Add keyboard matrix v3.0

Add support for keyboard matrix version 3.0, which reduces keyboard
ghosting.

Signed-off-by: Daisuke Nojiri <dnojiri@chromium.org>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Link: https://lore.kernel.org/r/9ae4d96cc2ce8c9de8755b9beffb78c641100fe7.1719531519.git.dnojiri@chromium.org
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
---
 include/dt-bindings/input/cros-ec-keyboard.h | 104 +++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/input/cros-ec-keyboard.h b/include/dt-bindings/input/cros-ec-keyboard.h
index f0ae03634a96..afc12f6aa642 100644
--- a/include/dt-bindings/input/cros-ec-keyboard.h
+++ b/include/dt-bindings/input/cros-ec-keyboard.h
@@ -100,4 +100,108 @@
 	MATRIX_KEY(0x07, 0x0b, KEY_UP)		\
 	MATRIX_KEY(0x07, 0x0c, KEY_LEFT)
 
+/* No numpad */
+#define CROS_TOP_ROW_KEYMAP_V30 \
+	MATRIX_KEY(0x00, 0x01, KEY_F11)		/* T11 */	\
+	MATRIX_KEY(0x00, 0x02, KEY_F1)		/* T1 */	\
+	MATRIX_KEY(0x00, 0x04, KEY_F10)		/* T10 */	\
+	MATRIX_KEY(0x00, 0x0b, KEY_F14)		/* T14 */	\
+	MATRIX_KEY(0x00, 0x0c, KEY_F15)		/* T15 */	\
+	MATRIX_KEY(0x01, 0x02, KEY_F4)		/* T4 */	\
+	MATRIX_KEY(0x01, 0x04, KEY_F7)		/* T7 */	\
+	MATRIX_KEY(0x01, 0x05, KEY_F12)		/* T12 */	\
+	MATRIX_KEY(0x01, 0x09, KEY_F9)		/* T9 */	\
+	MATRIX_KEY(0x02, 0x02, KEY_F3)		/* T3 */	\
+	MATRIX_KEY(0x02, 0x04, KEY_F6)		/* T6 */	\
+	MATRIX_KEY(0x02, 0x0b, KEY_F8)		/* T8 */	\
+	MATRIX_KEY(0x03, 0x02, KEY_F2)		/* T2 */	\
+	MATRIX_KEY(0x03, 0x05, KEY_F13)		/* T13 */	\
+	MATRIX_KEY(0x04, 0x04, KEY_F5)		/* T5 */
+
+#define CROS_MAIN_KEYMAP_V30			/* Keycode */	\
+	MATRIX_KEY(0x00, 0x03, KEY_B)		/* 50 */	\
+	MATRIX_KEY(0x00, 0x05, KEY_N)		/* 51 */	\
+	MATRIX_KEY(0x00, 0x06, KEY_RO)		/* 56 (JIS) */	\
+	MATRIX_KEY(0x00, 0x08, KEY_EQUAL)	/* 13 */	\
+	MATRIX_KEY(0x00, 0x09, KEY_HOME)	/* 80 (Numpad) */	\
+	MATRIX_KEY(0x00, 0x0a, KEY_RIGHTALT)	/* 62 */	\
+	MATRIX_KEY(0x00, 0x10, KEY_FN)		/* 127 */	\
+								\
+	MATRIX_KEY(0x01, 0x01, KEY_ESC)		/* 110 */	\
+	MATRIX_KEY(0x01, 0x03, KEY_G)		/* 35 */	\
+	MATRIX_KEY(0x01, 0x06, KEY_H)		/* 36 */	\
+	MATRIX_KEY(0x01, 0x08, KEY_APOSTROPHE)	/* 41 */	\
+	MATRIX_KEY(0x01, 0x0b, KEY_BACKSPACE)	/* 15 */	\
+	MATRIX_KEY(0x01, 0x0c, KEY_HENKAN)	/* 65 (JIS) */	\
+	MATRIX_KEY(0x01, 0x0e, KEY_LEFTCTRL)	/* 58 */	\
+								\
+	MATRIX_KEY(0x02, 0x01, KEY_TAB)		/* 16 */	\
+	MATRIX_KEY(0x02, 0x03, KEY_T)		/* 21 */	\
+	MATRIX_KEY(0x02, 0x05, KEY_RIGHTBRACE)	/* 28 */	\
+	MATRIX_KEY(0x02, 0x06, KEY_Y)		/* 22 */	\
+	MATRIX_KEY(0x02, 0x08, KEY_LEFTBRACE)	/* 27 */	\
+	MATRIX_KEY(0x02, 0x09, KEY_DELETE)	/* 76 (Numpad) */	\
+	MATRIX_KEY(0x02, 0x0c, KEY_PAGEUP)	/* 85 (Numpad) */	\
+	MATRIX_KEY(0x02, 0x011, KEY_YEN)	/* 14 (JIS) */	\
+								\
+	MATRIX_KEY(0x03, 0x00, KEY_LEFTMETA)	/* Launcher */	\
+	MATRIX_KEY(0x03, 0x01, KEY_GRAVE)	/* 1 */	\
+	MATRIX_KEY(0x03, 0x03, KEY_5)		/* 6 */	\
+	MATRIX_KEY(0x03, 0x04, KEY_S)		/* 32 */	\
+	MATRIX_KEY(0x03, 0x06, KEY_MINUS)	/* 12 */	\
+	MATRIX_KEY(0x03, 0x08, KEY_6)		/* 7 */		\
+	MATRIX_KEY(0x03, 0x09, KEY_SLEEP)	/* Lock */	\
+	MATRIX_KEY(0x03, 0x0b, KEY_BACKSLASH)	/* 29 */	\
+	MATRIX_KEY(0x03, 0x0c, KEY_MUHENKAN)	/* 63 (JIS) */	\
+	MATRIX_KEY(0x03, 0x0e, KEY_RIGHTCTRL)	/* 64 */	\
+								\
+	MATRIX_KEY(0x04, 0x01, KEY_A)		/* 31 */	\
+	MATRIX_KEY(0x04, 0x02, KEY_D)		/* 33 */	\
+	MATRIX_KEY(0x04, 0x03, KEY_F)		/* 34 */	\
+	MATRIX_KEY(0x04, 0x05, KEY_K)		/* 38 */	\
+	MATRIX_KEY(0x04, 0x06, KEY_J)		/* 37 */	\
+	MATRIX_KEY(0x04, 0x08, KEY_SEMICOLON)	/* 40 */	\
+	MATRIX_KEY(0x04, 0x09, KEY_L)		/* 39 */	\
+	MATRIX_KEY(0x04, 0x0b, KEY_ENTER)	/* 43 */	\
+	MATRIX_KEY(0x04, 0x0c, KEY_END)		/* 81 (Numpad) */	\
+								\
+	MATRIX_KEY(0x05, 0x01, KEY_1)		/* 2 */	\
+	MATRIX_KEY(0x05, 0x02, KEY_COMMA)	/* 53 */	\
+	MATRIX_KEY(0x05, 0x03, KEY_DOT)		/* 54 */	\
+	MATRIX_KEY(0x05, 0x04, KEY_SLASH)	/* 55 */	\
+	MATRIX_KEY(0x05, 0x05, KEY_C)		/* 48 */	\
+	MATRIX_KEY(0x05, 0x06, KEY_SPACE)	/* 61 */	\
+	MATRIX_KEY(0x05, 0x07, KEY_LEFTSHIFT)	/* 44 */	\
+	MATRIX_KEY(0x05, 0x08, KEY_X)		/* 47 */	\
+	MATRIX_KEY(0x05, 0x09, KEY_V)		/* 49 */	\
+	MATRIX_KEY(0x05, 0x0b, KEY_M)		/* 52 */	\
+	MATRIX_KEY(0x05, 0x0c, KEY_PAGEDOWN)	/* 86 (Numpad) */	\
+								\
+	MATRIX_KEY(0x06, 0x01, KEY_Z)		/* 46 */	\
+	MATRIX_KEY(0x06, 0x02, KEY_3)		/* 4 */		\
+	MATRIX_KEY(0x06, 0x03, KEY_4)		/* 5 */		\
+	MATRIX_KEY(0x06, 0x04, KEY_2)		/* 3 */		\
+	MATRIX_KEY(0x06, 0x05, KEY_8)		/* 9 */		\
+	MATRIX_KEY(0x06, 0x06, KEY_0)		/* 11 */	\
+	MATRIX_KEY(0x06, 0x08, KEY_7)		/* 8 */		\
+	MATRIX_KEY(0x06, 0x09, KEY_9)		/* 10 */	\
+	MATRIX_KEY(0x06, 0x0b, KEY_DOWN)	/* 84 */	\
+	MATRIX_KEY(0x06, 0x0c, KEY_RIGHT)	/* 89 */	\
+	MATRIX_KEY(0x06, 0x0d, KEY_LEFTALT)	/* 60 */	\
+	MATRIX_KEY(0x06, 0x0f, KEY_ASSISTANT)	/* 128 */	\
+	MATRIX_KEY(0x06, 0x11, KEY_BACKSLASH)	/* 42 (JIS, ISO) */	\
+								\
+	MATRIX_KEY(0x07, 0x01, KEY_U)		/* 23 */	\
+	MATRIX_KEY(0x07, 0x02, KEY_I)		/* 24 */	\
+	MATRIX_KEY(0x07, 0x03, KEY_O)		/* 25 */	\
+	MATRIX_KEY(0x07, 0x04, KEY_P)		/* 26 */	\
+	MATRIX_KEY(0x07, 0x05, KEY_Q)		/* 17 */	\
+	MATRIX_KEY(0x07, 0x06, KEY_W)		/* 18 */	\
+	MATRIX_KEY(0x07, 0x07, KEY_RIGHTSHIFT)	/* 57 */	\
+	MATRIX_KEY(0x07, 0x08, KEY_E)		/* 19 */	\
+	MATRIX_KEY(0x07, 0x09, KEY_R)		/* 20 */	\
+	MATRIX_KEY(0x07, 0x0b, KEY_UP)		/* 83 */	\
+	MATRIX_KEY(0x07, 0x0c, KEY_LEFT)	/* 79 */	\
+	MATRIX_KEY(0x07, 0x11, KEY_102ND)	/* 45 (ISO) */
+
 #endif /* _CROS_EC_KEYBOARD_H */
-- 
cgit v1.2.3


From 7c8110057b1bb9ab9f47ac0efc554d5c3a53b693 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 27 Jun 2024 14:35:50 -0700
Subject: tcp_metrics: add UAPI to the header guard

tcp_metrics' header lacks the customary _UAPI in the header guard.
This makes YNL build rules work less seamlessly.
We can easily fix that on YNL side, but this could also be
problematic if we ever needed to create a kernel-only tcp_metrics.h.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Donald Hunter <donald.hunter@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tcp_metrics.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h
index 7cb4a172feed..c48841076998 100644
--- a/include/uapi/linux/tcp_metrics.h
+++ b/include/uapi/linux/tcp_metrics.h
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /* tcp_metrics.h - TCP Metrics Interface */
 
-#ifndef _LINUX_TCP_METRICS_H
-#define _LINUX_TCP_METRICS_H
+#ifndef _UAPI_LINUX_TCP_METRICS_H
+#define _UAPI_LINUX_TCP_METRICS_H
 
 #include <linux/types.h>
 
@@ -58,4 +58,4 @@ enum {
 
 #define TCP_METRICS_CMD_MAX	(__TCP_METRICS_CMD_MAX - 1)
 
-#endif /* _LINUX_TCP_METRICS_H */
+#endif /* _UAPI_LINUX_TCP_METRICS_H */
-- 
cgit v1.2.3


From 85674625e0bc25be4dbaa165a556ef6037328379 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 27 Jun 2024 14:35:51 -0700
Subject: tcp_metrics: add netlink protocol spec in YAML

Add a protocol spec for tcp_metrics, so that it's accessible via YNL.
Useful at the very least for testing fixes.

In this episode of "10,000 ways to complicate netlink" the metric
nest has defines which are off by 1. iproute2 does:

        struct rtattr *m[TCP_METRIC_MAX + 1 + 1];

        parse_rtattr_nested(m, TCP_METRIC_MAX + 1, a);

        for (i = 0; i < TCP_METRIC_MAX + 1; i++) {
                // ...
                attr = m[i + 1];

This is too weird to support in YNL, add a new set of defines
with _correct_ values to the official kernel header.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Donald Hunter <donald.hunter@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tcp_metrics.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h
index c48841076998..927c735a5b0e 100644
--- a/include/uapi/linux/tcp_metrics.h
+++ b/include/uapi/linux/tcp_metrics.h
@@ -27,6 +27,22 @@ enum tcp_metric_index {
 
 #define TCP_METRIC_MAX	(__TCP_METRIC_MAX - 1)
 
+/* Re-define enum tcp_metric_index, again, using the values carried
+ * as netlink attribute types.
+ */
+enum {
+	TCP_METRICS_A_METRICS_RTT = 1,
+	TCP_METRICS_A_METRICS_RTTVAR,
+	TCP_METRICS_A_METRICS_SSTHRESH,
+	TCP_METRICS_A_METRICS_CWND,
+	TCP_METRICS_A_METRICS_REODERING,
+	TCP_METRICS_A_METRICS_RTT_US,
+	TCP_METRICS_A_METRICS_RTTVAR_US,
+
+	__TCP_METRICS_A_METRICS_MAX
+};
+#define TCP_METRICS_A_METRICS_MAX (__TCP_METRICS_A_METRICS_MAX - 1)
+
 enum {
 	TCP_METRICS_ATTR_UNSPEC,
 	TCP_METRICS_ATTR_ADDR_IPV4,		/* u32 */
-- 
cgit v1.2.3


From 65528cfb21fdb68de8ae6dccae19af180d93e143 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Sun, 16 Jun 2024 19:08:34 +0300
Subject: net/mlx5: mlx5_ifc update for multi-plane support

Add new fields to support mlx5 multi-plane feature. Actual support will
be added in following patches.

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Link: https://lore.kernel.org/r/36a74a1b1d2b7b59c99cda4abad1794ddde30230.1718553901.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/linux/mlx5/mlx5_ifc.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 09d9d87d62c6..61738990e399 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -793,7 +793,7 @@ struct mlx5_ifc_ads_bits {
 	u8         reserved_at_2[0xe];
 	u8         pkey_index[0x10];
 
-	u8         reserved_at_20[0x8];
+	u8         plane_index[0x8];
 	u8         grh[0x1];
 	u8         mlid[0x7];
 	u8         rlid[0x10];
@@ -1990,7 +1990,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
 	u8	   reserved_at_c0[0x8];
 	u8	   migration_multi_load[0x1];
 	u8	   migration_tracking_state[0x1];
-	u8	   reserved_at_ca[0x6];
+	u8	   multiplane_qp_ud[0x1];
+	u8	   reserved_at_cb[0x5];
 	u8	   migration_in_chunks[0x1];
 	u8	   reserved_at_d1[0xf];
 
@@ -4172,7 +4173,8 @@ struct mlx5_ifc_hca_vport_context_bits {
 	u8         has_smi[0x1];
 	u8         has_raw[0x1];
 	u8         grh_required[0x1];
-	u8         reserved_at_104[0xc];
+	u8         reserved_at_104[0x4];
+	u8         num_port_plane[0x8];
 	u8         port_physical_state[0x4];
 	u8         vport_state_policy[0x4];
 	u8         port_state[0x4];
@@ -7692,7 +7694,7 @@ struct mlx5_ifc_mad_ifc_in_bits {
 	u8         op_mod[0x10];
 
 	u8         remote_lid[0x10];
-	u8         reserved_at_50[0x8];
+	u8         plane_index[0x8];
 	u8         port[0x8];
 
 	u8         reserved_at_60[0x20];
@@ -9621,7 +9623,9 @@ struct mlx5_ifc_ptys_reg_bits {
 	u8         an_disable_cap[0x1];
 	u8         reserved_at_3[0x5];
 	u8         local_port[0x8];
-	u8         reserved_at_10[0xd];
+	u8         reserved_at_10[0x8];
+	u8         plane_ind[0x4];
+	u8         reserved_at_1c[0x1];
 	u8         proto_mask[0x3];
 
 	u8         an_status[0x4];
-- 
cgit v1.2.3


From 2a5db20fa532198639671713c6213f96ff285b85 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Sun, 16 Jun 2024 19:08:35 +0300
Subject: RDMA/mlx5: Add support to multi-plane device and port

When multi-plane is supported, a logical port, which is aggregation of
multiple physical plane ports, is exposed for data transmission.
Compared with a normal mlx5 IB port, this logical port supports all
functionalities except Subnet Management.

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Link: https://lore.kernel.org/r/7e37c06c9cb243be9ac79930cd17053903785b95.1718553901.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/linux/mlx5/driver.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 0d31f77396fc..a96438ded15f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -917,6 +917,7 @@ struct mlx5_hca_vport_context {
 	u16			qkey_violation_counter;
 	u16			pkey_violation_counter;
 	bool			grh_required;
+	u8			num_plane;
 };
 
 #define STRUCT_FIELD(header, field) \
-- 
cgit v1.2.3


From 5d7e328e20b3d2bd3e1e8bea7a868ab8892aeed1 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Mon, 1 Jul 2024 11:44:42 +0100
Subject: ASoC: cs35l56: Revert support for dual-ownership of ASP registers

This patch reverts a series of commits that allowed for the ASP
registers to be owned by either the driver or the firmware. Nothing
currently depends on the functionality that is being reverted, so
it is safe to remove.

The commits being reverted are (last 3 are bugfixes to the first 2):
commit 72a77d7631c6
("ASoC: cs35l56: Fix to ensure ASP1 registers match cache")
commit 07f7d6e7a124
("ASoC: cs35l56: Fix for initializing ASP1 mixer registers")
commit 4703b014f28b
("ASoC: cs35l56: fix reversed if statement in cs35l56_dspwait_asp1tx_put()")
commit c14f09f010cc
("ASoC: cs35l56: Fix deadlock in ASP1 mixer register initialization")
commit dfd2ffb37399
("ASoC: cs35l56: Prevent overwriting firmware ASP config")

These reverts have been squashed into a single commit because there
would be no reason to revert only some of them (which would just
reintroduce bugs).

The changes introduced by the commits were well-intentioned but
somewhat misguided. ACPI does not provide any information about how
audio hardware is linked together, so that information has to be
hardcoded into drivers. On Windows the firmware is customized to
statically setup appropriate configuration of the audio links,
and the intent of the commits was to re-use this information if the
Linux host drivers aren't taking control of the ASP. This would
avoid having to hardcode the ASP config into the machine driver on
some systems.

However, this added complexity and race conditions into the driver.
It also complicates implementation of new code.

The only case where the ASP is used but the host is not taking
ownership is when CS35L56 is used in SoundWire mode with the ASP
as a reference audio interconnect. But even in that case it's not
necessarily required even if the firmware initialized it. Typically
it is used to avoid the host SDCA drivers having to be capable of
aggregating capture paths from multiple SoundWire peripherals. But
the SOF SoundWire support is capable of doing that aggregation.

Reverting all these commits significantly simplifies the driver.
Let's just use the normal Linux mechanisms of the machine driver and
ALSA controls to set things up instead of trying to use the firmware
to do use-case setup.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Link: https://patch.msgid.link/20240701104444.172556-2-rf@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/cs35l56.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h
index 1a3c6f66f620..2e19722989f7 100644
--- a/include/sound/cs35l56.h
+++ b/include/sound/cs35l56.h
@@ -267,13 +267,18 @@ struct cs35l56_base {
 	bool fw_patched;
 	bool secured;
 	bool can_hibernate;
-	bool fw_owns_asp1;
 	bool cal_data_valid;
 	s8 cal_index;
 	struct cirrus_amp_cal_data cal_data;
 	struct gpio_desc *reset_gpio;
 };
 
+/* Temporary to avoid a build break with the HDA driver */
+static inline int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base)
+{
+	return 0;
+}
+
 extern struct regmap_config cs35l56_regmap_i2c;
 extern struct regmap_config cs35l56_regmap_spi;
 extern struct regmap_config cs35l56_regmap_sdw;
@@ -284,8 +289,6 @@ extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC];
 extern const unsigned int cs35l56_tx_input_values[CS35L56_NUM_INPUT_SRC];
 
 int cs35l56_set_patch(struct cs35l56_base *cs35l56_base);
-int cs35l56_init_asp1_regs_for_driver_control(struct cs35l56_base *cs35l56_base);
-int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base);
 int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command);
 int cs35l56_firmware_shutdown(struct cs35l56_base *cs35l56_base);
 int cs35l56_wait_for_firmware_boot(struct cs35l56_base *cs35l56_base);
-- 
cgit v1.2.3


From e2996141d6db0d8b353e1c221a37c8e1be109d4a Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Mon, 1 Jul 2024 11:44:43 +0100
Subject: ASoC: cs35l56: Remove support for A1 silicon

No product was ever released with A1 silicon so there is no
need for the driver to include support for it.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Link: https://patch.msgid.link/20240701104444.172556-3-rf@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/cs35l56.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h
index 2e19722989f7..642ef690ebc2 100644
--- a/include/sound/cs35l56.h
+++ b/include/sound/cs35l56.h
@@ -80,9 +80,7 @@
 #define CS35L56_DSP1_AHBM_WINDOW_DEBUG_1		0x25E2044
 #define CS35L56_DSP1_XMEM_UNPACKED24_0			0x2800000
 #define CS35L56_DSP1_FW_VER				0x2800010
-#define CS35L56_DSP1_HALO_STATE_A1			0x2801E58
 #define CS35L56_DSP1_HALO_STATE				0x28021E0
-#define CS35L56_DSP1_PM_CUR_STATE_A1			0x2804000
 #define CS35L56_DSP1_PM_CUR_STATE			0x2804308
 #define CS35L56_DSP1_XMEM_UNPACKED24_8191		0x2807FFC
 #define CS35L56_DSP1_CORE_BASE				0x2B80000
-- 
cgit v1.2.3


From bca51197620a257e2954be99b16f05115c3b2630 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Sun, 16 Jun 2024 19:08:36 +0300
Subject: RDMA/core: Support IB sub device with type "SMI"

This patch adds 2 APIs, as well as driver operations to support adding
and deleting an IB sub device, which provides part of functionalities
of it's parent.

A sub device has a type; for a sub device with type "SMI", it provides
the smi capability through umad for its parent, meaning uverb is not
supported.

A sub device cannot live without a parent. So when a parent is
released, all it's sub devices are released as well.

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Link: https://lore.kernel.org/r/44253f7508b21eb2caefea3980c2bc072869116c.1718553901.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/rdma/ib_verbs.h          | 43 ++++++++++++++++++++++++++++++++++++++++
 include/uapi/rdma/rdma_netlink.h |  5 +++++
 2 files changed, 48 insertions(+)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 825043512b2d..d4128958908f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2663,6 +2663,18 @@ struct ib_device_ops {
 	 */
 	int (*get_numa_node)(struct ib_device *dev);
 
+	/**
+	 * add_sub_dev - Add a sub IB device
+	 */
+	struct ib_device *(*add_sub_dev)(struct ib_device *parent,
+					 enum rdma_nl_dev_type type,
+					 const char *name);
+
+	/**
+	 * del_sub_dev - Delete a sub IB device
+	 */
+	void (*del_sub_dev)(struct ib_device *sub_dev);
+
 	DECLARE_RDMA_OBJ_SIZE(ib_ah);
 	DECLARE_RDMA_OBJ_SIZE(ib_counters);
 	DECLARE_RDMA_OBJ_SIZE(ib_cq);
@@ -2773,6 +2785,15 @@ struct ib_device {
 	char iw_ifname[IFNAMSIZ];
 	u32 iw_driver_flags;
 	u32 lag_flags;
+
+	/* A parent device has a list of sub-devices */
+	struct mutex subdev_lock;
+	struct list_head subdev_list_head;
+
+	/* A sub device has a type and a parent */
+	enum rdma_nl_dev_type type;
+	struct ib_device *parent;
+	struct list_head subdev_list;
 };
 
 static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size,
@@ -4822,4 +4843,26 @@ static inline u16 rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn)
 
 const struct ib_port_immutable*
 ib_port_immutable_read(struct ib_device *dev, unsigned int port);
+
+/** ib_add_sub_device - Add a sub IB device on an existing one
+ *
+ * @parent: The IB device that needs to add a sub device
+ * @type: The type of the new sub device
+ * @name: The name of the new sub device
+ *
+ *
+ * Return 0 on success, an error code otherwise
+ */
+int ib_add_sub_device(struct ib_device *parent,
+		      enum rdma_nl_dev_type type,
+		      const char *name);
+
+
+/** ib_del_sub_device_and_put - Delect an IB sub device while holding a 'get'
+ *
+ * @sub: The sub device that is going to be deleted
+ *
+ * Return 0 on success, an error code otherwise
+ */
+int ib_del_sub_device_and_put(struct ib_device *sub);
 #endif /* IB_VERBS_H */
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index a214fc259f28..d15ee16be722 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -602,4 +602,9 @@ enum rdma_nl_counter_mask {
 	RDMA_COUNTER_MASK_QP_TYPE = 1,
 	RDMA_COUNTER_MASK_PID = 1 << 1,
 };
+
+/* Supported rdma device types. */
+enum rdma_nl_dev_type {
+	RDMA_DEVICE_TYPE_SMI = 1,
+};
 #endif /* _UAPI_RDMA_NETLINK_H */
-- 
cgit v1.2.3


From 36e97bbc2dca61751c5b4b7f248cd850a7654a19 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Sun, 16 Jun 2024 19:08:37 +0300
Subject: RDMA: Set type of rdma_ah to IB for a SMI sub device

An address handle created on a SMI port has type IB, as a SMI
port it's used for SMI management through umad.

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Link: https://lore.kernel.org/r/195be77aae0cce93522269f22f1303d2ccbef605.1718553901.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/rdma/ib_verbs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index d4128958908f..e09d4f09b602 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4662,6 +4662,8 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev,
 			return RDMA_AH_ATTR_TYPE_OPA;
 		return RDMA_AH_ATTR_TYPE_IB;
 	}
+	if (dev->type == RDMA_DEVICE_TYPE_SMI)
+		return RDMA_AH_ATTR_TYPE_IB;
 
 	return RDMA_AH_ATTR_TYPE_UNDEFINED;
 }
-- 
cgit v1.2.3


From 060c642b2ab8b40b39f9db99c1d14c7d19ba507f Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Sun, 16 Jun 2024 19:08:40 +0300
Subject: RDMA/nldev: Add support to add/delete a sub IB device through netlink

Add new netlink commands and attributes to support adding and deleting
a sub IB device with admin privilege.

Examples:
$ rdma dev add smi1 type SMI parent ibp8s0f1
$ rdma dev del smi1

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Link: https://lore.kernel.org/r/77cbf1b36359642be8a8d8c5c2f4e585b544282f.1718553901.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/uapi/rdma/rdma_netlink.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index d15ee16be722..bd52fb325e22 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -301,6 +301,10 @@ enum rdma_nldev_command {
 
 	RDMA_NLDEV_CMD_RES_SRQ_GET_RAW,
 
+	RDMA_NLDEV_CMD_NEWDEV,
+
+	RDMA_NLDEV_CMD_DELDEV,
+
 	RDMA_NLDEV_NUM_OPS
 };
 
@@ -564,6 +568,8 @@ enum rdma_nldev_attr {
 	 */
 	RDMA_NLDEV_ATTR_RES_SUBTYPE,		/* string */
 
+	RDMA_NLDEV_ATTR_DEV_TYPE,		/* u8 */
+
 	/*
 	 * Always the end
 	 */
-- 
cgit v1.2.3


From 294424839b5ec2ecd17f4c8409796846b2b8dd31 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Sun, 16 Jun 2024 19:08:41 +0300
Subject: RDMA/nldev: Add support to dump device type and parent device if
 exists

If a device has a specific type or a parent device, dump them as well.

Example:
$ rdma dev show smi1
3: smi1: node_type ca fw 20.38.1002 node_guid 9803:9b03:009f:d5ef sys_image_guid 9803:9b03:009f:d5ee type smi parent ibp8s0f1

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Link: https://lore.kernel.org/r/4c022e3e34b5de1254a3b367d502a362cdd0c53a.1718553901.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/uapi/rdma/rdma_netlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index bd52fb325e22..4b69242d7848 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -570,6 +570,8 @@ enum rdma_nldev_attr {
 
 	RDMA_NLDEV_ATTR_DEV_TYPE,		/* u8 */
 
+	RDMA_NLDEV_ATTR_PARENT_NAME,		/* string */
+
 	/*
 	 * Always the end
 	 */
-- 
cgit v1.2.3


From 3b43399b297c52cfe4dfe0194b6ad89d52bf8c35 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Sun, 16 Jun 2024 19:08:42 +0300
Subject: RDMA/mlx5: Add plane index support when querying PTYS registers

Support the new "plane_ind" field when querying port PTYS registers.
This is needed when querying the rate of a plane port.

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Link: https://lore.kernel.org/r/1f703c36306aa46917fcd88eadbb23b3e380d526.1718553901.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/linux/mlx5/port.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index 26092c78a985..e68d42b8ce65 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -155,10 +155,11 @@ struct mlx5_port_eth_proto {
 
 int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
 int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
-			 int ptys_size, int proto_mask, u8 local_port);
+			 int ptys_size, int proto_mask,
+			 u8 local_port, u8 plane_index);
 
 int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper,
-			    u16 *proto_oper, u8 local_port);
+			    u16 *proto_oper, u8 local_port, u8 plane_index);
 void mlx5_toggle_port_link(struct mlx5_core_dev *dev);
 int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
 			       enum mlx5_port_status status);
-- 
cgit v1.2.3


From c6b6677d85d47f5562020aa082d9b5f90738fdcd Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Sun, 16 Jun 2024 19:08:43 +0300
Subject: net/mlx5: mlx5_ifc update for accessing ppcnt register of plane ports

This patch adds new fields to support multi-plane and the extend port
counters group. Actual support will be added in the next patch.

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Link: https://lore.kernel.org/r/70221cdd79aad0e21cbf385d9567e3ebffbc5137.1718553901.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/linux/mlx5/mlx5_ifc.h | 47 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 61738990e399..5fea7b747607 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2651,6 +2651,46 @@ struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits {
 	u8         port_xmit_wait[0x20];
 };
 
+struct mlx5_ifc_ib_ext_port_cntrs_grp_data_layout_bits {
+	u8         reserved_at_0[0x300];
+
+	u8         port_xmit_data_high[0x20];
+
+	u8         port_xmit_data_low[0x20];
+
+	u8         port_rcv_data_high[0x20];
+
+	u8         port_rcv_data_low[0x20];
+
+	u8         port_xmit_pkts_high[0x20];
+
+	u8         port_xmit_pkts_low[0x20];
+
+	u8         port_rcv_pkts_high[0x20];
+
+	u8         port_rcv_pkts_low[0x20];
+
+	u8         reserved_at_400[0x80];
+
+	u8         port_unicast_xmit_pkts_high[0x20];
+
+	u8         port_unicast_xmit_pkts_low[0x20];
+
+	u8         port_multicast_xmit_pkts_high[0x20];
+
+	u8         port_multicast_xmit_pkts_low[0x20];
+
+	u8         port_unicast_rcv_pkts_high[0x20];
+
+	u8         port_unicast_rcv_pkts_low[0x20];
+
+	u8         port_multicast_rcv_pkts_high[0x20];
+
+	u8         port_multicast_rcv_pkts_low[0x20];
+
+	u8         reserved_at_580[0x240];
+};
+
 struct mlx5_ifc_eth_per_tc_prio_grp_data_layout_bits {
 	u8         transmit_queue_high[0x20];
 
@@ -4543,6 +4583,7 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits {
 	struct mlx5_ifc_eth_per_tc_prio_grp_data_layout_bits eth_per_tc_prio_grp_data_layout;
 	struct mlx5_ifc_eth_per_tc_congest_prio_grp_data_layout_bits eth_per_tc_congest_prio_grp_data_layout;
 	struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout;
+	struct mlx5_ifc_ib_ext_port_cntrs_grp_data_layout_bits ib_ext_port_cntrs_grp_data_layout;
 	struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs;
 	struct mlx5_ifc_phys_layer_statistical_cntrs_bits phys_layer_statistical_cntrs;
 	u8         reserved_at_0[0x7c0];
@@ -9851,8 +9892,10 @@ struct mlx5_ifc_ppcnt_reg_bits {
 	u8         grp[0x6];
 
 	u8         clr[0x1];
-	u8         reserved_at_21[0x1c];
-	u8         prio_tc[0x3];
+	u8         reserved_at_21[0x13];
+	u8         plane_ind[0x4];
+	u8         reserved_at_38[0x3];
+	u8         prio_tc[0x5];
 
 	union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set;
 };
-- 
cgit v1.2.3


From 7a2210a57d423cbdb9c5f2e8b12c65d7472ff147 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Sun, 16 Jun 2024 19:08:44 +0300
Subject: RDMA/mlx5: Support per-plane port IB counters by querying PPCNT
 register

Supports per-plane port counters by querying PPCNT register with the
"extended port counters" group, as the query_vport_counter command
doesn't support plane ports.

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Link: https://lore.kernel.org/r/06ffb582d67159b7def4654c8272d3d6e8bd2f2f.1718553901.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/linux/mlx5/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index d7bb31d9a446..68bd1b4737ea 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1466,6 +1466,7 @@ enum {
 	MLX5_PER_TRAFFIC_CLASS_CONGESTION_GROUP = 0x13,
 	MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP = 0x16,
 	MLX5_INFINIBAND_PORT_COUNTERS_GROUP   = 0x20,
+	MLX5_INFINIBAND_EXTENDED_PORT_COUNTERS_GROUP = 0x21,
 };
 
 enum {
-- 
cgit v1.2.3


From 762ced1630a97a457ad2fd5f5a36849009808431 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Mon, 1 Jul 2024 14:39:50 +0200
Subject: HID: bpf: fix gcc warning and unify __u64 into u64

I've got multiple reports of:
error: cast from pointer to integer of different size
[-Werror=pointer-to-int-cast].

Let's use the same trick than kernel/bpf/helpers.c to shut up that warning.

Even if we were on an architecture with addresses on more than 64 bits,
this isn't much of an issue as the address is not used as a pointer,
but as an hash and the caller is not supposed to go back to the kernel
address ever.

And while we change those, make sure we use u64 instead of __u64 for
consistency

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202406280633.OPB5uIFj-lkp@intel.com/
Closes: https://lore.kernel.org/oe-kbuild-all/202406282304.UydSVncq-lkp@intel.com/
Closes: https://lore.kernel.org/oe-kbuild-all/202406282242.Fk738zzy-lkp@intel.com/
Reported-by: Mirsad Todorovac <mtodorovac69@gmail.com>
Fixes: 67eccf151d76 ("HID: add source argument to HID low level functions")
Link: https://patch.msgid.link/20240701-fix-cki-v2-2-20564e2e1393@kernel.org
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid_bpf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index 93546ee7677a..3f6584014311 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -68,9 +68,9 @@ struct hid_ops {
 				  unsigned char reportnum, __u8 *buf,
 				  size_t len, enum hid_report_type rtype,
 				  enum hid_class_request reqtype,
-				  __u64 source, bool from_bpf);
+				  u64 source, bool from_bpf);
 	int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len,
-				    __u64 source, bool from_bpf);
+				    u64 source, bool from_bpf);
 	int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type,
 				u8 *data, u32 size, int interrupt, u64 source, bool from_bpf,
 				bool lock_already_taken);
@@ -115,7 +115,7 @@ struct hid_bpf_ops {
 	 * Context: Interrupt context.
 	 */
 	int (*hid_device_event)(struct hid_bpf_ctx *ctx, enum hid_report_type report_type,
-				__u64 source);
+				u64 source);
 
 	/**
 	 * @hid_rdesc_fixup: called when the probe function parses the report descriptor
-- 
cgit v1.2.3


From 260ffc9676b635c2ededc39285bfa41f83536ee1 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Mon, 1 Jul 2024 14:39:51 +0200
Subject: HID: bpf: doc fixes for hid_hw_request() hooks

We had the following errors while doing make htmldocs:
Documentation/hid/hid-bpf:185: include/linux/hid_bpf.h:144:
	ERROR: Unexpected indentation.
Documentation/hid/hid-bpf:185: include/linux/hid_bpf.h:145:
	WARNING: Block quote ends without a blank line;
	unexpected unindent.
Documentation/hid/hid-bpf:185: include/linux/hid_bpf.h:147:
	ERROR: Unexpected indentation.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Fixes: 8bd0488b5ea5 ("HID: bpf: add HID-BPF hooks for hid_hw_raw_requests")
Link: https://patch.msgid.link/20240701-fix-cki-v2-3-20564e2e1393@kernel.org
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid_bpf.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index 3f6584014311..c30c31b79419 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -139,12 +139,15 @@ struct hid_bpf_ops {
 	 *
 	 * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx
 	 * ``reportnum``: the report number, as in hid_hw_raw_request()
+	 *
 	 * ``rtype``: the report type (``HID_INPUT_REPORT``, ``HID_FEATURE_REPORT``,
-	 *            ``HID_OUTPUT_REPORT``)
+	 * ``HID_OUTPUT_REPORT``)
+	 *
 	 * ``reqtype``: the request
+	 *
 	 * ``source``: a u64 referring to a uniq but identifiable source. If %0, the
-	 *             kernel itself emitted that call. For hidraw, ``source`` is set
-	 *             to the associated ``struct file *``.
+	 * kernel itself emitted that call. For hidraw, ``source`` is set
+	 * to the associated ``struct file *``.
 	 *
 	 * Return: %0 to keep processing the request by hid-core; any other value
 	 * stops hid-core from processing that event. A positive value should be
@@ -153,7 +156,7 @@ struct hid_bpf_ops {
 	 */
 	int (*hid_hw_request)(struct hid_bpf_ctx *ctx, unsigned char reportnum,
 			       enum hid_report_type rtype, enum hid_class_request reqtype,
-			       __u64 source);
+			       u64 source);
 
 	/**
 	 * @hid_hw_output_report: called whenever a hid_hw_output_report() call is emitted
-- 
cgit v1.2.3


From c79de517a226b86419a5baa867e65e3f8118829f Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <bentiss@kernel.org>
Date: Mon, 1 Jul 2024 14:39:52 +0200
Subject: HID: bpf: doc fixes for hid_hw_request() hooks

We had the following errors while doing make htmldocs:

Documentation/hid/hid-bpf:185: include/linux/hid_bpf.h:167:
	ERROR: Unexpected indentation.

Also ensure consistency with the rest of the __u64 vs u64.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Fixes: 9286675a2aed ("HID: bpf: add HID-BPF hooks for hid_hw_output_report")
Link: https://patch.msgid.link/20240701-fix-cki-v2-4-20564e2e1393@kernel.org
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 include/linux/hid_bpf.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index c30c31b79419..9ca96fc90449 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -138,6 +138,7 @@ struct hid_bpf_ops {
 	 * It has the following arguments:
 	 *
 	 * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx
+	 *
 	 * ``reportnum``: the report number, as in hid_hw_raw_request()
 	 *
 	 * ``rtype``: the report type (``HID_INPUT_REPORT``, ``HID_FEATURE_REPORT``,
@@ -165,16 +166,17 @@ struct hid_bpf_ops {
 	 * It has the following arguments:
 	 *
 	 * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx
+	 *
 	 * ``source``: a u64 referring to a uniq but identifiable source. If %0, the
-	 *             kernel itself emitted that call. For hidraw, ``source`` is set
-	 *             to the associated ``struct file *``.
+	 * kernel itself emitted that call. For hidraw, ``source`` is set
+	 * to the associated ``struct file *``.
 	 *
 	 * Return: %0 to keep processing the request by hid-core; any other value
 	 * stops hid-core from processing that event. A positive value should be
 	 * returned with the number of bytes written to the device; a negative error
 	 * code interrupts the processing of this call.
 	 */
-	int (*hid_hw_output_report)(struct hid_bpf_ctx *ctx, __u64 source);
+	int (*hid_hw_output_report)(struct hid_bpf_ctx *ctx, u64 source);
 
 
 	/* private: do not show up in the docs */
@@ -203,9 +205,9 @@ int dispatch_hid_bpf_raw_requests(struct hid_device *hdev,
 				  unsigned char reportnum, __u8 *buf,
 				  u32 size, enum hid_report_type rtype,
 				  enum hid_class_request reqtype,
-				  __u64 source, bool from_bpf);
+				  u64 source, bool from_bpf);
 int dispatch_hid_bpf_output_report(struct hid_device *hdev, __u8 *buf, u32 size,
-				   __u64 source, bool from_bpf);
+				   u64 source, bool from_bpf);
 int hid_bpf_connect_device(struct hid_device *hdev);
 void hid_bpf_disconnect_device(struct hid_device *hdev);
 void hid_bpf_destroy_device(struct hid_device *hid);
@@ -221,7 +223,7 @@ static inline int dispatch_hid_bpf_raw_requests(struct hid_device *hdev,
 						enum hid_class_request reqtype,
 						u64 source, bool from_bpf) { return 0; }
 static inline int dispatch_hid_bpf_output_report(struct hid_device *hdev, __u8 *buf, u32 size,
-						 __u64 source, bool from_bpf) { return 0; }
+						 u64 source, bool from_bpf) { return 0; }
 static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; }
 static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {}
 static inline void hid_bpf_destroy_device(struct hid_device *hid) {}
-- 
cgit v1.2.3


From 63e2f40c9e3187641afacde4153f54b3ee4dbc8c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 29 Jun 2024 21:48:41 +0200
Subject: syscalls: fix sys_fanotify_mark prototype

My earlier fix missed an incorrect function prototype that shows up on
native 32-bit builds:

In file included from fs/notify/fanotify/fanotify_user.c:14:
include/linux/syscalls.h:248:25: error: conflicting types for 'sys_fanotify_mark'; have 'long int(int,  unsigned int,  u32,  u32,  int,  const char *)' {aka 'long int(int,  unsigned int,  unsigned int,  unsigned int,  int,  const char *)'}
 1924 | SYSCALL32_DEFINE6(fanotify_mark,
      | ^~~~~~~~~~~~~~~~~
include/linux/syscalls.h:862:17: note: previous declaration of 'sys_fanotify_mark' with type 'long int(int,  unsigned int,  u64,  int, const char *)' {aka 'long int(int,  unsigned int,  long long unsigned int,  int,  const char *)'}

On x86 and powerpc, the prototype is also wrong but hidden in an #ifdef,
so it never caused problems.

Add another alternative declaration that matches the conditional function
definition.

Fixes: 403f17a33073 ("parisc: use generic sys_fanotify_mark implementation")
Cc: stable@vger.kernel.org
Reported-by: Guenter Roeck <linux@roeck-us.net>
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/syscalls.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 63424af87bba..fff820c3e93e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -859,9 +859,15 @@ asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource,
 				const struct rlimit64 __user *new_rlim,
 				struct rlimit64 __user *old_rlim);
 asmlinkage long sys_fanotify_init(unsigned int flags, unsigned int event_f_flags);
+#if defined(CONFIG_ARCH_SPLIT_ARG64)
+asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags,
+                                unsigned int mask_1, unsigned int mask_2,
+				int dfd, const char  __user * pathname);
+#else
 asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags,
 				  u64 mask, int fd,
 				  const char  __user *pathname);
+#endif
 asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
 				      struct file_handle __user *handle,
 				      int __user *mnt_id, int flag);
-- 
cgit v1.2.3


From 2681bbaa39cc2b5711494cc7e0166538f24e9c16 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sun, 30 Jun 2024 22:54:08 +0200
Subject: ACPI: battery: add devm_battery_hook_register()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a utility function for device-managed registration of battery hooks.
The function makes it easier to manage the lifecycle of a hook.

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20240630-cros_ec-charge-control-v5-1-8f649d018c52@weissschuh.net
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
---
 include/acpi/battery.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/acpi/battery.h b/include/acpi/battery.h
index 611a2561a014..c93f16dfb944 100644
--- a/include/acpi/battery.h
+++ b/include/acpi/battery.h
@@ -2,6 +2,7 @@
 #ifndef __ACPI_BATTERY_H
 #define __ACPI_BATTERY_H
 
+#include <linux/device.h>
 #include <linux/power_supply.h>
 
 #define ACPI_BATTERY_CLASS "battery"
@@ -19,5 +20,6 @@ struct acpi_battery_hook {
 
 void battery_hook_register(struct acpi_battery_hook *hook);
 void battery_hook_unregister(struct acpi_battery_hook *hook);
+int devm_battery_hook_register(struct device *dev, struct acpi_battery_hook *hook);
 
 #endif
-- 
cgit v1.2.3


From c05cb5bdf413a2a5051701a397082380758e37ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sun, 30 Jun 2024 22:54:09 +0200
Subject: platform/chrome: Update binary interface for EC-based charge control
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The charge-control command v2/v3 is more featureful than v1, it
additionally supports charge thresholds.

The definitions were imported from ChromeOS EC commit
32870d602317 ("squirtle: modify motionsense rotation matrix")

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20240630-cros_ec-charge-control-v5-2-8f649d018c52@weissschuh.net
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
---
 include/linux/platform_data/cros_ec_commands.h | 49 ++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h
index ec598057d1da..e574b790be6f 100644
--- a/include/linux/platform_data/cros_ec_commands.h
+++ b/include/linux/platform_data/cros_ec_commands.h
@@ -3843,16 +3843,61 @@ struct ec_params_i2c_write {
  * discharge the battery.
  */
 #define EC_CMD_CHARGE_CONTROL 0x0096
-#define EC_VER_CHARGE_CONTROL 1
+#define EC_VER_CHARGE_CONTROL 3
 
 enum ec_charge_control_mode {
 	CHARGE_CONTROL_NORMAL = 0,
 	CHARGE_CONTROL_IDLE,
 	CHARGE_CONTROL_DISCHARGE,
+	/* Add no more entry below. */
+	CHARGE_CONTROL_COUNT,
+};
+
+#define EC_CHARGE_MODE_TEXT                               \
+	{                                                 \
+		[CHARGE_CONTROL_NORMAL] = "NORMAL",       \
+		[CHARGE_CONTROL_IDLE] = "IDLE",           \
+		[CHARGE_CONTROL_DISCHARGE] = "DISCHARGE", \
+	}
+
+enum ec_charge_control_cmd {
+	EC_CHARGE_CONTROL_CMD_SET = 0,
+	EC_CHARGE_CONTROL_CMD_GET,
+};
+
+enum ec_charge_control_flag {
+	EC_CHARGE_CONTROL_FLAG_NO_IDLE = BIT(0),
 };
 
 struct ec_params_charge_control {
-	uint32_t mode;  /* enum charge_control_mode */
+	uint32_t mode; /* enum charge_control_mode */
+
+	/* Below are the fields added in V2. */
+	uint8_t cmd; /* enum ec_charge_control_cmd. */
+	uint8_t flags; /* enum ec_charge_control_flag (v3+) */
+	/*
+	 * Lower and upper thresholds for battery sustainer. This struct isn't
+	 * named to avoid tainting foreign projects' name spaces.
+	 *
+	 * If charge mode is explicitly set (e.g. DISCHARGE), battery sustainer
+	 * will be disabled. To disable battery sustainer, set mode=NORMAL,
+	 * lower=-1, upper=-1.
+	 */
+	struct {
+		int8_t lower; /* Display SoC in percentage. */
+		int8_t upper; /* Display SoC in percentage. */
+	} sustain_soc;
+} __ec_align4;
+
+/* Added in v2 */
+struct ec_response_charge_control {
+	uint32_t mode; /* enum charge_control_mode */
+	struct { /* Battery sustainer thresholds */
+		int8_t lower;
+		int8_t upper;
+	} sustain_soc;
+	uint8_t flags; /* enum ec_charge_control_flag (v3+) */
+	uint8_t reserved;
 } __ec_align4;
 
 /*****************************************************************************/
-- 
cgit v1.2.3


From 69a13742b7c64ed89894caf7091539e164b3e97c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sun, 30 Jun 2024 22:54:10 +0200
Subject: platform/chrome: cros_ec_proto: Introduce cros_ec_get_cmd_versions()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Retrieving the supported versions of a command is a fairly common
operation. Provide a helper for it.

If the command is not supported at all the EC returns
-EINVAL/EC_RES_INVALID_PARAMS.

This error is translated into an empty version mask as that is easier to
handle for callers and they don't need to know about the error details.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20240630-cros_ec-charge-control-v5-3-8f649d018c52@weissschuh.net
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
---
 include/linux/platform_data/cros_ec_proto.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index 6e9225bdf903..b34ed0cc1f8d 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -263,6 +263,8 @@ int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command
 
 int cros_ec_cmd_readmem(struct cros_ec_device *ec_dev, u8 offset, u8 size, void *dest);
 
+int cros_ec_get_cmd_versions(struct cros_ec_device *ec_dev, u16 cmd);
+
 /**
  * cros_ec_get_time_ns() - Return time in ns.
  *
-- 
cgit v1.2.3


From 992f1a3d4e88498de04b0b13b94705d8540f3d81 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Beh=C3=BAn?= <kabel@kernel.org>
Date: Mon, 1 Jul 2024 13:30:04 +0200
Subject: platform: cznic: Add preliminary support for Turris Omnia MCU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the basic skeleton for a new platform driver for the microcontroller
found on the Turris Omnia board.

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Acked-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Link: https://lore.kernel.org/r/20240701113010.16447-3-kabel@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/turris-omnia-mcu-interface.h | 249 +++++++++++++++++++++++++++++
 1 file changed, 249 insertions(+)
 create mode 100644 include/linux/turris-omnia-mcu-interface.h

(limited to 'include')

diff --git a/include/linux/turris-omnia-mcu-interface.h b/include/linux/turris-omnia-mcu-interface.h
new file mode 100644
index 000000000000..2da8cbeb158a
--- /dev/null
+++ b/include/linux/turris-omnia-mcu-interface.h
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * CZ.NIC's Turris Omnia MCU I2C interface commands definitions
+ *
+ * 2024 by Marek Behún <kabel@kernel.org>
+ */
+
+#ifndef __TURRIS_OMNIA_MCU_INTERFACE_H
+#define __TURRIS_OMNIA_MCU_INTERFACE_H
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+
+enum omnia_commands_e {
+	OMNIA_CMD_GET_STATUS_WORD		= 0x01, /* slave sends status word back */
+	OMNIA_CMD_GENERAL_CONTROL		= 0x02,
+	OMNIA_CMD_LED_MODE			= 0x03, /* default/user */
+	OMNIA_CMD_LED_STATE			= 0x04, /* LED on/off */
+	OMNIA_CMD_LED_COLOR			= 0x05, /* LED number + RED + GREEN + BLUE */
+	OMNIA_CMD_USER_VOLTAGE			= 0x06,
+	OMNIA_CMD_SET_BRIGHTNESS		= 0x07,
+	OMNIA_CMD_GET_BRIGHTNESS		= 0x08,
+	OMNIA_CMD_GET_RESET			= 0x09,
+	OMNIA_CMD_GET_FW_VERSION_APP		= 0x0A, /* 20B git hash number */
+	OMNIA_CMD_SET_WATCHDOG_STATE		= 0x0B, /* 0 - disable
+							 * 1 - enable / ping
+							 * after boot watchdog is started
+							 * with 2 minutes timeout
+							 */
+
+	/* OMNIA_CMD_WATCHDOG_STATUS		= 0x0C, not implemented anymore */
+
+	OMNIA_CMD_GET_WATCHDOG_STATE		= 0x0D,
+	OMNIA_CMD_GET_FW_VERSION_BOOT		= 0x0E, /* 20B Git hash number */
+	OMNIA_CMD_GET_FW_CHECKSUM		= 0x0F, /* 4B length, 4B checksum */
+
+	/* available if FEATURES_SUPPORTED bit set in status word */
+	OMNIA_CMD_GET_FEATURES			= 0x10,
+
+	/* available if EXT_CMD bit set in features */
+	OMNIA_CMD_GET_EXT_STATUS_DWORD		= 0x11,
+	OMNIA_CMD_EXT_CONTROL			= 0x12,
+	OMNIA_CMD_GET_EXT_CONTROL_STATUS	= 0x13,
+
+	/* available if NEW_INT_API bit set in features */
+	OMNIA_CMD_GET_INT_AND_CLEAR		= 0x14,
+	OMNIA_CMD_GET_INT_MASK			= 0x15,
+	OMNIA_CMD_SET_INT_MASK			= 0x16,
+
+	/* available if FLASHING bit set in features */
+	OMNIA_CMD_FLASH				= 0x19,
+
+	/* available if WDT_PING bit set in features */
+	OMNIA_CMD_SET_WDT_TIMEOUT		= 0x20,
+	OMNIA_CMD_GET_WDT_TIMELEFT		= 0x21,
+
+	/* available if POWEROFF_WAKEUP bit set in features */
+	OMNIA_CMD_SET_WAKEUP			= 0x22,
+	OMNIA_CMD_GET_UPTIME_AND_WAKEUP		= 0x23,
+	OMNIA_CMD_POWER_OFF			= 0x24,
+
+	/* available if USB_OVC_PROT_SETTING bit set in features */
+	OMNIA_CMD_SET_USB_OVC_PROT		= 0x25,
+	OMNIA_CMD_GET_USB_OVC_PROT		= 0x26,
+
+	/* available if TRNG bit set in features */
+	OMNIA_CMD_TRNG_COLLECT_ENTROPY		= 0x28,
+
+	/* available if CRYPTO bit set in features */
+	OMNIA_CMD_CRYPTO_GET_PUBLIC_KEY		= 0x29,
+	OMNIA_CMD_CRYPTO_SIGN_MESSAGE		= 0x2A,
+	OMNIA_CMD_CRYPTO_COLLECT_SIGNATURE	= 0x2B,
+
+	/* available if BOARD_INFO it set in features */
+	OMNIA_CMD_BOARD_INFO_GET		= 0x2C,
+	OMNIA_CMD_BOARD_INFO_BURN		= 0x2D,
+
+	/* available only at address 0x2b (LED-controller) */
+	/* available only if LED_GAMMA_CORRECTION bit set in features */
+	OMNIA_CMD_SET_GAMMA_CORRECTION		= 0x30,
+	OMNIA_CMD_GET_GAMMA_CORRECTION		= 0x31,
+
+	/* available only at address 0x2b (LED-controller) */
+	/* available only if PER_LED_CORRECTION bit set in features */
+	/* available only if FROM_BIT_16_INVALID bit NOT set in features */
+	OMNIA_CMD_SET_LED_CORRECTIONS		= 0x32,
+	OMNIA_CMD_GET_LED_CORRECTIONS		= 0x33,
+};
+
+enum omnia_flashing_commands_e {
+	OMNIA_FLASH_CMD_UNLOCK		= 0x01,
+	OMNIA_FLASH_CMD_SIZE_AND_CSUM	= 0x02,
+	OMNIA_FLASH_CMD_PROGRAM		= 0x03,
+	OMNIA_FLASH_CMD_RESET		= 0x04,
+};
+
+enum omnia_sts_word_e {
+	OMNIA_STS_MCU_TYPE_MASK			= GENMASK(1, 0),
+	OMNIA_STS_MCU_TYPE_STM32		= FIELD_PREP_CONST(OMNIA_STS_MCU_TYPE_MASK, 0),
+	OMNIA_STS_MCU_TYPE_GD32			= FIELD_PREP_CONST(OMNIA_STS_MCU_TYPE_MASK, 1),
+	OMNIA_STS_MCU_TYPE_MKL			= FIELD_PREP_CONST(OMNIA_STS_MCU_TYPE_MASK, 2),
+	OMNIA_STS_FEATURES_SUPPORTED		= BIT(2),
+	OMNIA_STS_USER_REGULATOR_NOT_SUPPORTED	= BIT(3),
+	OMNIA_STS_CARD_DET			= BIT(4),
+	OMNIA_STS_MSATA_IND			= BIT(5),
+	OMNIA_STS_USB30_OVC			= BIT(6),
+	OMNIA_STS_USB31_OVC			= BIT(7),
+	OMNIA_STS_USB30_PWRON			= BIT(8),
+	OMNIA_STS_USB31_PWRON			= BIT(9),
+	OMNIA_STS_ENABLE_4V5			= BIT(10),
+	OMNIA_STS_BUTTON_MODE			= BIT(11),
+	OMNIA_STS_BUTTON_PRESSED		= BIT(12),
+	OMNIA_STS_BUTTON_COUNTER_MASK		= GENMASK(15, 13),
+};
+
+enum omnia_ctl_byte_e {
+	OMNIA_CTL_LIGHT_RST	= BIT(0),
+	OMNIA_CTL_HARD_RST	= BIT(1),
+	/* BIT(2) is currently reserved */
+	OMNIA_CTL_USB30_PWRON	= BIT(3),
+	OMNIA_CTL_USB31_PWRON	= BIT(4),
+	OMNIA_CTL_ENABLE_4V5	= BIT(5),
+	OMNIA_CTL_BUTTON_MODE	= BIT(6),
+	OMNIA_CTL_BOOTLOADER	= BIT(7),
+};
+
+enum omnia_features_e {
+	OMNIA_FEAT_PERIPH_MCU		= BIT(0),
+	OMNIA_FEAT_EXT_CMDS		= BIT(1),
+	OMNIA_FEAT_WDT_PING		= BIT(2),
+	OMNIA_FEAT_LED_STATE_EXT_MASK	= GENMASK(4, 3),
+	OMNIA_FEAT_LED_STATE_EXT	= FIELD_PREP_CONST(OMNIA_FEAT_LED_STATE_EXT_MASK, 1),
+	OMNIA_FEAT_LED_STATE_EXT_V32	= FIELD_PREP_CONST(OMNIA_FEAT_LED_STATE_EXT_MASK, 2),
+	OMNIA_FEAT_LED_GAMMA_CORRECTION	= BIT(5),
+	OMNIA_FEAT_NEW_INT_API		= BIT(6),
+	OMNIA_FEAT_BOOTLOADER		= BIT(7),
+	OMNIA_FEAT_FLASHING		= BIT(8),
+	OMNIA_FEAT_NEW_MESSAGE_API	= BIT(9),
+	OMNIA_FEAT_BRIGHTNESS_INT	= BIT(10),
+	OMNIA_FEAT_POWEROFF_WAKEUP	= BIT(11),
+	OMNIA_FEAT_CAN_OLD_MESSAGE_API	= BIT(12),
+	OMNIA_FEAT_TRNG			= BIT(13),
+	OMNIA_FEAT_CRYPTO		= BIT(14),
+	OMNIA_FEAT_BOARD_INFO		= BIT(15),
+
+	/*
+	 * Orginally the features command replied only 16 bits. If more were
+	 * read, either the I2C transaction failed or 0xff bytes were sent.
+	 * Therefore to consider bits 16 - 31 valid, one bit (20) was reserved
+	 * to be zero.
+	 */
+
+	/* Bits 16 - 19 correspond to bits 0 - 3 of status word */
+	OMNIA_FEAT_MCU_TYPE_MASK		= GENMASK(17, 16),
+	OMNIA_FEAT_MCU_TYPE_STM32		= FIELD_PREP_CONST(OMNIA_FEAT_MCU_TYPE_MASK, 0),
+	OMNIA_FEAT_MCU_TYPE_GD32		= FIELD_PREP_CONST(OMNIA_FEAT_MCU_TYPE_MASK, 1),
+	OMNIA_FEAT_MCU_TYPE_MKL			= FIELD_PREP_CONST(OMNIA_FEAT_MCU_TYPE_MASK, 2),
+	OMNIA_FEAT_FEATURES_SUPPORTED		= BIT(18),
+	OMNIA_FEAT_USER_REGULATOR_NOT_SUPPORTED	= BIT(19),
+
+	/* must not be set */
+	OMNIA_FEAT_FROM_BIT_16_INVALID	= BIT(20),
+
+	OMNIA_FEAT_PER_LED_CORRECTION	= BIT(21),
+	OMNIA_FEAT_USB_OVC_PROT_SETTING	= BIT(22),
+};
+
+enum omnia_ext_sts_dword_e {
+	OMNIA_EXT_STS_SFP_nDET		= BIT(0),
+	OMNIA_EXT_STS_LED_STATES_MASK	= GENMASK(31, 12),
+	OMNIA_EXT_STS_WLAN0_MSATA_LED	= BIT(12),
+	OMNIA_EXT_STS_WLAN1_LED		= BIT(13),
+	OMNIA_EXT_STS_WLAN2_LED		= BIT(14),
+	OMNIA_EXT_STS_WPAN0_LED		= BIT(15),
+	OMNIA_EXT_STS_WPAN1_LED		= BIT(16),
+	OMNIA_EXT_STS_WPAN2_LED		= BIT(17),
+	OMNIA_EXT_STS_WAN_LED0		= BIT(18),
+	OMNIA_EXT_STS_WAN_LED1		= BIT(19),
+	OMNIA_EXT_STS_LAN0_LED0		= BIT(20),
+	OMNIA_EXT_STS_LAN0_LED1		= BIT(21),
+	OMNIA_EXT_STS_LAN1_LED0		= BIT(22),
+	OMNIA_EXT_STS_LAN1_LED1		= BIT(23),
+	OMNIA_EXT_STS_LAN2_LED0		= BIT(24),
+	OMNIA_EXT_STS_LAN2_LED1		= BIT(25),
+	OMNIA_EXT_STS_LAN3_LED0		= BIT(26),
+	OMNIA_EXT_STS_LAN3_LED1		= BIT(27),
+	OMNIA_EXT_STS_LAN4_LED0		= BIT(28),
+	OMNIA_EXT_STS_LAN4_LED1		= BIT(29),
+	OMNIA_EXT_STS_LAN5_LED0		= BIT(30),
+	OMNIA_EXT_STS_LAN5_LED1		= BIT(31),
+};
+
+enum omnia_ext_ctl_e {
+	OMNIA_EXT_CTL_nRES_MMC		= BIT(0),
+	OMNIA_EXT_CTL_nRES_LAN		= BIT(1),
+	OMNIA_EXT_CTL_nRES_PHY		= BIT(2),
+	OMNIA_EXT_CTL_nPERST0		= BIT(3),
+	OMNIA_EXT_CTL_nPERST1		= BIT(4),
+	OMNIA_EXT_CTL_nPERST2		= BIT(5),
+	OMNIA_EXT_CTL_PHY_SFP		= BIT(6),
+	OMNIA_EXT_CTL_PHY_SFP_AUTO	= BIT(7),
+	OMNIA_EXT_CTL_nVHV_CTRL		= BIT(8),
+};
+
+enum omnia_int_e {
+	OMNIA_INT_CARD_DET		= BIT(0),
+	OMNIA_INT_MSATA_IND		= BIT(1),
+	OMNIA_INT_USB30_OVC		= BIT(2),
+	OMNIA_INT_USB31_OVC		= BIT(3),
+	OMNIA_INT_BUTTON_PRESSED	= BIT(4),
+	OMNIA_INT_SFP_nDET		= BIT(5),
+	OMNIA_INT_BRIGHTNESS_CHANGED	= BIT(6),
+	OMNIA_INT_TRNG			= BIT(7),
+	OMNIA_INT_MESSAGE_SIGNED	= BIT(8),
+
+	OMNIA_INT_LED_STATES_MASK	= GENMASK(31, 12),
+	OMNIA_INT_WLAN0_MSATA_LED	= BIT(12),
+	OMNIA_INT_WLAN1_LED		= BIT(13),
+	OMNIA_INT_WLAN2_LED		= BIT(14),
+	OMNIA_INT_WPAN0_LED		= BIT(15),
+	OMNIA_INT_WPAN1_LED		= BIT(16),
+	OMNIA_INT_WPAN2_LED		= BIT(17),
+	OMNIA_INT_WAN_LED0		= BIT(18),
+	OMNIA_INT_WAN_LED1		= BIT(19),
+	OMNIA_INT_LAN0_LED0		= BIT(20),
+	OMNIA_INT_LAN0_LED1		= BIT(21),
+	OMNIA_INT_LAN1_LED0		= BIT(22),
+	OMNIA_INT_LAN1_LED1		= BIT(23),
+	OMNIA_INT_LAN2_LED0		= BIT(24),
+	OMNIA_INT_LAN2_LED1		= BIT(25),
+	OMNIA_INT_LAN3_LED0		= BIT(26),
+	OMNIA_INT_LAN3_LED1		= BIT(27),
+	OMNIA_INT_LAN4_LED0		= BIT(28),
+	OMNIA_INT_LAN4_LED1		= BIT(29),
+	OMNIA_INT_LAN5_LED0		= BIT(30),
+	OMNIA_INT_LAN5_LED1		= BIT(31),
+};
+
+enum omnia_cmd_poweroff_e {
+	OMNIA_CMD_POWER_OFF_POWERON_BUTTON	= BIT(0),
+	OMNIA_CMD_POWER_OFF_MAGIC		= 0xdead,
+};
+
+enum omnia_cmd_usb_ovc_prot_e {
+	OMNIA_CMD_xET_USB_OVC_PROT_PORT_MASK	= GENMASK(3, 0),
+	OMNIA_CMD_xET_USB_OVC_PROT_ENABLE	= BIT(4),
+};
+
+#endif /* __TURRIS_OMNIA_MCU_INTERFACE_H */
-- 
cgit v1.2.3


From 89cc8f1c5f22568142b7ad118c738204708e4207 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 30 Jun 2024 00:26:48 +0200
Subject: netfilter: nf_tables: Add flowtable map for xdp offload

This adds a small internal mapping table so that a new bpf (xdp) kfunc
can perform lookups in a flowtable.

As-is, xdp program has access to the device pointer, but no way to do a
lookup in a flowtable -- there is no way to obtain the needed struct
without questionable stunts.

This allows to obtain an nf_flowtable pointer given a net_device
structure.

In order to keep backward compatibility, the infrastructure allows the
user to add a given device to multiple flowtables, but it will always
return the first added mapping performing the lookup since it assumes
the right configuration is 1:1 mapping between flowtables and net_devices.

Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Link: https://lore.kernel.org/bpf/9f20e2c36f494b3bf177328718367f636bb0b2ab.1719698275.git.lorenzo@kernel.org
---
 include/net/netfilter/nf_flow_table.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 9abb7ee40d72..d845745207d2 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -305,6 +305,11 @@ struct flow_ports {
 	__be16 source, dest;
 };
 
+struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev);
+int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable,
+			      struct net_device *dev,
+			      enum flow_block_command cmd);
+
 unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 				     const struct nf_hook_state *state);
 unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
-- 
cgit v1.2.3


From 391bb6594fd3a567efb1cd3efc8136c78c4c9e31 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 30 Jun 2024 00:26:49 +0200
Subject: netfilter: Add bpf_xdp_flow_lookup kfunc

Introduce bpf_xdp_flow_lookup kfunc in order to perform the lookup
of a given flowtable entry based on a fib tuple of incoming traffic.
bpf_xdp_flow_lookup can be used as building block to offload in xdp
the processing of sw flowtable when hw flowtable is not available.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Link: https://lore.kernel.org/bpf/55d38a4e5856f6d1509d823ff4e98aaa6d356097.1719698275.git.lorenzo@kernel.org
---
 include/net/netfilter/nf_flow_table.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index d845745207d2..b63d53bb9dd6 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -315,6 +315,16 @@ unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 				       const struct nf_hook_state *state);
 
+#if (IS_BUILTIN(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+    (IS_MODULE(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+extern int nf_flow_register_bpf(void);
+#else
+static inline int nf_flow_register_bpf(void)
+{
+	return 0;
+}
+#endif
+
 #define MODULE_ALIAS_NF_FLOWTABLE(family)	\
 	MODULE_ALIAS("nf-flowtable-" __stringify(family))
 
-- 
cgit v1.2.3


From face1c543e89ebc657f7948b0541a76954cf9382 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Thu, 20 Jun 2024 21:14:10 +0200
Subject: ACPI: bus: Indicate support for battery charge limiting thru _OSC

The ACPI battery driver can handle the "charge limiting" state
of the battery, so the platform can advertise this state.

Indicate this by setting bit 19 ("Battery Charge Limiting Support")
when evaluating _OSC.

Tested on a Lenovo Ideapad S145-14IWL.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/20240620191410.3646-2-W_Armin@gmx.de
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 28c3fb2bef0d..b87ef8f3caa0 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -576,6 +576,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 #define OSC_SB_CPC_FLEXIBLE_ADR_SPACE		0x00004000
 #define OSC_SB_GENERIC_INITIATOR_SUPPORT	0x00020000
 #define OSC_SB_NATIVE_USB4_SUPPORT		0x00040000
+#define OSC_SB_BATTERY_CHARGE_LIMITING_SUPPORT	0x00080000
 #define OSC_SB_PRM_SUPPORT			0x00200000
 #define OSC_SB_FFH_OPR_SUPPORT			0x00400000
 
-- 
cgit v1.2.3


From 4d8aa430624006ba06254aa607f8756a75e42c8d Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Tue, 4 Jun 2024 14:30:04 +0200
Subject: dt-bindings: iio: adc: Add MediaTek MT6359 PMIC AUXADC

Add a new binding for the MT6350 Series (MT6357/8/9) PMIC AUXADC,
providing various ADC channels for both internal temperatures and
voltages, audio accessory detection (hp/mic/hp+mic and buttons,
usually on a 3.5mm jack) other than some basic battery statistics
on boards where the battery is managed by this PMIC.

Also add the necessary dt-binding headers for devicetree consumers.

Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://patch.msgid.link/20240604123008.327424-2-angelogioacchino.delregno@collabora.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../dt-bindings/iio/adc/mediatek,mt6357-auxadc.h   | 21 +++++++++++++++++++++
 .../dt-bindings/iio/adc/mediatek,mt6358-auxadc.h   | 22 ++++++++++++++++++++++
 .../dt-bindings/iio/adc/mediatek,mt6359-auxadc.h   | 22 ++++++++++++++++++++++
 3 files changed, 65 insertions(+)
 create mode 100644 include/dt-bindings/iio/adc/mediatek,mt6357-auxadc.h
 create mode 100644 include/dt-bindings/iio/adc/mediatek,mt6358-auxadc.h
 create mode 100644 include/dt-bindings/iio/adc/mediatek,mt6359-auxadc.h

(limited to 'include')

diff --git a/include/dt-bindings/iio/adc/mediatek,mt6357-auxadc.h b/include/dt-bindings/iio/adc/mediatek,mt6357-auxadc.h
new file mode 100644
index 000000000000..03ebb1d23953
--- /dev/null
+++ b/include/dt-bindings/iio/adc/mediatek,mt6357-auxadc.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */
+
+#ifndef _DT_BINDINGS_MEDIATEK_MT6357_AUXADC_H
+#define _DT_BINDINGS_MEDIATEK_MT6357_AUXADC_H
+
+/* ADC Channel Index */
+#define MT6357_AUXADC_BATADC		0
+#define MT6357_AUXADC_ISENSE		1
+#define MT6357_AUXADC_VCDT		2
+#define MT6357_AUXADC_BAT_TEMP		3
+#define MT6357_AUXADC_CHIP_TEMP		4
+#define MT6357_AUXADC_ACCDET		5
+#define MT6357_AUXADC_VDCXO		6
+#define MT6357_AUXADC_TSX_TEMP		7
+#define MT6357_AUXADC_HPOFS_CAL		8
+#define MT6357_AUXADC_DCXO_TEMP		9
+#define MT6357_AUXADC_VCORE_TEMP	10
+#define MT6357_AUXADC_VPROC_TEMP	11
+#define MT6357_AUXADC_VBAT		12
+
+#endif
diff --git a/include/dt-bindings/iio/adc/mediatek,mt6358-auxadc.h b/include/dt-bindings/iio/adc/mediatek,mt6358-auxadc.h
new file mode 100644
index 000000000000..efa08398fafd
--- /dev/null
+++ b/include/dt-bindings/iio/adc/mediatek,mt6358-auxadc.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */
+
+#ifndef _DT_BINDINGS_MEDIATEK_MT6358_AUXADC_H
+#define _DT_BINDINGS_MEDIATEK_MT6358_AUXADC_H
+
+/* ADC Channel Index */
+#define MT6358_AUXADC_BATADC		0
+#define MT6358_AUXADC_VCDT		1
+#define MT6358_AUXADC_BAT_TEMP		2
+#define MT6358_AUXADC_CHIP_TEMP		3
+#define MT6358_AUXADC_ACCDET		4
+#define MT6358_AUXADC_VDCXO		5
+#define MT6358_AUXADC_TSX_TEMP		6
+#define MT6358_AUXADC_HPOFS_CAL		7
+#define MT6358_AUXADC_DCXO_TEMP		8
+#define MT6358_AUXADC_VBIF		9
+#define MT6358_AUXADC_VCORE_TEMP	10
+#define MT6358_AUXADC_VPROC_TEMP	11
+#define MT6358_AUXADC_VGPU_TEMP		12
+#define MT6358_AUXADC_VBAT		13
+
+#endif
diff --git a/include/dt-bindings/iio/adc/mediatek,mt6359-auxadc.h b/include/dt-bindings/iio/adc/mediatek,mt6359-auxadc.h
new file mode 100644
index 000000000000..59826393ee7e
--- /dev/null
+++ b/include/dt-bindings/iio/adc/mediatek,mt6359-auxadc.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */
+
+#ifndef _DT_BINDINGS_MEDIATEK_MT6359_AUXADC_H
+#define _DT_BINDINGS_MEDIATEK_MT6359_AUXADC_H
+
+/* ADC Channel Index */
+#define MT6359_AUXADC_BATADC		0
+#define MT6359_AUXADC_BAT_TEMP		1
+#define MT6359_AUXADC_CHIP_TEMP		2
+#define MT6359_AUXADC_ACCDET		3
+#define MT6359_AUXADC_VDCXO		4
+#define MT6359_AUXADC_TSX_TEMP		5
+#define MT6359_AUXADC_HPOFS_CAL		6
+#define MT6359_AUXADC_DCXO_TEMP		7
+#define MT6359_AUXADC_VBIF		8
+#define MT6359_AUXADC_VCORE_TEMP	9
+#define MT6359_AUXADC_VPROC_TEMP	10
+#define MT6359_AUXADC_VGPU_TEMP		11
+#define MT6359_AUXADC_VBAT		12
+#define MT6359_AUXADC_IBAT		13
+
+#endif
-- 
cgit v1.2.3


From e38a82df2c9924577d39ce5ccee9e9edcadc3b5d Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Tue, 4 Jun 2024 14:30:06 +0200
Subject: math.h: Add unsigned 8 bits fractional numbers type

Some users may be requiring only rather small numbers as both
numerator and denominator: add signed and unsigned 8 bits
structs {s8,u8}_fract.

Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Link: https://patch.msgid.link/20240604123008.327424-4-angelogioacchino.delregno@collabora.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/math.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/math.h b/include/linux/math.h
index dd4152711de7..f5f18dc3616b 100644
--- a/include/linux/math.h
+++ b/include/linux/math.h
@@ -112,6 +112,8 @@ struct type##_fract {					\
 	__##type numerator;				\
 	__##type denominator;				\
 };
+__STRUCT_FRACT(s8)
+__STRUCT_FRACT(u8)
 __STRUCT_FRACT(s16)
 __STRUCT_FRACT(u16)
 __STRUCT_FRACT(s32)
-- 
cgit v1.2.3


From 8d9ffd15ff5c9da7bc6171f2536aaaff40bcab6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 1 Jun 2024 14:56:16 -0400
Subject: drm/amdgpu: remove AMD_FMT_MOD_GFX12_DCC_MAX_COMPRESSED_BLOCK_*
 definitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

They were added accidentally.

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/drm_fourcc.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index d0063ac6e09f..4168445fbb8b 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -1540,9 +1540,6 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
 #define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_SHIFT 18
 #define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3
 
-#define AMD_FMT_MOD_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT     3
-#define AMD_FMT_MOD_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK      0x3 /* 0:64B, 1:128B, 2:256B */
-
 /*
  * DCC supports embedding some clear colors directly in the DCC surface.
  * However, on older GPUs the rendering HW ignores the embedded clear color
-- 
cgit v1.2.3


From 8dd1426e2c80e32ac1995007330c8f95ffa28ebb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 1 Jun 2024 19:53:01 -0400
Subject: drm/amdgpu: handle gfx12 in amdgpu_display_verify_sizes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It verified GFX9-11 swizzle modes on GFX12, which has undefined behavior.

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/drm_fourcc.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 4168445fbb8b..2d84a8052b15 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -1506,6 +1506,8 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
  *    6 - 64KB_3D
  *    7 - 256KB_3D
  */
+#define AMD_FMT_MOD_TILE_GFX12_256B_2D 1
+#define AMD_FMT_MOD_TILE_GFX12_4K_2D 2
 #define AMD_FMT_MOD_TILE_GFX12_64K_2D 3
 #define AMD_FMT_MOD_TILE_GFX12_256K_2D 4
 
-- 
cgit v1.2.3


From 9f111059e725f7ca79a136bfc734da3c8c1838b4 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Thu, 27 Jun 2024 19:26:24 -0500
Subject: fs_parse: add uid & gid option option parsing helpers

Multiple filesystems take uid and gid as options, and the code to
create the ID from an integer and validate it is standard boilerplate
that can be moved into common helper functions, so do that for
consistency and less cut&paste.

This also helps avoid the buggy pattern noted by Seth Jenkins at
https://lore.kernel.org/lkml/CALxfFW4BXhEwxR0Q5LSkg-8Vb4r2MONKCcUCVioehXQKr35eHg@mail.gmail.com/
because uid/gid parsing will fail before any assignment in most
filesystems.

Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
Link: https://lore.kernel.org/r/de859d0a-feb9-473d-a5e2-c195a3d47abb@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fs_parser.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
index d3350979115f..6cf713a7e6c6 100644
--- a/include/linux/fs_parser.h
+++ b/include/linux/fs_parser.h
@@ -28,7 +28,7 @@ typedef int fs_param_type(struct p_log *,
  */
 fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64,
 	fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev,
-	fs_param_is_path, fs_param_is_fd;
+	fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid;
 
 /*
  * Specification of the type of value a parameter wants.
@@ -57,6 +57,8 @@ struct fs_parse_result {
 		int		int_32;		/* For spec_s32/spec_enum */
 		unsigned int	uint_32;	/* For spec_u32{,_octal,_hex}/spec_enum */
 		u64		uint_64;	/* For spec_u64 */
+		kuid_t		uid;
+		kgid_t		gid;
 	};
 };
 
@@ -131,6 +133,8 @@ static inline bool fs_validate_description(const char *name,
 #define fsparam_bdev(NAME, OPT)	__fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL)
 #define fsparam_path(NAME, OPT)	__fsparam(fs_param_is_path, NAME, OPT, 0, NULL)
 #define fsparam_fd(NAME, OPT)	__fsparam(fs_param_is_fd, NAME, OPT, 0, NULL)
+#define fsparam_uid(NAME, OPT) __fsparam(fs_param_is_uid, NAME, OPT, 0, NULL)
+#define fsparam_gid(NAME, OPT) __fsparam(fs_param_is_gid, NAME, OPT, 0, NULL)
 
 /* String parameter that allows empty argument */
 #define fsparam_string_empty(NAME, OPT) \
-- 
cgit v1.2.3


From f05c1ffc274516ef101d2e0f860bcb9b08c6c622 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Tue, 25 Jun 2024 19:25:46 +0200
Subject: ALSA: pcm: reinvent the stream synchronization ID API

Until the commit e11f0f90a626 ("ALSA: pcm: remove SNDRV_PCM_IOCTL1_INFO
internal command"), there was a possibility to pass information
about the synchronized streams to the user space. The mentioned
commit removed blindly the appropriate code with an irrelevant comment.

The revert may be appropriate, but since this API was lost for several
years without any complains, it's time to improve it. The hardware
parameters may change the used stream clock source (e.g. USB hardware)
so move this synchronization ID to hw_params as read-only field.

It seems that pipewire can benefit from this API (disable adaptive
resampling for perfectly synchronized PCM streams) now.

Note that the contents of ID is not supposed to be used for direct
comparison with a specific byte sequence. The "empty" case is when
all bytes are zero (driver does not offer this information)
and all other cases must be only used for equal comparison among
PCM streams (including different sound cards) if they are using
identical hardware clock.

Cc: Takashi Sakamoto <takaswie@kernel.org>
Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/20240625172836.589380-2-perex@perex.cz
---
 include/sound/pcm.h         | 3 ++-
 include/uapi/sound/asound.h | 9 +++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 3edd7a7346da..dbce137d8806 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -93,6 +93,7 @@ struct snd_pcm_ops {
 #define SNDRV_PCM_IOCTL1_CHANNEL_INFO	2
 /* 3 is absent slot. */
 #define SNDRV_PCM_IOCTL1_FIFO_SIZE	4
+#define SNDRV_PCM_IOCTL1_SYNC_ID	5
 
 #define SNDRV_PCM_TRIGGER_STOP		0
 #define SNDRV_PCM_TRIGGER_START		1
@@ -401,7 +402,7 @@ struct snd_pcm_runtime {
 	snd_pcm_uframes_t silence_start; /* starting pointer to silence area */
 	snd_pcm_uframes_t silence_filled; /* already filled part of silence area */
 
-	union snd_pcm_sync_id sync;	/* hardware synchronization ID */
+	unsigned char sync[16];		/* hardware synchronization ID */
 
 	/* -- mmap -- */
 	struct snd_pcm_mmap_status *status;
diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index 628d46a0da92..8bf7e8a0eb6f 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image {
  *                                                                           *
  *****************************************************************************/
 
-#define SNDRV_PCM_VERSION		SNDRV_PROTOCOL_VERSION(2, 0, 17)
+#define SNDRV_PCM_VERSION		SNDRV_PROTOCOL_VERSION(2, 0, 18)
 
 typedef unsigned long snd_pcm_uframes_t;
 typedef signed long snd_pcm_sframes_t;
@@ -334,7 +334,7 @@ union snd_pcm_sync_id {
 	unsigned char id[16];
 	unsigned short id16[8];
 	unsigned int id32[4];
-};
+} __attribute__((deprecated));
 
 struct snd_pcm_info {
 	unsigned int device;		/* RO/WR (control): device number */
@@ -348,7 +348,7 @@ struct snd_pcm_info {
 	int dev_subclass;		/* SNDRV_PCM_SUBCLASS_* */
 	unsigned int subdevices_count;
 	unsigned int subdevices_avail;
-	union snd_pcm_sync_id sync;	/* hardware synchronization ID */
+	unsigned char pad1[16];		/* was: hardware synchronization ID */
 	unsigned char reserved[64];	/* reserved for future... */
 };
 
@@ -420,7 +420,8 @@ struct snd_pcm_hw_params {
 	unsigned int rate_num;		/* R: rate numerator */
 	unsigned int rate_den;		/* R: rate denominator */
 	snd_pcm_uframes_t fifo_size;	/* R: chip FIFO size in frames */
-	unsigned char reserved[64];	/* reserved for future */
+	unsigned char sync[16];		/* R: synchronization ID (perfect sync - one clock source) */
+	unsigned char reserved[48];	/* reserved for future */
 };
 
 enum {
-- 
cgit v1.2.3


From d712c58c55d9a4b4cc88ec2e1f8cd2e3b82359b5 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Tue, 25 Jun 2024 19:25:47 +0200
Subject: ALSA: pcm: optimize and clarify stream synchronization ID API

Optimize the memory usage in struct snd_pcm_runtime - use boolean
value for the standard sync ID scheme.

Introduce snd_pcm_set_sync_per_card function to build synchronization
IDs.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Link: https://patch.msgid.link/20240625172836.589380-3-perex@perex.cz
---
 include/sound/pcm.h | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index dbce137d8806..ac8f3aef9205 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -402,7 +402,7 @@ struct snd_pcm_runtime {
 	snd_pcm_uframes_t silence_start; /* starting pointer to silence area */
 	snd_pcm_uframes_t silence_filled; /* already filled part of silence area */
 
-	unsigned char sync[16];		/* hardware synchronization ID */
+	bool std_sync_id;		/* hardware synchronization - standard per card ID */
 
 	/* -- mmap -- */
 	struct snd_pcm_mmap_status *status;
@@ -1156,7 +1156,18 @@ int snd_pcm_format_set_silence(snd_pcm_format_t format, void *buf, unsigned int
 
 void snd_pcm_set_ops(struct snd_pcm * pcm, int direction,
 		     const struct snd_pcm_ops *ops);
-void snd_pcm_set_sync(struct snd_pcm_substream *substream);
+void snd_pcm_set_sync_per_card(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params,
+			       const unsigned char *id, unsigned int len);
+/**
+ * snd_pcm_set_sync - set the PCM sync id
+ * @substream: the pcm substream
+ *
+ * Use the default PCM sync identifier for the specific card.
+ */
+static inline void snd_pcm_set_sync(struct snd_pcm_substream *substream)
+{
+	substream->runtime->std_sync_id = true;
+}
 int snd_pcm_lib_ioctl(struct snd_pcm_substream *substream,
 		      unsigned int cmd, void *arg);                      
 void snd_pcm_period_elapsed_under_stream_lock(struct snd_pcm_substream *substream);
-- 
cgit v1.2.3


From 6f2a875024993449f1b19a144d3e4391411a1b51 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Tue, 25 Jun 2024 09:38:15 +0200
Subject: gpiolib: unexport gpiochip_get_desc()

This function has been deprecated for some time and is now only used
within the GPIOLIB core. Remove it from the public header and unexport
it as all current users are linked against the compilation unit where
it is defined.

Link: https://lore.kernel.org/r/20240625073815.12376-1-brgl@bgdev.pl
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 include/linux/gpio/driver.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 6d31388dde0a..2dd7cb9cc270 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -787,7 +787,6 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc,
 					    enum gpiod_flags dflags);
 void gpiochip_free_own_desc(struct gpio_desc *desc);
 
-struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum);
 struct gpio_desc *
 gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum);
 
-- 
cgit v1.2.3


From 060f4ba6e40338a70932603a3564903acf5f5734 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 27 Jun 2024 13:59:44 +0100
Subject: io_uring/net: move charging socket out of zc io_uring

Currently, io_uring's io_sg_from_iter() duplicates the part of
__zerocopy_sg_from_iter() charging pages to the socket. It'd be too easy
to miss while changing it in net/, the chunk is not the most
straightforward for outside users and full of internal implementation
details. io_uring is not a good place to keep it, deduplicate it by
moving out of the callback into __zerocopy_sg_from_iter().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/skbuff.h | 3 +++
 include/linux/socket.h | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f4cda3fbdb75..9c29bdd5596d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1703,6 +1703,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
 			    struct sk_buff *skb, struct iov_iter *from,
 			    size_t length);
 
+int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
+				struct iov_iter *from, size_t length);
+
 static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb,
 					  struct msghdr *msg, int len)
 {
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 89d16b90370b..2a1ff91d1914 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -76,7 +76,7 @@ struct msghdr {
 	__kernel_size_t	msg_controllen;	/* ancillary data buffer length */
 	struct kiocb	*msg_iocb;	/* ptr to iocb for async requests */
 	struct ubuf_info *msg_ubuf;
-	int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb,
+	int (*sg_from_iter)(struct sk_buff *skb,
 			    struct iov_iter *from, size_t length);
 };
 
-- 
cgit v1.2.3


From d7f39aee79f04eeaa42085728423501b33ac5be5 Mon Sep 17 00:00:00 2001
From: David Wei <dw@davidwei.uk>
Date: Wed, 26 Jun 2024 20:01:59 -0700
Subject: page_pool: export page_pool_disable_direct_recycling()

56ef27e3 unexported page_pool_unlink_napi() and renamed it to
page_pool_disable_direct_recycling(). This is because there was no
in-tree user of page_pool_unlink_napi().

Since then Rx queue API and an implementation in bnxt got merged. In the
bnxt implementation, it broadly follows the following steps: allocate
new queue memory + page pool, stop old rx queue, swap, then destroy old
queue memory + page pool.

The existing NAPI instance is re-used so when the old page pool that is
no longer used but still linked to this shared NAPI instance is
destroyed, it will trigger warnings.

In my initial patches I unlinked a page pool from a NAPI instance
directly. Instead, export page_pool_disable_direct_recycling() and call
that instead to avoid having a driver touch a core struct.

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David Wei <dw@davidwei.uk>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/page_pool/types.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 7e8477057f3d..9093a964fc33 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -229,6 +229,7 @@ struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
 struct xdp_mem_info;
 
 #ifdef CONFIG_PAGE_POOL
+void page_pool_disable_direct_recycling(struct page_pool *pool);
 void page_pool_destroy(struct page_pool *pool);
 void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
 			   const struct xdp_mem_info *mem);
-- 
cgit v1.2.3


From 32267c29bc7d5c9654b71e4f354064217a5fb053 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Draszik?= <andre.draszik@linaro.org>
Date: Mon, 17 Jun 2024 17:44:47 +0100
Subject: phy: exynos5-usbdrd: support Exynos USBDRD 3.1 combo phy (HS & SS)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for the Exynos USB 3.1 DRD combo phy, as found in Exynos 9
SoCs like Google GS101. It supports USB SS, HS and DisplayPort.

In terms of UTMI+, this is very similar to the existing Exynos850
support in this driver. The difference is that this combo phy supports
both UTMI+ (HS) and PIPE3 (SS). It also supports DP alt mode.

The number of ports for UTMI+ and PIPE3 can be determined using the
LINKPORT register (which also exists on Exynos E850).

For SuperSpeed (SS) a new SS phy is in use and its PIPE3 interface is
new compared to Exynos E850, and also very different from the existing
support for older Exynos SoCs in this driver.

The SS phy needs a bit more configuration work and register tuning for
signal quality to work reliably, presumably due to the higher
frequency, e.g. to account for different board layouts. Additionally,
power needs to be enabled before writing to the SS phy registers.

This commit adds the necessary changes for USB HS and SS to work.
DisplayPort is out of scope in this commit.

Notes:
* For the register tuning, exynos5_usbdrd_apply_phy_tunes() has been
  added with the appropriate data structures to support tuning at
  various stages during initialisation. Since these are hardware
  specific, the platform data is supposed to be populated accordingly.
  The implementation is loosely modelled after the Samsung UFS PHY
  driver.

  There is one tuning state for UTMI+, PTS_UTMI_POSTINIT, to execute
  after init and generally intended for HS signal tuning, as done in
  this commit.

  PTS_PIPE3_PREINIT PTS_PIPE3_INIT PTS_PIPE3_POSTINIT
  PTS_PIPE3_POSTLOCK are tuning states for PIPE3. In the downstream
  driver, preinit differs by Exynos SoC, and postinit and postlock
  are different per board. The latter haven't been implemented for
  gs101 here, because downstream doesn't use them on gs101 either.

* Signal lock acquisition for SS depends on the orientation of the
  USB-C plug. Since there currently is no infrastructure to chain
  connector events to both the USB DWC3 driver and this phy driver, a
  work-around has been added in
  exynos5_usbdrd_usbdp_g2_v4_pma_check_cdr_lock() to check both
  registers if it failed in one of the orientations.

* Equally, we can only establish SS speed in one of the connector
  orientations due to programming differences when selecting the lane
  mux in exynos5_usbdrd_usbdp_g2_v4_pma_lane_mux_sel(), which really
  needs to be dynamic, based on the orientation of the connector.

* As is, we can establish a HS link using any cable, and an SS link in
  one orientation of the plug, falling back to HS if the orientation is
  reversed to the expectation.

Signed-off-by: André Draszik <andre.draszik@linaro.org>
Reviewed-by: Peter Griffin <peter.griffin@linaro.org>
Tested-by: Peter Griffin <peter.griffin@linaro.org>
Tested-by: Will McVicker <willmcvicker@google.com>
Link: https://lore.kernel.org/r/20240617-usb-phy-gs101-v3-6-b66de9ae7424@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soc/samsung/exynos-regs-pmu.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h
index aa840ed043e1..6765160eaab2 100644
--- a/include/linux/soc/samsung/exynos-regs-pmu.h
+++ b/include/linux/soc/samsung/exynos-regs-pmu.h
@@ -657,4 +657,8 @@
 #define EXYNOS5433_PAD_RETENTION_UFS_OPTION			(0x3268)
 #define EXYNOS5433_PAD_RETENTION_FSYSGENIO_OPTION		(0x32A8)
 
+/* For GS101 */
+#define GS101_PHY_CTRL_USB20					0x3eb0
+#define GS101_PHY_CTRL_USBDP					0x3eb4
+
 #endif /* __LINUX_SOC_EXYNOS_REGS_PMU_H */
-- 
cgit v1.2.3


From d839a73179ae91c07f5f2f97ccb9c69b2b7c3306 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 28 Jun 2024 12:18:55 +0200
Subject: net: Optimize xdp_do_flush() with bpf_net_context infos.

Every NIC driver utilizing XDP should invoke xdp_do_flush() after
processing all packages. With the introduction of the bpf_net_context
logic the flush lists (for dev, CPU-map and xsk) are lazy initialized
only if used. However xdp_do_flush() tries to flush all three of them so
all three lists are always initialized and the likely empty lists are
"iterated".
Without the usage of XDP but with CONFIG_DEBUG_NET the lists are also
initialized due to xdp_do_check_flushed().

Jakub suggest to utilize the hints in bpf_net_context and avoid invoking
the flush function. This will also avoiding initializing the lists which
are otherwise unused.

Introduce bpf_net_ctx_get_all_used_flush_lists() to return the
individual list if not-empty. Use the logic in xdp_do_flush() and
xdp_do_check_flushed(). Remove the not needed .*_check_flush().

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/bpf.h    | 10 ++++------
 include/linux/filter.h | 27 +++++++++++++++++++++++++++
 include/net/xdp_sock.h | 14 ++------------
 3 files changed, 33 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a834f4b761bc..f5c6bc9093a6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2494,7 +2494,7 @@ struct sk_buff;
 struct bpf_dtab_netdev;
 struct bpf_cpu_map_entry;
 
-void __dev_flush(void);
+void __dev_flush(struct list_head *flush_list);
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
@@ -2507,7 +2507,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
 			   struct bpf_prog *xdp_prog, struct bpf_map *map,
 			   bool exclude_ingress);
 
-void __cpu_map_flush(void);
+void __cpu_map_flush(struct list_head *flush_list);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx);
 int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
@@ -2644,8 +2644,6 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
 void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
 void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
 
-bool dev_check_flush(void);
-bool cpu_map_check_flush(void);
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -2738,7 +2736,7 @@ static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd)
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
-static inline void __dev_flush(void)
+static inline void __dev_flush(struct list_head *flush_list)
 {
 }
 
@@ -2784,7 +2782,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
 	return 0;
 }
 
-static inline void __cpu_map_flush(void)
+static inline void __cpu_map_flush(struct list_head *flush_list)
 {
 }
 
diff --git a/include/linux/filter.h b/include/linux/filter.h
index c0349522de8f..02ddcfdf94c4 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -829,6 +829,33 @@ static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void)
 	return &bpf_net_ctx->xskmap_map_flush_list;
 }
 
+static inline void bpf_net_ctx_get_all_used_flush_lists(struct list_head **lh_map,
+							struct list_head **lh_dev,
+							struct list_head **lh_xsk)
+{
+	struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get();
+	u32 kern_flags = bpf_net_ctx->ri.kern_flags;
+	struct list_head *lh;
+
+	*lh_map = *lh_dev = *lh_xsk = NULL;
+
+	if (!IS_ENABLED(CONFIG_BPF_SYSCALL))
+		return;
+
+	lh = &bpf_net_ctx->dev_map_flush_list;
+	if (kern_flags & BPF_RI_F_DEV_MAP_INIT && !list_empty(lh))
+		*lh_dev = lh;
+
+	lh = &bpf_net_ctx->cpu_map_flush_list;
+	if (kern_flags & BPF_RI_F_CPU_MAP_INIT && !list_empty(lh))
+		*lh_map = lh;
+
+	lh = &bpf_net_ctx->xskmap_map_flush_list;
+	if (IS_ENABLED(CONFIG_XDP_SOCKETS) &&
+	    kern_flags & BPF_RI_F_XSK_MAP_INIT && !list_empty(lh))
+		*lh_xsk = lh;
+}
+
 /* Compute the linear packet data range [data, data_end) which
  * will be accessed by various program types (cls_bpf, act_bpf,
  * lwt, ...). Subsystems allowing direct data access must (!)
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 3d54de168a6d..bfe625b55d55 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -121,7 +121,7 @@ struct xsk_tx_metadata_ops {
 
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
-void __xsk_map_flush(void);
+void __xsk_map_flush(struct list_head *flush_list);
 
 /**
  *  xsk_tx_metadata_to_compl - Save enough relevant metadata information
@@ -206,7 +206,7 @@ static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
 	return -EOPNOTSUPP;
 }
 
-static inline void __xsk_map_flush(void)
+static inline void __xsk_map_flush(struct list_head *flush_list)
 {
 }
 
@@ -228,14 +228,4 @@ static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
 }
 
 #endif /* CONFIG_XDP_SOCKETS */
-
-#if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET)
-bool xsk_map_check_flush(void);
-#else
-static inline bool xsk_map_check_flush(void)
-{
-	return false;
-}
-#endif
-
 #endif /* _LINUX_XDP_SOCK_H */
-- 
cgit v1.2.3


From 7a4479680d7fd05c7a3efa87b41f421af48fbbdf Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 1 Jul 2024 16:49:37 -0700
Subject: cgroup_misc: add kernel-doc comments for enum misc_res_type

Fully document enum misc_res_type with kernel-doc comments to prevent
kernel-doc warnings:

misc_cgroup.h:12: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Types of misc cgroup entries supported by the host.
misc_cgroup.h:12: warning: missing initial short description on line:
 * Types of misc cgroup entries supported by the host.

Fixes: a72232eabdfc ("cgroup: Add misc cgroup controller")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: cgroups@vger.kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/misc_cgroup.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h
index e799b1f8d05b..d70eab2501ee 100644
--- a/include/linux/misc_cgroup.h
+++ b/include/linux/misc_cgroup.h
@@ -9,15 +9,16 @@
 #define _MISC_CGROUP_H_
 
 /**
- * Types of misc cgroup entries supported by the host.
+ * enum misc_res_type - Types of misc cgroup entries supported by the host.
  */
 enum misc_res_type {
 #ifdef CONFIG_KVM_AMD_SEV
-	/* AMD SEV ASIDs resource */
+	/** @MISC_CG_RES_SEV: AMD SEV ASIDs resource */
 	MISC_CG_RES_SEV,
-	/* AMD SEV-ES ASIDs resource */
+	/** @MISC_CG_RES_SEV_ES: AMD SEV-ES ASIDs resource */
 	MISC_CG_RES_SEV_ES,
 #endif
+	/** @MISC_CG_RES_TYPES: count of enum misc_res_type constants */
 	MISC_CG_RES_TYPES
 };
 
-- 
cgit v1.2.3


From f436cb6913a57bf3e1e66d18bc663e6c20751929 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 28 Jun 2024 14:56:01 -0700
Subject: x86/resctrl: Prepare for new domain scope

Resctrl resources operate on subsets of CPUs in the system with the
defining attribute of each subset being an instance of a particular
level of cache. E.g. all CPUs sharing an L3 cache would be part of the
same domain.

In preparation for features that are scoped at the NUMA node level,
change the code from explicit references to "cache_level" to a more
generic scope. At this point the only options for this scope are groups
of CPUs that share an L2 cache or L3 cache.

Clean up the error handling when looking up domains. Report invalid ids
before calling rdt_find_domain() in preparation for better messages when
scope can be other than cache scope. This means that rdt_find_domain()
will never return an error. So remove checks for error from the call sites.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Link: https://lore.kernel.org/r/20240628215619.76401-2-tony.luck@intel.com
---
 include/linux/resctrl.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index a365f67131ec..ed693bfe474d 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -150,13 +150,18 @@ struct resctrl_membw {
 struct rdt_parse_data;
 struct resctrl_schema;
 
+enum resctrl_scope {
+	RESCTRL_L2_CACHE = 2,
+	RESCTRL_L3_CACHE = 3,
+};
+
 /**
  * struct rdt_resource - attributes of a resctrl resource
  * @rid:		The index of the resource
  * @alloc_capable:	Is allocation available on this machine
  * @mon_capable:	Is monitor feature available on this machine
  * @num_rmid:		Number of RMIDs available
- * @cache_level:	Which cache level defines scope of this resource
+ * @scope:		Scope of this resource
  * @cache:		Cache allocation related data
  * @membw:		If the component has bandwidth controls, their properties.
  * @domains:		RCU list of all domains for this resource
@@ -174,7 +179,7 @@ struct rdt_resource {
 	bool			alloc_capable;
 	bool			mon_capable;
 	int			num_rmid;
-	int			cache_level;
+	enum resctrl_scope	scope;
 	struct resctrl_cache	cache;
 	struct resctrl_membw	membw;
 	struct list_head	domains;
-- 
cgit v1.2.3


From c103d4d48e1599a88001fa6215be27d55f3c025b Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 28 Jun 2024 14:56:02 -0700
Subject: x86/resctrl: Prepare to split rdt_domain structure

The rdt_domain structure is used for both control and monitor features.
It is about to be split into separate structures for these two usages
because the scope for control and monitoring features for a resource
will be different for future resources.

To allow for common code that scans a list of domains looking for a
specific domain id, move all the common fields ("list", "id", "cpu_mask")
into their own structure within the rdt_domain structure.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Link: https://lore.kernel.org/r/20240628215619.76401-3-tony.luck@intel.com
---
 include/linux/resctrl.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index ed693bfe474d..f63fcf17a3bc 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -59,10 +59,20 @@ struct resctrl_staged_config {
 };
 
 /**
- * struct rdt_domain - group of CPUs sharing a resctrl resource
+ * struct rdt_domain_hdr - common header for different domain types
  * @list:		all instances of this resource
  * @id:			unique id for this instance
  * @cpu_mask:		which CPUs share this resource
+ */
+struct rdt_domain_hdr {
+	struct list_head		list;
+	int				id;
+	struct cpumask			cpu_mask;
+};
+
+/**
+ * struct rdt_domain - group of CPUs sharing a resctrl resource
+ * @hdr:		common header for different domain types
  * @rmid_busy_llc:	bitmap of which limbo RMIDs are above threshold
  * @mbm_total:		saved state for MBM total bandwidth
  * @mbm_local:		saved state for MBM local bandwidth
@@ -77,9 +87,7 @@ struct resctrl_staged_config {
  *			by closid
  */
 struct rdt_domain {
-	struct list_head		list;
-	int				id;
-	struct cpumask			cpu_mask;
+	struct rdt_domain_hdr		hdr;
 	unsigned long			*rmid_busy_llc;
 	struct mbm_state		*mbm_total;
 	struct mbm_state		*mbm_local;
-- 
cgit v1.2.3


From cd84f72b6a5c10f79f19fab67b0edfbc4fdbc5b1 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 28 Jun 2024 14:56:03 -0700
Subject: x86/resctrl: Prepare for different scope for control/monitor
 operations

Resctrl assumes that control and monitor operations on a resource are
performed at the same scope.

Prepare for systems that use different scope (specifically Intel needs
to split the RDT_RESOURCE_L3 resource to use L3 scope for cache control
and NODE scope for cache occupancy and memory bandwidth monitoring).

Create separate domain lists for control and monitor operations.

Note that errors during initialization of either control or monitor
functions on a domain would previously result in that domain being
excluded from both control and monitor operations. Now the domains are
allocated independently it is no longer required to disable both control
and monitor operations if either fail.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Link: https://lore.kernel.org/r/20240628215619.76401-4-tony.luck@intel.com
---
 include/linux/resctrl.h | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index f63fcf17a3bc..96ddf9ff3183 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -58,15 +58,22 @@ struct resctrl_staged_config {
 	bool			have_new_ctrl;
 };
 
+enum resctrl_domain_type {
+	RESCTRL_CTRL_DOMAIN,
+	RESCTRL_MON_DOMAIN,
+};
+
 /**
  * struct rdt_domain_hdr - common header for different domain types
  * @list:		all instances of this resource
  * @id:			unique id for this instance
+ * @type:		type of this instance
  * @cpu_mask:		which CPUs share this resource
  */
 struct rdt_domain_hdr {
 	struct list_head		list;
 	int				id;
+	enum resctrl_domain_type	type;
 	struct cpumask			cpu_mask;
 };
 
@@ -169,10 +176,12 @@ enum resctrl_scope {
  * @alloc_capable:	Is allocation available on this machine
  * @mon_capable:	Is monitor feature available on this machine
  * @num_rmid:		Number of RMIDs available
- * @scope:		Scope of this resource
+ * @ctrl_scope:		Scope of this resource for control functions
+ * @mon_scope:		Scope of this resource for monitor functions
  * @cache:		Cache allocation related data
  * @membw:		If the component has bandwidth controls, their properties.
- * @domains:		RCU list of all domains for this resource
+ * @ctrl_domains:	RCU list of all control domains for this resource
+ * @mon_domains:	RCU list of all monitor domains for this resource
  * @name:		Name to use in "schemata" file.
  * @data_width:		Character width of data when displaying
  * @default_ctrl:	Specifies default cache cbm or memory B/W percent.
@@ -187,10 +196,12 @@ struct rdt_resource {
 	bool			alloc_capable;
 	bool			mon_capable;
 	int			num_rmid;
-	enum resctrl_scope	scope;
+	enum resctrl_scope	ctrl_scope;
+	enum resctrl_scope	mon_scope;
 	struct resctrl_cache	cache;
 	struct resctrl_membw	membw;
-	struct list_head	domains;
+	struct list_head	ctrl_domains;
+	struct list_head	mon_domains;
 	char			*name;
 	int			data_width;
 	u32			default_ctrl;
@@ -236,8 +247,10 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
 
 u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
 			    u32 closid, enum resctrl_conf_type type);
-int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d);
-void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
+int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d);
+int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain *d);
+void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d);
+void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d);
 void resctrl_online_cpu(unsigned int cpu);
 void resctrl_offline_cpu(unsigned int cpu);
 
-- 
cgit v1.2.3


From cae2bcb6a2c691ef7b537ad07e9819a5ed645bcc Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 28 Jun 2024 14:56:04 -0700
Subject: x86/resctrl: Split the rdt_domain and rdt_hw_domain structures

The same rdt_domain structure is used for both control and monitor
functions. But this results in wasted memory as some of the fields are
only used by control functions, while most are only used for monitor
functions.

Split into separate rdt_ctrl_domain and rdt_mon_domain structures with
just the fields required for control and monitoring respectively.

Similar split of the rdt_hw_domain structure into rdt_hw_ctrl_domain
and rdt_hw_mon_domain.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Link: https://lore.kernel.org/r/20240628215619.76401-5-tony.luck@intel.com
---
 include/linux/resctrl.h | 48 ++++++++++++++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 96ddf9ff3183..aa2c22a8e37b 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -78,7 +78,23 @@ struct rdt_domain_hdr {
 };
 
 /**
- * struct rdt_domain - group of CPUs sharing a resctrl resource
+ * struct rdt_ctrl_domain - group of CPUs sharing a resctrl control resource
+ * @hdr:		common header for different domain types
+ * @plr:		pseudo-locked region (if any) associated with domain
+ * @staged_config:	parsed configuration to be applied
+ * @mbps_val:		When mba_sc is enabled, this holds the array of user
+ *			specified control values for mba_sc in MBps, indexed
+ *			by closid
+ */
+struct rdt_ctrl_domain {
+	struct rdt_domain_hdr		hdr;
+	struct pseudo_lock_region	*plr;
+	struct resctrl_staged_config	staged_config[CDP_NUM_TYPES];
+	u32				*mbps_val;
+};
+
+/**
+ * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource
  * @hdr:		common header for different domain types
  * @rmid_busy_llc:	bitmap of which limbo RMIDs are above threshold
  * @mbm_total:		saved state for MBM total bandwidth
@@ -87,13 +103,8 @@ struct rdt_domain_hdr {
  * @cqm_limbo:		worker to periodically read CQM h/w counters
  * @mbm_work_cpu:	worker CPU for MBM h/w counters
  * @cqm_work_cpu:	worker CPU for CQM h/w counters
- * @plr:		pseudo-locked region (if any) associated with domain
- * @staged_config:	parsed configuration to be applied
- * @mbps_val:		When mba_sc is enabled, this holds the array of user
- *			specified control values for mba_sc in MBps, indexed
- *			by closid
  */
-struct rdt_domain {
+struct rdt_mon_domain {
 	struct rdt_domain_hdr		hdr;
 	unsigned long			*rmid_busy_llc;
 	struct mbm_state		*mbm_total;
@@ -102,9 +113,6 @@ struct rdt_domain {
 	struct delayed_work		cqm_limbo;
 	int				mbm_work_cpu;
 	int				cqm_work_cpu;
-	struct pseudo_lock_region	*plr;
-	struct resctrl_staged_config	staged_config[CDP_NUM_TYPES];
-	u32				*mbps_val;
 };
 
 /**
@@ -208,7 +216,7 @@ struct rdt_resource {
 	const char		*format_str;
 	int			(*parse_ctrlval)(struct rdt_parse_data *data,
 						 struct resctrl_schema *s,
-						 struct rdt_domain *d);
+						 struct rdt_ctrl_domain *d);
 	struct list_head	evt_list;
 	unsigned long		fflags;
 	bool			cdp_capable;
@@ -242,15 +250,15 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
  * Update the ctrl_val and apply this config right now.
  * Must be called on one of the domain's CPUs.
  */
-int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
+int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
 			    u32 closid, enum resctrl_conf_type t, u32 cfg_val);
 
-u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
+u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
 			    u32 closid, enum resctrl_conf_type type);
-int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d);
-int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain *d);
-void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d);
-void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d);
+int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d);
+int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d);
+void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d);
+void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d);
 void resctrl_online_cpu(unsigned int cpu);
 void resctrl_offline_cpu(unsigned int cpu);
 
@@ -279,7 +287,7 @@ void resctrl_offline_cpu(unsigned int cpu);
  * Return:
  * 0 on success, or -EIO, -EINVAL etc on error.
  */
-int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
+int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
 			   u32 closid, u32 rmid, enum resctrl_event_id eventid,
 			   u64 *val, void *arch_mon_ctx);
 
@@ -312,7 +320,7 @@ static inline void resctrl_arch_rmid_read_context_check(void)
  *
  * This can be called from any CPU.
  */
-void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
+void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
 			     u32 closid, u32 rmid,
 			     enum resctrl_event_id eventid);
 
@@ -325,7 +333,7 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
  *
  * This can be called from any CPU.
  */
-void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d);
+void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d);
 
 extern unsigned int resctrl_rmid_realloc_threshold;
 extern unsigned int resctrl_rmid_realloc_limit;
-- 
cgit v1.2.3


From 1a171608ee8d40d22d604303e42f033c69151123 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 28 Jun 2024 14:56:05 -0700
Subject: x86/resctrl: Add node-scope to the options for feature scope

Currently supported resctrl features are all domain scoped the same as the
scope of the L2 or L3 caches.

Add RESCTRL_L3_NODE as a new option for features that are scoped at the
same granularity as NUMA nodes. This is needed for Intel's Sub-NUMA
Cluster (SNC) feature where monitoring features are divided between
nodes that share an L3 cache.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Link: https://lore.kernel.org/r/20240628215619.76401-6-tony.luck@intel.com
---
 include/linux/resctrl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index aa2c22a8e37b..64b6ad1b22a1 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -176,6 +176,7 @@ struct resctrl_schema;
 enum resctrl_scope {
 	RESCTRL_L2_CACHE = 2,
 	RESCTRL_L3_CACHE = 3,
+	RESCTRL_L3_NODE,
 };
 
 /**
-- 
cgit v1.2.3


From 328ea688746420e12ced6cfbc5064413180244cc Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 28 Jun 2024 14:56:08 -0700
Subject: x86/resctrl: Prepare for new Sub-NUMA Cluster (SNC) monitor files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When SNC is enabled, monitoring data is collected at the SNC node granularity,
but must be reported at L3-cache granularity for backwards compatibility in
addition to reporting at the node level.

Add a "ci" field to the rdt_mon_domain structure to save the cache information
about the enclosing L3 cache for the domain.  This provides:

1) The cache id which is needed to compose the name of the legacy monitoring
   directory, and to determine which domains should be summed to provide
   L3-scoped data.

2) The shared_cpu_map which is needed to determine which CPUs can be used to
   read the RMID counters with the MSR interface.

This is the first step to an eventual goal of monitor reporting files like this
(for a system with two SNC nodes per L3):

  $ cd /sys/fs/resctrl/mon_data
  $ tree mon_L3_00
  mon_L3_00			<- 00 here is L3 cache id
  ├── llc_occupancy		\  These files provide legacy support
  ├── mbm_local_bytes		 > for non-SNC aware monitor apps
  ├── mbm_total_bytes		/  that expect data at L3 cache level
  ├── mon_sub_L3_00		<- 00 here is SNC node id
  │   ├── llc_occupancy		\  These files are finer grained
  │   ├── mbm_local_bytes		 > data from each SNC node
  │   └── mbm_total_bytes		/
  └── mon_sub_L3_01
      ├── llc_occupancy		\
      ├── mbm_local_bytes		 > As above, but for node 1.
      └── mbm_total_bytes		/

  [ bp: Massage commit message. ]

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Link: https://lore.kernel.org/r/20240628215619.76401-9-tony.luck@intel.com
---
 include/linux/resctrl.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 64b6ad1b22a1..b0875b99e811 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -2,6 +2,7 @@
 #ifndef _RESCTRL_H
 #define _RESCTRL_H
 
+#include <linux/cacheinfo.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/pid.h>
@@ -96,6 +97,7 @@ struct rdt_ctrl_domain {
 /**
  * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource
  * @hdr:		common header for different domain types
+ * @ci:			cache info for this domain
  * @rmid_busy_llc:	bitmap of which limbo RMIDs are above threshold
  * @mbm_total:		saved state for MBM total bandwidth
  * @mbm_local:		saved state for MBM local bandwidth
@@ -106,6 +108,7 @@ struct rdt_ctrl_domain {
  */
 struct rdt_mon_domain {
 	struct rdt_domain_hdr		hdr;
+	struct cacheinfo		*ci;
 	unsigned long			*rmid_busy_llc;
 	struct mbm_state		*mbm_total;
 	struct mbm_state		*mbm_local;
-- 
cgit v1.2.3


From 675e979db473d08be346a3190c5f0db095a57153 Mon Sep 17 00:00:00 2001
From: "Fabio M. De Francesco" <fabio.m.de.francesco@linux.intel.com>
Date: Fri, 7 Jun 2024 16:43:58 +0200
Subject: cxl/events: Use a common struct for DRAM and General Media events

cxl_event_common was an unfortunate naming choice and caused confusion with
the existing Common Event Record. Furthermore, its fields didn't map all
the common information between DRAM and General Media Events.

Remove cxl_event_common and introduce cxl_event_media_hdr to record common
information between DRAM and General Media events.

cxl_event_media_hdr, which is embedded in both cxl_event_gen_media and
cxl_event_dram, leverages the commonalities between the two events to
simplify their respective handling.

Suggested-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/20240607144423.48681-1-fabio.m.de.francesco@linux.intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 include/linux/cxl-event.h | 45 +++++++++++++++++++--------------------------
 1 file changed, 19 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h
index 60b25020281f..0bea1afbd747 100644
--- a/include/linux/cxl-event.h
+++ b/include/linux/cxl-event.h
@@ -21,6 +21,21 @@ struct cxl_event_record_hdr {
 	u8 reserved[15];
 } __packed;
 
+struct cxl_event_media_hdr {
+	struct cxl_event_record_hdr hdr;
+	__le64 phys_addr;
+	u8 descriptor;
+	u8 type;
+	u8 transaction_type;
+	/*
+	 * The meaning of Validity Flags from bit 2 is
+	 * different across DRAM and General Media records
+	 */
+	u8 validity_flags[2];
+	u8 channel;
+	u8 rank;
+} __packed;
+
 #define CXL_EVENT_RECORD_DATA_LENGTH 0x50
 struct cxl_event_generic {
 	struct cxl_event_record_hdr hdr;
@@ -33,14 +48,7 @@ struct cxl_event_generic {
  */
 #define CXL_EVENT_GEN_MED_COMP_ID_SIZE	0x10
 struct cxl_event_gen_media {
-	struct cxl_event_record_hdr hdr;
-	__le64 phys_addr;
-	u8 descriptor;
-	u8 type;
-	u8 transaction_type;
-	u8 validity_flags[2];
-	u8 channel;
-	u8 rank;
+	struct cxl_event_media_hdr media_hdr;
 	u8 device[3];
 	u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
 	u8 reserved[46];
@@ -52,14 +60,7 @@ struct cxl_event_gen_media {
  */
 #define CXL_EVENT_DER_CORRECTION_MASK_SIZE	0x20
 struct cxl_event_dram {
-	struct cxl_event_record_hdr hdr;
-	__le64 phys_addr;
-	u8 descriptor;
-	u8 type;
-	u8 transaction_type;
-	u8 validity_flags[2];
-	u8 channel;
-	u8 rank;
+	struct cxl_event_media_hdr media_hdr;
 	u8 nibble_mask[3];
 	u8 bank_group;
 	u8 bank;
@@ -95,21 +96,13 @@ struct cxl_event_mem_module {
 	u8 reserved[0x3d];
 } __packed;
 
-/*
- * General Media or DRAM Event Common Fields
- * - provides common access to phys_addr
- */
-struct cxl_event_common {
-	struct cxl_event_record_hdr hdr;
-	__le64 phys_addr;
-} __packed;
-
 union cxl_event {
 	struct cxl_event_generic generic;
 	struct cxl_event_gen_media gen_media;
 	struct cxl_event_dram dram;
 	struct cxl_event_mem_module mem_module;
-	struct cxl_event_common common;
+	/* dram & gen_media event header */
+	struct cxl_event_media_hdr media_hdr;
 } __packed;
 
 /*
-- 
cgit v1.2.3


From 4dec64c52e24c2c9a15f81c115f1be5ea35121cb Mon Sep 17 00:00:00 2001
From: Mina Almasry <almasrymina@google.com>
Date: Fri, 28 Jun 2024 00:32:42 +0000
Subject: page_pool: convert to use netmem

Abstract the memory type from the page_pool so we can later add support
for new memory types. Convert the page_pool to use the new netmem type
abstraction, rather than use struct page directly.

As of this patch the netmem type is a no-op abstraction: it's always a
struct page underneath. All the page pool internals are converted to
use struct netmem instead of struct page, and the page pool now exports
2 APIs:

1. The existing struct page API.
2. The new struct netmem API.

Keeping the existing API is transitional; we do not want to refactor all
the current drivers using the page pool at once.

The netmem abstraction is currently a no-op. The page_pool uses
page_to_netmem() to convert allocated pages to netmem, and uses
netmem_to_page() to convert the netmem back to pages to pass to mm APIs,

Follow up patches to this series add non-paged netmem support to the
page_pool. This change is factored out on its own to limit the code
churn to this 1 patch, for ease of code review.

Signed-off-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://patch.msgid.link/20240628003253.1694510-6-almasrymina@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff_ref.h       |  4 +-
 include/net/netmem.h             | 15 +++++++
 include/net/page_pool/helpers.h  | 91 ++++++++++++++++++++++++++++++----------
 include/net/page_pool/types.h    | 14 +++++--
 include/trace/events/page_pool.h | 30 ++++++-------
 5 files changed, 114 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h
index 11f0a4063403..16c241a23472 100644
--- a/include/linux/skbuff_ref.h
+++ b/include/linux/skbuff_ref.h
@@ -32,13 +32,13 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
 	__skb_frag_ref(&skb_shinfo(skb)->frags[f]);
 }
 
-bool napi_pp_put_page(struct page *page);
+bool napi_pp_put_page(netmem_ref netmem);
 
 static inline void
 skb_page_unref(struct page *page, bool recycle)
 {
 #ifdef CONFIG_PAGE_POOL
-	if (recycle && napi_pp_put_page(page))
+	if (recycle && napi_pp_put_page(page_to_netmem(page)))
 		return;
 #endif
 	put_page(page);
diff --git a/include/net/netmem.h b/include/net/netmem.h
index d8b810245c1d..46cc9b89ac79 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -38,4 +38,19 @@ static inline netmem_ref page_to_netmem(struct page *page)
 	return (__force netmem_ref)page;
 }
 
+static inline int netmem_ref_count(netmem_ref netmem)
+{
+	return page_ref_count(netmem_to_page(netmem));
+}
+
+static inline unsigned long netmem_to_pfn(netmem_ref netmem)
+{
+	return page_to_pfn(netmem_to_page(netmem));
+}
+
+static inline netmem_ref netmem_compound_head(netmem_ref netmem)
+{
+	return page_to_netmem(compound_head(netmem_to_page(netmem)));
+}
+
 #endif /* _NET_NETMEM_H */
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 873631c79ab1..2b43a893c619 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -55,6 +55,8 @@
 #include <linux/dma-mapping.h>
 
 #include <net/page_pool/types.h>
+#include <net/net_debug.h>
+#include <net/netmem.h>
 
 #ifdef CONFIG_PAGE_POOL_STATS
 /* Deprecated driver-facing API, use netlink instead */
@@ -212,6 +214,11 @@ page_pool_get_dma_dir(const struct page_pool *pool)
 	return pool->p.dma_dir;
 }
 
+static inline void page_pool_fragment_netmem(netmem_ref netmem, long nr)
+{
+	atomic_long_set(&netmem_to_page(netmem)->pp_ref_count, nr);
+}
+
 /**
  * page_pool_fragment_page() - split a fresh page into fragments
  * @page:	page to split
@@ -232,11 +239,12 @@ page_pool_get_dma_dir(const struct page_pool *pool)
  */
 static inline void page_pool_fragment_page(struct page *page, long nr)
 {
-	atomic_long_set(&page->pp_ref_count, nr);
+	page_pool_fragment_netmem(page_to_netmem(page), nr);
 }
 
-static inline long page_pool_unref_page(struct page *page, long nr)
+static inline long page_pool_unref_netmem(netmem_ref netmem, long nr)
 {
+	struct page *page = netmem_to_page(netmem);
 	long ret;
 
 	/* If nr == pp_ref_count then we have cleared all remaining
@@ -279,15 +287,41 @@ static inline long page_pool_unref_page(struct page *page, long nr)
 	return ret;
 }
 
+static inline long page_pool_unref_page(struct page *page, long nr)
+{
+	return page_pool_unref_netmem(page_to_netmem(page), nr);
+}
+
+static inline void page_pool_ref_netmem(netmem_ref netmem)
+{
+	atomic_long_inc(&netmem_to_page(netmem)->pp_ref_count);
+}
+
 static inline void page_pool_ref_page(struct page *page)
 {
-	atomic_long_inc(&page->pp_ref_count);
+	page_pool_ref_netmem(page_to_netmem(page));
 }
 
-static inline bool page_pool_is_last_ref(struct page *page)
+static inline bool page_pool_is_last_ref(netmem_ref netmem)
 {
 	/* If page_pool_unref_page() returns 0, we were the last user */
-	return page_pool_unref_page(page, 1) == 0;
+	return page_pool_unref_netmem(netmem, 1) == 0;
+}
+
+static inline void page_pool_put_netmem(struct page_pool *pool,
+					netmem_ref netmem,
+					unsigned int dma_sync_size,
+					bool allow_direct)
+{
+	/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
+	 * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
+	 */
+#ifdef CONFIG_PAGE_POOL
+	if (!page_pool_is_last_ref(netmem))
+		return;
+
+	page_pool_put_unrefed_netmem(pool, netmem, dma_sync_size, allow_direct);
+#endif
 }
 
 /**
@@ -308,15 +342,15 @@ static inline void page_pool_put_page(struct page_pool *pool,
 				      unsigned int dma_sync_size,
 				      bool allow_direct)
 {
-	/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
-	 * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
-	 */
-#ifdef CONFIG_PAGE_POOL
-	if (!page_pool_is_last_ref(page))
-		return;
+	page_pool_put_netmem(pool, page_to_netmem(page), dma_sync_size,
+			     allow_direct);
+}
 
-	page_pool_put_unrefed_page(pool, page, dma_sync_size, allow_direct);
-#endif
+static inline void page_pool_put_full_netmem(struct page_pool *pool,
+					     netmem_ref netmem,
+					     bool allow_direct)
+{
+	page_pool_put_netmem(pool, netmem, -1, allow_direct);
 }
 
 /**
@@ -331,7 +365,7 @@ static inline void page_pool_put_page(struct page_pool *pool,
 static inline void page_pool_put_full_page(struct page_pool *pool,
 					   struct page *page, bool allow_direct)
 {
-	page_pool_put_page(pool, page, -1, allow_direct);
+	page_pool_put_netmem(pool, page_to_netmem(page), -1, allow_direct);
 }
 
 /**
@@ -365,6 +399,18 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va,
 	page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct);
 }
 
+static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem)
+{
+	struct page *page = netmem_to_page(netmem);
+
+	dma_addr_t ret = page->dma_addr;
+
+	if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
+		ret <<= PAGE_SHIFT;
+
+	return ret;
+}
+
 /**
  * page_pool_get_dma_addr() - Retrieve the stored DMA address.
  * @page:	page allocated from a page pool
@@ -374,16 +420,14 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va,
  */
 static inline dma_addr_t page_pool_get_dma_addr(const struct page *page)
 {
-	dma_addr_t ret = page->dma_addr;
-
-	if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
-		ret <<= PAGE_SHIFT;
-
-	return ret;
+	return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page));
 }
 
-static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+static inline bool page_pool_set_dma_addr_netmem(netmem_ref netmem,
+						 dma_addr_t addr)
 {
+	struct page *page = netmem_to_page(netmem);
+
 	if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
 		page->dma_addr = addr >> PAGE_SHIFT;
 
@@ -419,6 +463,11 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool,
 				      page_pool_get_dma_dir(pool));
 }
 
+static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+	return page_pool_set_dma_addr_netmem(page_to_netmem(page), addr);
+}
+
 static inline bool page_pool_put(struct page_pool *pool)
 {
 	return refcount_dec_and_test(&pool->user_cnt);
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 9093a964fc33..b70bcc14ceda 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -6,6 +6,7 @@
 #include <linux/dma-direction.h>
 #include <linux/ptr_ring.h>
 #include <linux/types.h>
+#include <net/netmem.h>
 
 #define PP_FLAG_DMA_MAP		BIT(0) /* Should page_pool do the DMA
 					* map/unmap
@@ -40,7 +41,7 @@
 #define PP_ALLOC_CACHE_REFILL	64
 struct pp_alloc_cache {
 	u32 count;
-	struct page *cache[PP_ALLOC_CACHE_SIZE];
+	netmem_ref cache[PP_ALLOC_CACHE_SIZE];
 };
 
 /**
@@ -73,7 +74,7 @@ struct page_pool_params {
 		struct net_device *netdev;
 		unsigned int	flags;
 /* private: used by test code only */
-		void (*init_callback)(struct page *page, void *arg);
+		void (*init_callback)(netmem_ref netmem, void *arg);
 		void *init_arg;
 	);
 };
@@ -151,7 +152,7 @@ struct page_pool {
 	 */
 	__cacheline_group_begin(frag) __aligned(4 * sizeof(long));
 	long frag_users;
-	struct page *frag_page;
+	netmem_ref frag_page;
 	unsigned int frag_offset;
 	__cacheline_group_end(frag);
 
@@ -220,8 +221,12 @@ struct page_pool {
 };
 
 struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
+netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp);
 struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
 				  unsigned int size, gfp_t gfp);
+netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
+				       unsigned int *offset, unsigned int size,
+				       gfp_t gfp);
 struct page_pool *page_pool_create(const struct page_pool_params *params);
 struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
 					  int cpuid);
@@ -252,6 +257,9 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
 }
 #endif
 
+void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem,
+				  unsigned int dma_sync_size,
+				  bool allow_direct);
 void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
 				unsigned int dma_sync_size,
 				bool allow_direct);
diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h
index 6834356b2d2a..543e54e432a1 100644
--- a/include/trace/events/page_pool.h
+++ b/include/trace/events/page_pool.h
@@ -42,51 +42,53 @@ TRACE_EVENT(page_pool_release,
 TRACE_EVENT(page_pool_state_release,
 
 	TP_PROTO(const struct page_pool *pool,
-		 const struct page *page, u32 release),
+		 netmem_ref netmem, u32 release),
 
-	TP_ARGS(pool, page, release),
+	TP_ARGS(pool, netmem, release),
 
 	TP_STRUCT__entry(
 		__field(const struct page_pool *,	pool)
-		__field(const struct page *,		page)
+		__field(unsigned long,			netmem)
 		__field(u32,				release)
 		__field(unsigned long,			pfn)
 	),
 
 	TP_fast_assign(
 		__entry->pool		= pool;
-		__entry->page		= page;
+		__entry->netmem		= (__force unsigned long)netmem;
 		__entry->release	= release;
-		__entry->pfn		= page_to_pfn(page);
+		__entry->pfn		= netmem_to_pfn(netmem);
 	),
 
-	TP_printk("page_pool=%p page=%p pfn=0x%lx release=%u",
-		  __entry->pool, __entry->page, __entry->pfn, __entry->release)
+	TP_printk("page_pool=%p netmem=%p pfn=0x%lx release=%u",
+		  __entry->pool, (void *)__entry->netmem,
+		  __entry->pfn, __entry->release)
 );
 
 TRACE_EVENT(page_pool_state_hold,
 
 	TP_PROTO(const struct page_pool *pool,
-		 const struct page *page, u32 hold),
+		 netmem_ref netmem, u32 hold),
 
-	TP_ARGS(pool, page, hold),
+	TP_ARGS(pool, netmem, hold),
 
 	TP_STRUCT__entry(
 		__field(const struct page_pool *,	pool)
-		__field(const struct page *,		page)
+		__field(unsigned long,			netmem)
 		__field(u32,				hold)
 		__field(unsigned long,			pfn)
 	),
 
 	TP_fast_assign(
 		__entry->pool	= pool;
-		__entry->page	= page;
+		__entry->netmem	= (__force unsigned long)netmem;
 		__entry->hold	= hold;
-		__entry->pfn	= page_to_pfn(page);
+		__entry->pfn	= netmem_to_pfn(netmem);
 	),
 
-	TP_printk("page_pool=%p page=%p pfn=0x%lx hold=%u",
-		  __entry->pool, __entry->page, __entry->pfn, __entry->hold)
+	TP_printk("page_pool=%p netmem=%p pfn=0x%lx hold=%u",
+		  __entry->pool, (void *)__entry->netmem,
+		  __entry->pfn, __entry->hold)
 );
 
 TRACE_EVENT(page_pool_update_nid,
-- 
cgit v1.2.3


From 7b6f9ec6ad51125facadecf77dc6e62928186d2e Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Wed, 3 Jul 2024 09:16:40 +0200
Subject: drm/panthor: Fix sync-only jobs

A sync-only job is meant to provide a synchronization point on a
queue, so we can't return a NULL fence there, we have to add a signal
operation to the command stream which executes after all other
previously submitted jobs are done.

v2:
- Fixed a UAF bug
- Added R-bs

Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block")
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240703071640.231278-3-boris.brezillon@collabora.com
---
 include/uapi/drm/panthor_drm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
index aaed8e12ad0b..926b1deb1116 100644
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -802,6 +802,9 @@ struct drm_panthor_queue_submit {
 	 * Must be 64-bit/8-byte aligned (the size of a CS instruction)
 	 *
 	 * Can be zero if stream_addr is zero too.
+	 *
+	 * When the stream size is zero, the queue submit serves as a
+	 * synchronization point.
 	 */
 	__u32 stream_size;
 
@@ -822,6 +825,8 @@ struct drm_panthor_queue_submit {
 	 * ensure the GPU doesn't get garbage when reading the indirect command
 	 * stream buffers. If you want the cache flush to happen
 	 * unconditionally, pass a zero here.
+	 *
+	 * Ignored when stream_size is zero.
 	 */
 	__u32 latest_flush;
 
-- 
cgit v1.2.3


From 18a5daf0e4974ded74b2ed8a6aed1da49bde356d Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Thu, 13 Jun 2024 02:12:15 +0200
Subject: vfs: move d_lockref out of the area used by RCU lookup

Stock kernel scales worse than FreeBSD when doing a 20-way stat(2) on
the same tmpfs-backed file.

According to perf top:
  38.09%  lockref_put_return
  26.08%  lockref_get_not_dead
  25.60%  __d_lookup_rcu
   0.89%  clear_bhb_loop

__d_lookup_rcu is participating in cacheline ping pong due to the
embedded name sharing a cacheline with lockref.

Moving it out resolves the problem:
  41.50%  lockref_put_return
  41.03%  lockref_get_not_dead
   1.54%  clear_bhb_loop

benchmark (will-it-scale, Sapphire Rapids, tmpfs, ops/s):
FreeBSD:7219334
before:	5038006
after:	7842883 (+55%)

One minor remark: the 'after' result is unstable, fluctuating in the
range ~7.8 mln to ~9 mln during different runs.

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Link: https://lore.kernel.org/r/20240613001215.648829-3-mjguzik@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/dcache.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index bf53e3894aae..326dbccc3736 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -89,13 +89,18 @@ struct dentry {
 	struct inode *d_inode;		/* Where the name belongs to - NULL is
 					 * negative */
 	unsigned char d_iname[DNAME_INLINE_LEN];	/* small names */
+	/* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */
 
 	/* Ref lookup also touches following */
-	struct lockref d_lockref;	/* per-dentry lock and refcount */
 	const struct dentry_operations *d_op;
 	struct super_block *d_sb;	/* The root of the dentry tree */
 	unsigned long d_time;		/* used by d_revalidate */
 	void *d_fsdata;			/* fs-specific data */
+	/* --- cacheline 2 boundary (128 bytes) --- */
+	struct lockref d_lockref;	/* per-dentry lock and refcount
+					 * keep separate from RCU lookup area if
+					 * possible!
+					 */
 
 	union {
 		struct list_head d_lru;		/* LRU list */
-- 
cgit v1.2.3


From dc99c0ff53f588bb210b1e8b3314c7581cde68a2 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Tue, 18 Jun 2024 14:12:28 +0200
Subject: fs: fix dentry size

On CONFIG_SMP=y and on 32bit we need to decrease DNAME_INLINE_LEN to 36
btyes to end up with 128 bytes in total.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Links: https://lore.kernel.org/r/CAHk-=whtoqTSCcAvV-X-KPqoDWxS4vxmWpuKLB+Vv8=FtUd5vA@mail.gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/dcache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 326dbccc3736..58916b3f53ad 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -71,7 +71,7 @@ extern const struct qstr dotdot_name;
 # define DNAME_INLINE_LEN 40 /* 192 bytes */
 #else
 # ifdef CONFIG_SMP
-#  define DNAME_INLINE_LEN 40 /* 128 bytes */
+#  define DNAME_INLINE_LEN 36 /* 128 bytes */
 # else
 #  define DNAME_INLINE_LEN 44 /* 128 bytes */
 # endif
-- 
cgit v1.2.3


From 85b08b31a22b481ec6528130daf94eee4452e23f Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Fri, 28 Jun 2024 14:29:22 +0800
Subject: netfs, fscache: export fscache_put_volume() and add
 fscache_try_get_volume()

Export fscache_put_volume() and add fscache_try_get_volume()
helper function to allow cachefiles to get/put fscache_volume
via linux/fscache-cache.h.

Signed-off-by: Baokun Li <libaokun1@huawei.com>
Link: https://lore.kernel.org/r/20240628062930.2467993-2-libaokun@huaweicloud.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/fscache-cache.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index bdf7f3eddf0a..4c91a019972b 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -19,6 +19,7 @@
 enum fscache_cache_trace;
 enum fscache_cookie_trace;
 enum fscache_access_trace;
+enum fscache_volume_trace;
 
 enum fscache_cache_state {
 	FSCACHE_CACHE_IS_NOT_PRESENT,	/* No cache is present for this name */
@@ -97,6 +98,11 @@ extern void fscache_withdraw_cookie(struct fscache_cookie *cookie);
 
 extern void fscache_io_error(struct fscache_cache *cache);
 
+extern struct fscache_volume *
+fscache_try_get_volume(struct fscache_volume *volume,
+		       enum fscache_volume_trace where);
+extern void fscache_put_volume(struct fscache_volume *volume,
+			       enum fscache_volume_trace where);
 extern void fscache_end_volume_access(struct fscache_volume *volume,
 				      struct fscache_cookie *cookie,
 				      enum fscache_access_trace why);
-- 
cgit v1.2.3


From 522018a0de6b6fcce60c04f86dfc5f0e4b6a1b36 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Fri, 28 Jun 2024 14:29:23 +0800
Subject: cachefiles: fix slab-use-after-free in fscache_withdraw_volume()

We got the following issue in our fault injection stress test:

==================================================================
BUG: KASAN: slab-use-after-free in fscache_withdraw_volume+0x2e1/0x370
Read of size 4 at addr ffff88810680be08 by task ondemand-04-dae/5798

CPU: 0 PID: 5798 Comm: ondemand-04-dae Not tainted 6.8.0-dirty #565
Call Trace:
 kasan_check_range+0xf6/0x1b0
 fscache_withdraw_volume+0x2e1/0x370
 cachefiles_withdraw_volume+0x31/0x50
 cachefiles_withdraw_cache+0x3ad/0x900
 cachefiles_put_unbind_pincount+0x1f6/0x250
 cachefiles_daemon_release+0x13b/0x290
 __fput+0x204/0xa00
 task_work_run+0x139/0x230

Allocated by task 5820:
 __kmalloc+0x1df/0x4b0
 fscache_alloc_volume+0x70/0x600
 __fscache_acquire_volume+0x1c/0x610
 erofs_fscache_register_volume+0x96/0x1a0
 erofs_fscache_register_fs+0x49a/0x690
 erofs_fc_fill_super+0x6c0/0xcc0
 vfs_get_super+0xa9/0x140
 vfs_get_tree+0x8e/0x300
 do_new_mount+0x28c/0x580
 [...]

Freed by task 5820:
 kfree+0xf1/0x2c0
 fscache_put_volume.part.0+0x5cb/0x9e0
 erofs_fscache_unregister_fs+0x157/0x1b0
 erofs_kill_sb+0xd9/0x1c0
 deactivate_locked_super+0xa3/0x100
 vfs_get_super+0x105/0x140
 vfs_get_tree+0x8e/0x300
 do_new_mount+0x28c/0x580
 [...]
==================================================================

Following is the process that triggers the issue:

        mount failed         |         daemon exit
------------------------------------------------------------
 deactivate_locked_super        cachefiles_daemon_release
  erofs_kill_sb
   erofs_fscache_unregister_fs
    fscache_relinquish_volume
     __fscache_relinquish_volume
      fscache_put_volume(fscache_volume, fscache_volume_put_relinquish)
       zero = __refcount_dec_and_test(&fscache_volume->ref, &ref);
                                 cachefiles_put_unbind_pincount
                                  cachefiles_daemon_unbind
                                   cachefiles_withdraw_cache
                                    cachefiles_withdraw_volumes
                                     list_del_init(&volume->cache_link)
       fscache_free_volume(fscache_volume)
        cache->ops->free_volume
         cachefiles_free_volume
          list_del_init(&cachefiles_volume->cache_link);
        kfree(fscache_volume)
                                     cachefiles_withdraw_volume
                                      fscache_withdraw_volume
                                       fscache_volume->n_accesses
                                       // fscache_volume UAF !!!

The fscache_volume in cache->volumes must not have been freed yet, but its
reference count may be 0. So use the new fscache_try_get_volume() helper
function try to get its reference count.

If the reference count of fscache_volume is 0, fscache_put_volume() is
freeing it, so wait for it to be removed from cache->volumes.

If its reference count is not 0, call cachefiles_withdraw_volume() with
reference count protection to avoid the above issue.

Fixes: fe2140e2f57f ("cachefiles: Implement volume support")
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Link: https://lore.kernel.org/r/20240628062930.2467993-3-libaokun@huaweicloud.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/trace/events/fscache.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index a6190aa1b406..f1a73aa83fbb 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -35,12 +35,14 @@ enum fscache_volume_trace {
 	fscache_volume_get_cookie,
 	fscache_volume_get_create_work,
 	fscache_volume_get_hash_collision,
+	fscache_volume_get_withdraw,
 	fscache_volume_free,
 	fscache_volume_new_acquire,
 	fscache_volume_put_cookie,
 	fscache_volume_put_create_work,
 	fscache_volume_put_hash_collision,
 	fscache_volume_put_relinquish,
+	fscache_volume_put_withdraw,
 	fscache_volume_see_create_work,
 	fscache_volume_see_hash_wake,
 	fscache_volume_wait_create_work,
@@ -120,12 +122,14 @@ enum fscache_access_trace {
 	EM(fscache_volume_get_cookie,		"GET cook ")		\
 	EM(fscache_volume_get_create_work,	"GET creat")		\
 	EM(fscache_volume_get_hash_collision,	"GET hcoll")		\
+	EM(fscache_volume_get_withdraw,		"GET withd")            \
 	EM(fscache_volume_free,			"FREE     ")		\
 	EM(fscache_volume_new_acquire,		"NEW acq  ")		\
 	EM(fscache_volume_put_cookie,		"PUT cook ")		\
 	EM(fscache_volume_put_create_work,	"PUT creat")		\
 	EM(fscache_volume_put_hash_collision,	"PUT hcoll")		\
 	EM(fscache_volume_put_relinquish,	"PUT relnq")		\
+	EM(fscache_volume_put_withdraw,		"PUT withd")            \
 	EM(fscache_volume_see_create_work,	"SEE creat")		\
 	EM(fscache_volume_see_hash_wake,	"SEE hwake")		\
 	E_(fscache_volume_wait_create_work,	"WAIT crea")
-- 
cgit v1.2.3


From ad59baa3169591e0b4cf1a217c9139f2145f4c7f Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Wed, 3 Jul 2024 09:25:21 +0200
Subject: slab, rust: extend kmalloc() alignment guarantees to remove Rust
 padding

Slab allocators have been guaranteeing natural alignment for
power-of-two sizes since commit 59bb47985c1d ("mm, sl[aou]b: guarantee
natural alignment for kmalloc(power-of-two)"), while any other sizes are
guaranteed to be aligned only to ARCH_KMALLOC_MINALIGN bytes (although
in practice are aligned more than that in non-debug scenarios).

Rust's allocator API specifies size and alignment per allocation, which
have to satisfy the following rules, per Alice Ryhl [1]:

  1. The alignment is a power of two.
  2. The size is non-zero.
  3. When you round up the size to the next multiple of the alignment,
     then it must not overflow the signed type isize / ssize_t.

In order to map this to kmalloc()'s guarantees, some requested
allocation sizes have to be padded to the next power-of-two size [2].
For example, an allocation of size 96 and alignment of 32 will be padded
to an allocation of size 128, because the existing kmalloc-96 bucket
doesn't guarantee alignent above ARCH_KMALLOC_MINALIGN. Without slab
debugging active, the layout of the kmalloc-96 slabs however naturally
align the objects to 32 bytes, so extending the size to 128 bytes is
wasteful.

To improve the situation we can extend the kmalloc() alignment
guarantees in a way that

1) doesn't change the current slab layout (and thus does not increase
   internal fragmentation) when slab debugging is not active
2) reduces waste in the Rust allocator use case
3) is a superset of the current guarantee for power-of-two sizes.

The extended guarantee is that alignment is at least the largest
power-of-two divisor of the requested size. For power-of-two sizes the
largest divisor is the size itself, but let's keep this case documented
separately for clarity.

For current kmalloc size buckets, it means kmalloc-96 will guarantee
alignment of 32 bytes and kmalloc-196 will guarantee 64 bytes.

This covers the rules 1 and 2 above of Rust's API as long as the size is
a multiple of the alignment. The Rust layer should now only need to
round up the size to the next multiple if it isn't, while enforcing the
rule 3.

Implementation-wise, this changes the alignment calculation in
create_boot_cache(). While at it also do the calulation only for caches
with the SLAB_KMALLOC flag, because the function is also used to create
the initial kmem_cache and kmem_cache_node caches, where no alignment
guarantee is necessary.

In the Rust allocator's krealloc_aligned(), remove the code that padded
sizes to the next power of two (suggested by Alice Ryhl) as it's no
longer necessary with the new guarantees.

Reported-by: Alice Ryhl <aliceryhl@google.com>
Reported-by: Boqun Feng <boqun.feng@gmail.com>
Link: https://lore.kernel.org/all/CAH5fLggjrbdUuT-H-5vbQfMazjRDpp2%2Bk3%3DYhPyS17ezEqxwcw@mail.gmail.com/ [1]
Link: https://lore.kernel.org/all/CAH5fLghsZRemYUwVvhk77o6y1foqnCeDzW4WZv6ScEWna2+_jw@mail.gmail.com/ [2]
Reviewed-by: Boqun Feng <boqun.feng@gmail.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/slab.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index ed6bee5ec2b6..640cea6e6323 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -604,7 +604,8 @@ void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
  *
  * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
  * bytes. For @size of power of two bytes, the alignment is also guaranteed
- * to be at least to the size.
+ * to be at least to the size. For other sizes, the alignment is guaranteed to
+ * be at least the largest power-of-two divisor of @size.
  *
  * The @flags argument may be one of the GFP flags defined at
  * include/linux/gfp_types.h and described at
-- 
cgit v1.2.3


From 72e0fe2241ce113cbba339ca8c2450b167774530 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Mon, 1 Jul 2024 12:12:58 -0700
Subject: mm/slab: Introduce kmem_buckets typedef

Encapsulate the concept of a single set of kmem_caches that are used
for the kmalloc size buckets. Redefine kmalloc_caches as an array
of these buckets (for the different global cache buckets).

Signed-off-by: Kees Cook <kees@kernel.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/slab.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 640cea6e6323..922bf15794f7 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -426,8 +426,9 @@ enum kmalloc_cache_type {
 	NR_KMALLOC_TYPES
 };
 
-extern struct kmem_cache *
-kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
+typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1];
+
+extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES];
 
 /*
  * Define gfp bits that should not be set for KMALLOC_NORMAL.
-- 
cgit v1.2.3


From 67f2df3b82d091ed095d0e47e1f3a9d3e18e4e41 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Mon, 1 Jul 2024 12:12:59 -0700
Subject: mm/slab: Plumb kmem_buckets into __do_kmalloc_node()

Introduce CONFIG_SLAB_BUCKETS which provides the infrastructure to
support separated kmalloc buckets (in the following kmem_buckets_create()
patches and future codetag-based separation). Since this will provide
a mitigation for a very common case of exploits, it is recommended to
enable this feature for general purpose distros. By default, the new
Kconfig will be enabled if CONFIG_SLAB_FREELIST_HARDENED is enabled (and
it is added to the hardening.config Kconfig fragment).

To be able to choose which buckets to allocate from, make the buckets
available to the internal kmalloc interfaces by adding them as the
second argument, rather than depending on the buckets being chosen from
the fixed set of global buckets. Where the bucket is not available,
pass NULL, which means "use the default system kmalloc bucket set"
(the prior existing behavior), as implemented in kmalloc_slab().

To avoid adding the extra argument when !CONFIG_SLAB_BUCKETS, only the
top-level macros and static inlines use the buckets argument (where
they are stripped out and compiled out respectively). The actual extern
functions can then be built without the argument, and the internals
fall back to the global kmalloc buckets unconditionally.

Co-developed-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Kees Cook <kees@kernel.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/slab.h | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 922bf15794f7..a9200d453087 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -570,6 +570,21 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags,
 				   int node) __assume_slab_alignment __malloc;
 #define kmem_cache_alloc_node(...)	alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__))
 
+/*
+ * These macros allow declaring a kmem_buckets * parameter alongside size, which
+ * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call
+ * sites don't have to pass NULL.
+ */
+#ifdef CONFIG_SLAB_BUCKETS
+#define DECL_BUCKET_PARAMS(_size, _b)	size_t (_size), kmem_buckets *(_b)
+#define PASS_BUCKET_PARAMS(_size, _b)	(_size), (_b)
+#define PASS_BUCKET_PARAM(_b)		(_b)
+#else
+#define DECL_BUCKET_PARAMS(_size, _b)	size_t (_size)
+#define PASS_BUCKET_PARAMS(_size, _b)	(_size)
+#define PASS_BUCKET_PARAM(_b)		NULL
+#endif
+
 /*
  * The following functions are not to be used directly and are intended only
  * for internal use from kmalloc() and kmalloc_node()
@@ -579,7 +594,7 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags,
 void *__kmalloc_noprof(size_t size, gfp_t flags)
 				__assume_kmalloc_alignment __alloc_size(1);
 
-void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node)
+void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
 				__assume_kmalloc_alignment __alloc_size(1);
 
 void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
@@ -680,7 +695,7 @@ static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gf
 				kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
 				flags, node, size);
 	}
-	return __kmalloc_node_noprof(size, flags, node);
+	return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node);
 }
 #define kmalloc_node(...)			alloc_hooks(kmalloc_node_noprof(__VA_ARGS__))
 
@@ -731,8 +746,10 @@ static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(voi
  */
 #define kcalloc(n, size, flags)		kmalloc_array(n, size, (flags) | __GFP_ZERO)
 
-void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, int node,
-				  unsigned long caller) __alloc_size(1);
+void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node,
+					 unsigned long caller) __alloc_size(1);
+#define kmalloc_node_track_caller_noprof(size, flags, node, caller) \
+	__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller)
 #define kmalloc_node_track_caller(...)		\
 	alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_))
 
@@ -758,7 +775,7 @@ static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_
 		return NULL;
 	if (__builtin_constant_p(n) && __builtin_constant_p(size))
 		return kmalloc_node_noprof(bytes, flags, node);
-	return __kmalloc_node_noprof(bytes, flags, node);
+	return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node);
 }
 #define kmalloc_array_node(...)			alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__))
 
-- 
cgit v1.2.3


From 2e8000b826fcd2716449d09753d5ed843067881e Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Mon, 1 Jul 2024 12:13:00 -0700
Subject: mm/slab: Introduce kvmalloc_buckets_node() that can take kmem_buckets
 argument

Plumb kmem_buckets arguments through kvmalloc_node_noprof() so it is
possible to provide an API to perform kvmalloc-style allocations with
a particular set of buckets. Introduce kvmalloc_buckets_node() that takes a
kmem_buckets argument.

Signed-off-by: Kees Cook <kees@kernel.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/slab.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index a9200d453087..837005314f96 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -799,7 +799,9 @@ static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags)
 #define kzalloc(...)				alloc_hooks(kzalloc_noprof(__VA_ARGS__))
 #define kzalloc_node(_size, _flags, _node)	kmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
 
-extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_size(1);
+void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1);
+#define kvmalloc_node_noprof(size, flags, node)	\
+	__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node)
 #define kvmalloc_node(...)			alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__))
 
 #define kvmalloc(_size, _flags)			kvmalloc_node(_size, _flags, NUMA_NO_NODE)
-- 
cgit v1.2.3


From b32801d1255be1da62ea8134df3ed9f3331fba12 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Mon, 1 Jul 2024 12:13:01 -0700
Subject: mm/slab: Introduce kmem_buckets_create() and family

Dedicated caches are available for fixed size allocations via
kmem_cache_alloc(), but for dynamically sized allocations there is only
the global kmalloc API's set of buckets available. This means it isn't
possible to separate specific sets of dynamically sized allocations into
a separate collection of caches.

This leads to a use-after-free exploitation weakness in the Linux
kernel since many heap memory spraying/grooming attacks depend on using
userspace-controllable dynamically sized allocations to collide with
fixed size allocations that end up in same cache.

While CONFIG_RANDOM_KMALLOC_CACHES provides a probabilistic defense
against these kinds of "type confusion" attacks, including for fixed
same-size heap objects, we can create a complementary deterministic
defense for dynamically sized allocations that are directly user
controlled. Addressing these cases is limited in scope, so isolating these
kinds of interfaces will not become an unbounded game of whack-a-mole. For
example, many pass through memdup_user(), making isolation there very
effective.

In order to isolate user-controllable dynamically-sized
allocations from the common system kmalloc allocations, introduce
kmem_buckets_create(), which behaves like kmem_cache_create(). Introduce
kmem_buckets_alloc(), which behaves like kmem_cache_alloc(). Introduce
kmem_buckets_alloc_track_caller() for where caller tracking is
needed. Introduce kmem_buckets_valloc() for cases where vmalloc fallback
is needed. Note that these caches are specifically flagged with
SLAB_NO_MERGE, since merging would defeat the entire purpose of the
mitigation.

This can also be used in the future to extend allocation profiling's use
of code tagging to implement per-caller allocation cache isolation[1]
even for dynamic allocations.

Memory allocation pinning[2] is still needed to plug the Use-After-Free
cross-allocator weakness (where attackers can arrange to free an
entire slab page and have it reallocated to a different cache),
but that is an existing and separate issue which is complementary
to this improvement. Development continues for that feature via the
SLAB_VIRTUAL[3] series (which could also provide guard pages -- another
complementary improvement).

Link: https://lore.kernel.org/lkml/202402211449.401382D2AF@keescook [1]
Link: https://googleprojectzero.blogspot.com/2021/10/how-simple-linux-kernel-memory.html [2]
Link: https://lore.kernel.org/lkml/20230915105933.495735-1-matteorizzo@google.com/ [3]
Signed-off-by: Kees Cook <kees@kernel.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/slab.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 837005314f96..d99afce36098 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -549,6 +549,10 @@ void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
 
 void kmem_cache_free(struct kmem_cache *s, void *objp);
 
+kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
+				  unsigned int useroffset, unsigned int usersize,
+				  void (*ctor)(void *));
+
 /*
  * Bulk allocation and freeing operations. These are accelerated in an
  * allocator specific way to avoid taking locks repeatedly or building
@@ -682,6 +686,12 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
 }
 #define kmalloc(...)				alloc_hooks(kmalloc_noprof(__VA_ARGS__))
 
+#define kmem_buckets_alloc(_b, _size, _flags)	\
+	alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))
+
+#define kmem_buckets_alloc_track_caller(_b, _size, _flags)	\
+	alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_))
+
 static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node)
 {
 	if (__builtin_constant_p(size) && size) {
@@ -809,6 +819,8 @@ void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
 #define kvzalloc(_size, _flags)			kvmalloc(_size, (_flags)|__GFP_ZERO)
 
 #define kvzalloc_node(_size, _flags, _node)	kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
+#define kmem_buckets_valloc(_b, _size, _flags)	\
+	alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))
 
 static inline __alloc_size(1, 2) void *
 kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)
-- 
cgit v1.2.3


From 6555acdefc758a4dae7038c3f2301f311e287218 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Tue, 2 Jul 2024 19:21:18 +0200
Subject: um: time-travel: support time-travel protocol broadcast messages

Add a message type to the time-travel protocol to broadcast
a small (64-bit) value to all participants in a simulation.
The main use case is to have an identical message come to
all participants in a simulation, e.g. to separate out logs
for different tests running in a single simulation.

Down in the guts of time_travel_handle_message() we can't
use printk() and not even printk_deferred(), so just store
the message and print it at the start of the userspace()
function.

Unfortunately this means that other prints in the kernel
can actually bypass the message, but in most cases where
this is used, for example to separate test logs, userspace
will be involved. Also, even if we could use
printk_deferred(), we'd still need to flush it out in the
userspace() function since otherwise userspace messages
might cross it.

As a result, this is a reasonable compromise, there's no
need to have any core changes and it solves the main use
case we have for it.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Link: https://patch.msgid.link/20240702192118.c4093bc5b15e.I2ca8d006b67feeb866ac2017af7b741c9e06445a@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/um_timetravel.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/um_timetravel.h b/include/uapi/linux/um_timetravel.h
index ca3238222b6d..078ea401aa2a 100644
--- a/include/uapi/linux/um_timetravel.h
+++ b/include/uapi/linux/um_timetravel.h
@@ -123,6 +123,17 @@ enum um_timetravel_ops {
 	 *	the simulation.
 	 */
 	UM_TIMETRAVEL_GET_TOD		= 8,
+
+	/**
+	 * @UM_TIMETRAVEL_BROADCAST: Send/Receive a broadcast message.
+	 *	This message can be used to sync all components in the system
+	 *	with a single message, if the calender gets the message, the
+	 *	calender broadcast the message to all components, and if a
+	 *	component receives it it should act based on it e.g print a
+	 *	message to it's log system.
+	 *	(calendar <-> host)
+	 */
+	UM_TIMETRAVEL_BROADCAST		= 9,
 };
 
 #endif /* _UAPI_LINUX_UM_TIMETRAVEL_H */
-- 
cgit v1.2.3


From bfb80d8bc92fa70f5b44a57ed2b24d57685fe188 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 2 Jul 2024 19:21:21 +0200
Subject: um: add shared memory optimisation for time-travel=ext

With external time travel, a LOT of message can end up
being exchanged on the socket, taking a significant
amount of time just to do that.

Add a new shared memory optimisation to that, where a
number of changes are made:
 - the controller sends a client ID and a shared memory FD
   (and a logging FD we don't use) in the ACK message to
   the initial START
 - the shared memory holds the current time and the
   free_until value, so that there's no need to exchange
   messages for that
 - if the client that's running has shared memory support,
   any client (the running one included) can request the
   next time it wants to run inside the shared memory,
   rather than sending a message, by also updating the
   free_until value
 - when shared memory is enabled, RUN/WAIT messages no
   longer have an ACK, further cutting down on messages

Together, this can reduce the number of messages very
significantly, and reduce overall test/simulation run time.

Co-developed-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Link: https://patch.msgid.link/20240702192118.6ad0a083f574.Ie41206c8ce4507fe26b991937f47e86c24ca7a31@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/um_timetravel.h | 179 ++++++++++++++++++++++++++++++++++---
 1 file changed, 165 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/um_timetravel.h b/include/uapi/linux/um_timetravel.h
index 078ea401aa2a..546a690b0346 100644
--- a/include/uapi/linux/um_timetravel.h
+++ b/include/uapi/linux/um_timetravel.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
 /*
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019 - 2023 Intel Corporation
  */
 #ifndef _UAPI_LINUX_UM_TIMETRAVEL_H
 #define _UAPI_LINUX_UM_TIMETRAVEL_H
@@ -50,6 +39,36 @@ struct um_timetravel_msg {
 	__u64 time;
 };
 
+/* max number of file descriptors that can be sent/received in a message */
+#define UM_TIMETRAVEL_MAX_FDS 2
+
+/**
+ * enum um_timetravel_shared_mem_fds - fds sent in ACK message for START message
+ */
+enum um_timetravel_shared_mem_fds {
+	/**
+	 * @UM_TIMETRAVEL_SHARED_MEMFD: Index of the shared memory file
+	 *	descriptor in the control message
+	 */
+	UM_TIMETRAVEL_SHARED_MEMFD,
+	/**
+	 * @UM_TIMETRAVEL_SHARED_LOGFD: Index of the logging file descriptor
+	 *	in the control message
+	 */
+	UM_TIMETRAVEL_SHARED_LOGFD,
+	UM_TIMETRAVEL_SHARED_MAX_FDS,
+};
+
+/**
+ * enum um_timetravel_start_ack - ack-time mask for start message
+ */
+enum um_timetravel_start_ack {
+	/**
+	 * @UM_TIMETRAVEL_START_ACK_ID: client ID that controller allocated.
+	 */
+	UM_TIMETRAVEL_START_ACK_ID = 0xffff,
+};
+
 /**
  * enum um_timetravel_ops - Operation codes
  */
@@ -57,7 +76,9 @@ enum um_timetravel_ops {
 	/**
 	 * @UM_TIMETRAVEL_ACK: response (ACK) to any previous message,
 	 *	this usually doesn't carry any data in the 'time' field
-	 *	unless otherwise specified below
+	 *	unless otherwise specified below, note: while using shared
+	 *	memory no ACK for WAIT and RUN messages, for more info see
+	 *	&struct um_timetravel_schedshm.
 	 */
 	UM_TIMETRAVEL_ACK		= 0,
 
@@ -136,4 +157,134 @@ enum um_timetravel_ops {
 	UM_TIMETRAVEL_BROADCAST		= 9,
 };
 
+/* version of struct um_timetravel_schedshm */
+#define UM_TIMETRAVEL_SCHEDSHM_VERSION 2
+
+/**
+ * enum um_timetravel_schedshm_cap - time travel capabilities of every client
+ *
+ * These flags must be set immediately after processing the ACK to
+ * the START message, before sending any message to the controller.
+ */
+enum um_timetravel_schedshm_cap {
+	/**
+	 * @UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE: client can read current time
+	 *	update internal time request to shared memory and read
+	 *	free until and send no Ack on RUN and doesn't expect ACK on
+	 *	WAIT.
+	 */
+	UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE = 0x1,
+};
+
+/**
+ * enum um_timetravel_schedshm_flags - time travel flags of every client
+ */
+enum um_timetravel_schedshm_flags {
+	/**
+	 * @UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN: client has a request to run.
+	 *	It's set by client when it has a request to run, if (and only
+	 *	if) the @running_id points to a client that is able to use
+	 *	shared memory, i.e. has %UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE
+	 *	(this includes the client itself). Otherwise, a message must
+	 *	be used.
+	 */
+	UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN = 0x1,
+};
+
+/**
+ * DOC: Time travel shared memory overview
+ *
+ * The main purpose of the shared memory is to avoid all time travel message
+ * that don't need any action, for example current time can be held in shared
+ * memory without the need of any client to send a message UM_TIMETRAVEL_GET
+ * in order to know what's the time.
+ *
+ * Since this is shared memory with all clients and controller and controller
+ * creates the shared memory space, all time values are absolute to controller
+ * time. So first time client connects to shared memory mode it should take the
+ * current_time value in shared memory and keep it internally as a diff to
+ * shared memory times, and once shared memory is initialized, any interaction
+ * with the controller must happen in the controller time domain, including any
+ * messages (for clients that are not using shared memory, the controller will
+ * handle an offset and make the clients think they start at time zero.)
+ *
+ * Along with the shared memory file descriptor is sent to the client a logging
+ * file descriptor, to have all logs related to shared memory,
+ * logged into one place. note: to have all logs synced into log file at write,
+ * file should be flushed (fflush) after writing to it.
+ *
+ * To avoid memory corruption, we define below for each field who can write to
+ * it at what time, defined in the structure fields.
+ *
+ * To avoid having to pack this struct, all fields in it must be naturally aligned
+ * (i.e. aligned to their size).
+ */
+
+/**
+ * union um_timetravel_schedshm_client - UM time travel client struct
+ *
+ * Every entity using the shared memory including the controller has a place in
+ * the um_timetravel_schedshm clients array, that holds info related to the client
+ * using the shared memory, and can be set only by the client after it gets the
+ * fd memory.
+ *
+ * @capa: bit fields with client capabilities see
+ *	&enum um_timetravel_schedshm_cap, set by client once after getting the
+ *	shared memory file descriptor.
+ * @flags: bit fields for flags see &enum um_timetravel_schedshm_flags for doc.
+ * @req_time: request time to run, set by client on every request it needs.
+ * @name: unique id sent to the controller by client with START message.
+ */
+union um_timetravel_schedshm_client {
+	struct {
+		__u32 capa;
+		__u32 flags;
+		__u64 req_time;
+		__u64 name;
+	};
+	char reserve[128]; /* reserved for future usage */
+};
+
+/**
+ * struct um_timetravel_schedshm - UM time travel shared memory struct
+ *
+ * @hdr: header fields:
+ * @version: Current version struct UM_TIMETRAVEL_SCHEDSHM_VERSION,
+ *	set by controller once at init, clients must check this after mapping
+ *	and work without shared memory if they cannot handle the indicated
+ *	version.
+ * @len: Length of all the memory including header (@hdr), clients should once
+ *	per connection first mmap the header and take the length (@len) to remap the entire size.
+ *	This is done in order to support dynamic struct size letting number of
+ *	clients be dynamic based on controller support.
+ * @free_until: Stores the next request to run by any client, in order for the
+ *	current client to know how long it can still run. A client needs to (at
+ *	least) reload this value immediately after communicating with any other
+ *	client, since the controller will update this field when a new request
+ *	is made by any client. Clients also must update this value when they
+ *	insert/update an own request into the shared memory while not running
+ *	themselves, and the new request is before than the current value.
+ * current_time: Current time, can only be set by the client in running state
+ *	(indicated by @running_id), though that client may only run until @free_until,
+ *	so it must remain smaller than @free_until.
+ * @running_id: The current client in state running, set before a client is
+ *	notified that it's now running.
+ * @max_clients: size of @clients array, set once at init by the controller.
+ * @clients: clients array see &union um_timetravel_schedshm_client for doc,
+ *	set only by client.
+ */
+struct um_timetravel_schedshm {
+	union {
+		struct {
+			__u32 version;
+			__u32 len;
+			__u64 free_until;
+			__u64 current_time;
+			__u16 running_id;
+			__u16 max_clients;
+		};
+		char hdr[4096]; /* align to 4K page size */
+	};
+	union um_timetravel_schedshm_client clients[];
+};
 #endif /* _UAPI_LINUX_UM_TIMETRAVEL_H */
-- 
cgit v1.2.3


From 244389bd42870640c4b5ef672a360da329b579ed Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Wed, 3 Jul 2024 10:55:17 +0100
Subject: ASoC: cs35l56: Limit Speaker Volume to +12dB maximum

Change CS35L56_MAIN_RENDER_USER_VOLUME_MAX to 48, to limit the maximum
value of the Speaker Volume control to +12dB. The minimum value is
unchanged so that the default 0dB has the same integer control value.

The original maximum of 400 (+100dB) was the largest value that can be
mathematically handled by the DSP. The actual maximum amplification is
+12dB.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Link: https://patch.msgid.link/20240703095517.208077-3-rf@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/cs35l56.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h
index 642ef690ebc2..a6aa112e5741 100644
--- a/include/sound/cs35l56.h
+++ b/include/sound/cs35l56.h
@@ -207,7 +207,7 @@
 
 /* CS35L56_MAIN_RENDER_USER_VOLUME */
 #define CS35L56_MAIN_RENDER_USER_VOLUME_MIN		-400
-#define CS35L56_MAIN_RENDER_USER_VOLUME_MAX		400
+#define CS35L56_MAIN_RENDER_USER_VOLUME_MAX		48
 #define CS35L56_MAIN_RENDER_USER_VOLUME_MASK		0x0000FFC0
 #define CS35L56_MAIN_RENDER_USER_VOLUME_SHIFT		6
 #define CS35L56_MAIN_RENDER_USER_VOLUME_SIGNBIT		9
-- 
cgit v1.2.3


From d688ffa269421cec73c7e13fd0cb03879b07db89 Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Wed, 26 Jun 2024 16:32:29 -0600
Subject: perf: arm_pmuv3: Include asm/arm_pmuv3.h from linux/perf/arm_pmuv3.h

The arm64 asm/arm_pmuv3.h depends on defines from
linux/perf/arm_pmuv3.h. Rather than depend on include order, follow the
usual pattern of "linux" headers including "asm" headers of the same
name.

With this change, the include of linux/kvm_host.h is problematic due to
circular includes:

In file included from ../arch/arm64/include/asm/arm_pmuv3.h:9,
                 from ../include/linux/perf/arm_pmuv3.h:312,
                 from ../include/kvm/arm_pmu.h:11,
                 from ../arch/arm64/include/asm/kvm_host.h:38,
                 from ../arch/arm64/mm/init.c:41:
../include/linux/kvm_host.h:383:30: error: field 'arch' has incomplete type

Switching to asm/kvm_host.h solves the issue.

Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
Link: https://lore.kernel.org/r/20240626-arm-pmu-3-9-icntr-v2-5-c9784b4f4065@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/perf/arm_pmuv3.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h
index 46377e134d67..7867db04ec98 100644
--- a/include/linux/perf/arm_pmuv3.h
+++ b/include/linux/perf/arm_pmuv3.h
@@ -309,4 +309,6 @@
 		}						\
 	} while (0)
 
+#include <asm/arm_pmuv3.h>
+
 #endif
-- 
cgit v1.2.3


From d69d804845985c29ab5be5a4b3b1f4787893daf8 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 1 Jul 2024 14:07:37 +0200
Subject: driver core: have match() callback in struct bus_type take a const *

In the match() callback, the struct device_driver * should not be
changed, so change the function callback to be a const *.  This is one
step of many towards making the driver core safe to have struct
device_driver in read-only memory.

Because the match() callback is in all busses, all busses are modified
to handle this properly.  This does entail switching some container_of()
calls to container_of_const() to properly handle the constant *.

For some busses, like PCI and USB and HV, the const * is cast away in
the match callback as those busses do want to modify those structures at
this point in time (they have a local lock in the driver structure.)
That will have to be changed in the future if they wish to have their
struct device * in read-only-memory.

Cc: Rafael J. Wysocki <rafael@kernel.org>
Reviewed-by: Alex Elder <elder@kernel.org>
Acked-by: Sumit Garg <sumit.garg@linaro.org>
Link: https://lore.kernel.org/r/2024070136-wrongdoer-busily-01e8@gregkh
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/acpi/acpi_bus.h             |  2 +-
 include/linux/arm_ffa.h             |  2 +-
 include/linux/cdx/cdx_bus.h         |  2 +-
 include/linux/device/bus.h          |  2 +-
 include/linux/dfl.h                 |  2 +-
 include/linux/eisa.h                |  2 +-
 include/linux/fsi.h                 |  2 +-
 include/linux/fsl/mc.h              |  2 +-
 include/linux/gameport.h            |  2 +-
 include/linux/greybus.h             |  2 +-
 include/linux/hyperv.h              |  6 +-----
 include/linux/i2c.h                 |  2 +-
 include/linux/i3c/device.h          |  5 +----
 include/linux/maple.h               |  2 +-
 include/linux/mcb.h                 |  5 +----
 include/linux/mdio.h                | 19 ++++++-------------
 include/linux/mhi.h                 |  2 +-
 include/linux/mhi_ep.h              |  2 +-
 include/linux/moxtet.h              |  9 ++-------
 include/linux/nd.h                  |  6 +-----
 include/linux/pci-epf.h             |  3 +--
 include/linux/pci.h                 |  6 ++----
 include/linux/phy.h                 |  2 +-
 include/linux/pnp.h                 |  2 +-
 include/linux/rio.h                 |  2 +-
 include/linux/scmi_protocol.h       |  2 +-
 include/linux/serio.h               |  2 +-
 include/linux/slimbus.h             |  2 +-
 include/linux/soc/qcom/apr.h        |  2 +-
 include/linux/soundwire/sdw_type.h  |  2 +-
 include/linux/spi/spi.h             |  6 ++----
 include/linux/ssb/ssb.h             |  2 +-
 include/linux/tc.h                  |  2 +-
 include/linux/tee_drv.h             |  2 +-
 include/linux/virtio.h              |  5 +----
 include/scsi/scsi_transport_iscsi.h |  2 +-
 include/sound/ac97/codec.h          |  5 +----
 include/xen/xenbus.h                |  5 +----
 38 files changed, 46 insertions(+), 86 deletions(-)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 1a4dfd7a1c4a..08d27a1a0072 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -562,7 +562,7 @@ static inline void *acpi_driver_data(struct acpi_device *d)
 }
 
 #define to_acpi_device(d)	container_of(d, struct acpi_device, dev)
-#define to_acpi_driver(d)	container_of(d, struct acpi_driver, drv)
+#define to_acpi_driver(d)	container_of_const(d, struct acpi_driver, drv)
 
 static inline struct acpi_device *acpi_dev_parent(struct acpi_device *adev)
 {
diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
index c82d56768101..ae70704bfa90 100644
--- a/include/linux/arm_ffa.h
+++ b/include/linux/arm_ffa.h
@@ -149,7 +149,7 @@ struct ffa_driver {
 	struct device_driver driver;
 };
 
-#define to_ffa_driver(d) container_of(d, struct ffa_driver, driver)
+#define to_ffa_driver(d) container_of_const(d, struct ffa_driver, driver)
 
 static inline void ffa_dev_set_drvdata(struct ffa_device *fdev, void *data)
 {
diff --git a/include/linux/cdx/cdx_bus.h b/include/linux/cdx/cdx_bus.h
index b57118aaa679..79bb80e56790 100644
--- a/include/linux/cdx/cdx_bus.h
+++ b/include/linux/cdx/cdx_bus.h
@@ -211,7 +211,7 @@ struct cdx_driver {
 };
 
 #define to_cdx_driver(_drv) \
-	container_of(_drv, struct cdx_driver, driver)
+	container_of_const(_drv, struct cdx_driver, driver)
 
 /* Macro to avoid include chaining to get THIS_MODULE */
 #define cdx_driver_register(drv) \
diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
index 5ef4ec1c36c3..807831d6bf0f 100644
--- a/include/linux/device/bus.h
+++ b/include/linux/device/bus.h
@@ -81,7 +81,7 @@ struct bus_type {
 	const struct attribute_group **dev_groups;
 	const struct attribute_group **drv_groups;
 
-	int (*match)(struct device *dev, struct device_driver *drv);
+	int (*match)(struct device *dev, const struct device_driver *drv);
 	int (*uevent)(const struct device *dev, struct kobj_uevent_env *env);
 	int (*probe)(struct device *dev);
 	void (*sync_state)(struct device *dev);
diff --git a/include/linux/dfl.h b/include/linux/dfl.h
index 0a7a00a0ee7f..1f02db0c1897 100644
--- a/include/linux/dfl.h
+++ b/include/linux/dfl.h
@@ -71,7 +71,7 @@ struct dfl_driver {
 };
 
 #define to_dfl_dev(d) container_of(d, struct dfl_device, dev)
-#define to_dfl_drv(d) container_of(d, struct dfl_driver, drv)
+#define to_dfl_drv(d) container_of_const(d, struct dfl_driver, drv)
 
 /*
  * use a macro to avoid include chaining to get THIS_MODULE.
diff --git a/include/linux/eisa.h b/include/linux/eisa.h
index b012e30afebd..f98200cae637 100644
--- a/include/linux/eisa.h
+++ b/include/linux/eisa.h
@@ -60,7 +60,7 @@ struct eisa_driver {
 	struct device_driver         driver;
 };
 
-#define to_eisa_driver(drv) container_of(drv,struct eisa_driver, driver)
+#define to_eisa_driver(drv) container_of_const(drv,struct eisa_driver, driver)
 
 /* These external functions are only available when EISA support is enabled. */
 #ifdef CONFIG_EISA
diff --git a/include/linux/fsi.h b/include/linux/fsi.h
index 3df8c54868df..8c5eef808788 100644
--- a/include/linux/fsi.h
+++ b/include/linux/fsi.h
@@ -44,7 +44,7 @@ struct fsi_driver {
 };
 
 #define to_fsi_dev(devp) container_of(devp, struct fsi_device, dev)
-#define to_fsi_drv(drvp) container_of(drvp, struct fsi_driver, drv)
+#define to_fsi_drv(drvp) container_of_const(drvp, struct fsi_driver, drv)
 
 extern int fsi_driver_register(struct fsi_driver *fsi_drv);
 extern void fsi_driver_unregister(struct fsi_driver *fsi_drv);
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index a1b3de87a3d1..083c860fd28e 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -56,7 +56,7 @@ struct fsl_mc_driver {
 };
 
 #define to_fsl_mc_driver(_drv) \
-	container_of(_drv, struct fsl_mc_driver, driver)
+	container_of_const(_drv, struct fsl_mc_driver, driver)
 
 /**
  * enum fsl_mc_pool_type - Types of allocatable MC bus resources
diff --git a/include/linux/gameport.h b/include/linux/gameport.h
index 07e370113b2b..86d62fdafd7a 100644
--- a/include/linux/gameport.h
+++ b/include/linux/gameport.h
@@ -58,7 +58,7 @@ struct gameport_driver {
 
 	bool ignore;
 };
-#define to_gameport_driver(d)	container_of(d, struct gameport_driver, driver)
+#define to_gameport_driver(d)	container_of_const(d, struct gameport_driver, driver)
 
 int gameport_open(struct gameport *gameport, struct gameport_driver *drv, int mode);
 void gameport_close(struct gameport *gameport);
diff --git a/include/linux/greybus.h b/include/linux/greybus.h
index 634c9511cf78..4d58e27ceaf6 100644
--- a/include/linux/greybus.h
+++ b/include/linux/greybus.h
@@ -64,7 +64,7 @@ struct greybus_driver {
 
 	struct device_driver driver;
 };
-#define to_greybus_driver(d) container_of(d, struct greybus_driver, driver)
+#define to_greybus_driver(d) container_of_const(d, struct greybus_driver, driver)
 
 static inline void greybus_set_drvdata(struct gb_bundle *bundle, void *data)
 {
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 5e39baa7f6cb..22c22fb91042 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1330,11 +1330,7 @@ struct hv_device {
 
 
 #define device_to_hv_device(d)	container_of_const(d, struct hv_device, device)
-
-static inline struct hv_driver *drv_to_hv_drv(struct device_driver *d)
-{
-	return container_of(d, struct hv_driver, driver);
-}
+#define drv_to_hv_drv(d)	container_of_const(d, struct hv_driver, driver)
 
 static inline void hv_set_drvdata(struct hv_device *dev, void *data)
 {
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 9709537370ee..cde3de35a89f 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -304,7 +304,7 @@ struct i2c_driver {
 
 	u32 flags;
 };
-#define to_i2c_driver(d) container_of(d, struct i2c_driver, driver)
+#define to_i2c_driver(d) container_of_const(d, struct i2c_driver, driver)
 
 /**
  * struct i2c_client - represent an I2C slave device
diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h
index e119f11948ef..0a8a44ac2f02 100644
--- a/include/linux/i3c/device.h
+++ b/include/linux/i3c/device.h
@@ -183,10 +183,7 @@ struct i3c_driver {
 	const struct i3c_device_id *id_table;
 };
 
-static inline struct i3c_driver *drv_to_i3cdrv(struct device_driver *drv)
-{
-	return container_of(drv, struct i3c_driver, driver);
-}
+#define drv_to_i3cdrv(__drv)	container_of_const(__drv, struct i3c_driver, driver)
 
 struct device *i3cdev_to_dev(struct i3c_device *i3cdev);
 
diff --git a/include/linux/maple.h b/include/linux/maple.h
index 9aae44efcfd4..3be4e567473c 100644
--- a/include/linux/maple.h
+++ b/include/linux/maple.h
@@ -97,7 +97,7 @@ int maple_add_packet(struct maple_device *mdev, u32 function,
 void maple_clear_dev(struct maple_device *mdev);
 
 #define to_maple_dev(n) container_of(n, struct maple_device, dev)
-#define to_maple_driver(n) container_of(n, struct maple_driver, drv)
+#define to_maple_driver(n) container_of_const(n, struct maple_driver, drv)
 
 #define maple_get_drvdata(d)		dev_get_drvdata(&(d)->dev)
 #define maple_set_drvdata(d,p)		dev_set_drvdata(&(d)->dev, (p))
diff --git a/include/linux/mcb.h b/include/linux/mcb.h
index 0b971b24a804..4ab2691f51a6 100644
--- a/include/linux/mcb.h
+++ b/include/linux/mcb.h
@@ -94,10 +94,7 @@ struct mcb_driver {
 	void (*shutdown)(struct mcb_device *mdev);
 };
 
-static inline struct mcb_driver *to_mcb_driver(struct device_driver *drv)
-{
-	return container_of(drv, struct mcb_driver, driver);
-}
+#define to_mcb_driver(__drv)	container_of_const(__drv, struct mcb_driver, driver)
 
 static inline void *mcb_get_drvdata(struct mcb_device *dev)
 {
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 68f8d2e970d4..efeca5bd7600 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -31,7 +31,7 @@ struct mdio_device {
 	struct mii_bus *bus;
 	char modalias[MDIO_NAME_SIZE];
 
-	int (*bus_match)(struct device *dev, struct device_driver *drv);
+	int (*bus_match)(struct device *dev, const struct device_driver *drv);
 	void (*device_free)(struct mdio_device *mdiodev);
 	void (*device_remove)(struct mdio_device *mdiodev);
 
@@ -57,11 +57,8 @@ struct mdio_driver_common {
 };
 #define MDIO_DEVICE_FLAG_PHY		1
 
-static inline struct mdio_driver_common *
-to_mdio_common_driver(const struct device_driver *driver)
-{
-	return container_of(driver, struct mdio_driver_common, driver);
-}
+#define to_mdio_common_driver(__drv_c)	container_of_const(__drv_c, struct mdio_driver_common,	\
+							   driver)
 
 /* struct mdio_driver: Generic MDIO driver */
 struct mdio_driver {
@@ -80,12 +77,8 @@ struct mdio_driver {
 	void (*shutdown)(struct mdio_device *mdiodev);
 };
 
-static inline struct mdio_driver *
-to_mdio_driver(const struct device_driver *driver)
-{
-	return container_of(to_mdio_common_driver(driver), struct mdio_driver,
-			    mdiodrv);
-}
+#define to_mdio_driver(__drv_m)	container_of_const(to_mdio_common_driver(__drv_m),	\
+						   struct mdio_driver, mdiodrv)
 
 /* device driver data */
 static inline void mdiodev_set_drvdata(struct mdio_device *mdio, void *data)
@@ -105,7 +98,7 @@ void mdio_device_remove(struct mdio_device *mdiodev);
 void mdio_device_reset(struct mdio_device *mdiodev, int value);
 int mdio_driver_register(struct mdio_driver *drv);
 void mdio_driver_unregister(struct mdio_driver *drv);
-int mdio_device_bus_match(struct device *dev, struct device_driver *drv);
+int mdio_device_bus_match(struct device *dev, const struct device_driver *drv);
 
 static inline void mdio_device_get(struct mdio_device *mdiodev)
 {
diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index b573f15762f8..ce1f9d737964 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -526,7 +526,7 @@ struct mhi_driver {
 	struct device_driver driver;
 };
 
-#define to_mhi_driver(drv) container_of(drv, struct mhi_driver, driver)
+#define to_mhi_driver(drv) container_of_const(drv, struct mhi_driver, driver)
 #define to_mhi_device(dev) container_of(dev, struct mhi_device, dev)
 
 /**
diff --git a/include/linux/mhi_ep.h b/include/linux/mhi_ep.h
index 11bf3212f782..7b40fc8cbe77 100644
--- a/include/linux/mhi_ep.h
+++ b/include/linux/mhi_ep.h
@@ -221,7 +221,7 @@ struct mhi_ep_driver {
 };
 
 #define to_mhi_ep_device(dev) container_of(dev, struct mhi_ep_device, dev)
-#define to_mhi_ep_driver(drv) container_of(drv, struct mhi_ep_driver, driver)
+#define to_mhi_ep_driver(drv) container_of_const(drv, struct mhi_ep_driver, driver)
 
 /*
  * module_mhi_ep_driver() - Helper macro for drivers that don't do
diff --git a/include/linux/moxtet.h b/include/linux/moxtet.h
index ac577699edfd..dfa4800306ee 100644
--- a/include/linux/moxtet.h
+++ b/include/linux/moxtet.h
@@ -61,13 +61,8 @@ struct moxtet_driver {
 	struct device_driver		driver;
 };
 
-static inline struct moxtet_driver *
-to_moxtet_driver(struct device_driver *drv)
-{
-	if (!drv)
-		return NULL;
-	return container_of(drv, struct moxtet_driver, driver);
-}
+#define to_moxtet_driver(__drv)	\
+	( __drv ? container_of_const(__drv, struct moxtet_driver, driver) : NULL )
 
 extern int __moxtet_register_driver(struct module *owner,
 				    struct moxtet_driver *mdrv);
diff --git a/include/linux/nd.h b/include/linux/nd.h
index b9771ba1ef87..fa099e295f78 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -84,11 +84,7 @@ struct nd_device_driver {
 	void (*notify)(struct device *dev, enum nvdimm_event event);
 };
 
-static inline struct nd_device_driver *to_nd_device_driver(
-		struct device_driver *drv)
-{
-	return container_of(drv, struct nd_device_driver, drv);
-};
+#define to_nd_device_driver(__drv)	container_of_const(__drv, struct nd_device_driver, drv)
 
 /**
  * struct nd_namespace_common - core infrastructure of a namespace
diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h
index adee6a1b35db..980a3c90a5ee 100644
--- a/include/linux/pci-epf.h
+++ b/include/linux/pci-epf.h
@@ -105,8 +105,7 @@ struct pci_epf_driver {
 	const struct pci_epf_device_id	*id_table;
 };
 
-#define to_pci_epf_driver(drv) (container_of((drv), struct pci_epf_driver, \
-				driver))
+#define to_pci_epf_driver(drv) container_of_const((drv), struct pci_epf_driver, driver)
 
 /**
  * struct pci_epf_bar - represents the BAR of EPF device
diff --git a/include/linux/pci.h b/include/linux/pci.h
index cafc5ab1cbcb..aa1c3280b7d0 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -958,10 +958,8 @@ struct pci_driver {
 	bool driver_managed_dma;
 };
 
-static inline struct pci_driver *to_pci_driver(struct device_driver *drv)
-{
-    return drv ? container_of(drv, struct pci_driver, driver) : NULL;
-}
+#define to_pci_driver(__drv)	\
+	( __drv ? container_of_const(__drv, struct pci_driver, driver) : NULL )
 
 /**
  * PCI_DEVICE - macro used to describe a specific PCI device
diff --git a/include/linux/phy.h b/include/linux/phy.h
index e6e83304558e..8237d5006a99 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1179,7 +1179,7 @@ struct phy_driver {
 	int (*led_polarity_set)(struct phy_device *dev, int index,
 				unsigned long modes);
 };
-#define to_phy_driver(d) container_of(to_mdio_common_driver(d),		\
+#define to_phy_driver(d) container_of_const(to_mdio_common_driver(d),		\
 				      struct phy_driver, mdiodrv)
 
 #define PHY_ANY_ID "MATCH ANY PHY"
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index 7f2ff95d2deb..b7a7158aaf65 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -383,7 +383,7 @@ struct pnp_driver {
 	struct device_driver driver;
 };
 
-#define	to_pnp_driver(drv) container_of(drv, struct pnp_driver, driver)
+#define	to_pnp_driver(drv) container_of_const(drv, struct pnp_driver, driver)
 
 struct pnp_card_driver {
 	struct list_head global_list;
diff --git a/include/linux/rio.h b/include/linux/rio.h
index 2cd637268b4f..3c29f40f3c94 100644
--- a/include/linux/rio.h
+++ b/include/linux/rio.h
@@ -465,7 +465,7 @@ struct rio_driver {
 	struct device_driver driver;
 };
 
-#define	to_rio_driver(drv) container_of(drv,struct rio_driver, driver)
+#define	to_rio_driver(drv) container_of_const(drv,struct rio_driver, driver)
 
 union rio_pw_msg {
 	struct {
diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h
index 3a9bb5b9a9e8..688466a0e816 100644
--- a/include/linux/scmi_protocol.h
+++ b/include/linux/scmi_protocol.h
@@ -945,7 +945,7 @@ struct scmi_device {
 	struct scmi_handle *handle;
 };
 
-#define to_scmi_dev(d) container_of(d, struct scmi_device, dev)
+#define to_scmi_dev(d) container_of_const(d, struct scmi_device, dev)
 
 struct scmi_device_id {
 	u8 protocol_id;
diff --git a/include/linux/serio.h b/include/linux/serio.h
index 7ca41af93b37..bf2191f25350 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -80,7 +80,7 @@ struct serio_driver {
 
 	struct device_driver driver;
 };
-#define to_serio_driver(d)	container_of(d, struct serio_driver, driver)
+#define to_serio_driver(d)	container_of_const(d, struct serio_driver, driver)
 
 int serio_open(struct serio *serio, struct serio_driver *drv);
 void serio_close(struct serio *serio);
diff --git a/include/linux/slimbus.h b/include/linux/slimbus.h
index 3042385b7b40..a4608d9a9684 100644
--- a/include/linux/slimbus.h
+++ b/include/linux/slimbus.h
@@ -91,7 +91,7 @@ struct slim_driver {
 	struct device_driver		driver;
 	const struct slim_device_id	*id_table;
 };
-#define to_slim_driver(d) container_of(d, struct slim_driver, driver)
+#define to_slim_driver(d) container_of_const(d, struct slim_driver, driver)
 
 /**
  * struct slim_val_inf - Slimbus value or information element
diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h
index 7161a3183eda..a532d1e4b1f4 100644
--- a/include/linux/soc/qcom/apr.h
+++ b/include/linux/soc/qcom/apr.h
@@ -162,7 +162,7 @@ struct apr_driver {
 };
 
 typedef struct apr_driver gpr_driver_t;
-#define to_apr_driver(d) container_of(d, struct apr_driver, driver)
+#define to_apr_driver(d) container_of_const(d, struct apr_driver, driver)
 
 /*
  * use a macro to avoid include chaining to get THIS_MODULE
diff --git a/include/linux/soundwire/sdw_type.h b/include/linux/soundwire/sdw_type.h
index 693320b4f5c2..d405935a45fe 100644
--- a/include/linux/soundwire/sdw_type.h
+++ b/include/linux/soundwire/sdw_type.h
@@ -13,7 +13,7 @@ static inline int is_sdw_slave(const struct device *dev)
 	return dev->type == &sdw_slave_type;
 }
 
-#define drv_to_sdw_driver(_drv) container_of(_drv, struct sdw_driver, driver)
+#define drv_to_sdw_driver(_drv) container_of_const(_drv, struct sdw_driver, driver)
 
 #define sdw_register_driver(drv) \
 	__sdw_register_driver(drv, THIS_MODULE)
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index e8e1e798924f..3fc559686d38 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -351,10 +351,8 @@ struct spi_driver {
 	struct device_driver	driver;
 };
 
-static inline struct spi_driver *to_spi_driver(struct device_driver *drv)
-{
-	return drv ? container_of(drv, struct spi_driver, driver) : NULL;
-}
+#define to_spi_driver(__drv)   \
+	( __drv ? container_of_const(__drv, struct spi_driver, driver) : NULL )
 
 extern int __spi_register_driver(struct module *owner, struct spi_driver *sdrv);
 
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index a2257380c3f1..e1fb11e0f12c 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -325,7 +325,7 @@ struct ssb_driver {
 
 	struct device_driver drv;
 };
-#define drv_to_ssb_drv(_drv) container_of(_drv, struct ssb_driver, drv)
+#define drv_to_ssb_drv(_drv) container_of_const(_drv, struct ssb_driver, drv)
 
 extern int __ssb_driver_register(struct ssb_driver *drv, struct module *owner);
 #define ssb_driver_register(drv) \
diff --git a/include/linux/tc.h b/include/linux/tc.h
index 1638660abf5e..8416bae9b126 100644
--- a/include/linux/tc.h
+++ b/include/linux/tc.h
@@ -108,7 +108,7 @@ struct tc_driver {
 	struct device_driver driver;
 };
 
-#define to_tc_driver(drv) container_of(drv, struct tc_driver, driver)
+#define to_tc_driver(drv) container_of_const(drv, struct tc_driver, driver)
 
 /*
  * Return TURBOchannel clock frequency in Hz.
diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
index 786b9ae6cf4d..a54c203000ed 100644
--- a/include/linux/tee_drv.h
+++ b/include/linux/tee_drv.h
@@ -298,6 +298,6 @@ struct tee_client_driver {
 };
 
 #define to_tee_client_driver(d) \
-		container_of(d, struct tee_client_driver, driver)
+		container_of_const(d, struct tee_client_driver, driver)
 
 #endif /*__TEE_DRV_H*/
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 96fea920873b..ecc5cb7b8c91 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -209,10 +209,7 @@ struct virtio_driver {
 	int (*restore)(struct virtio_device *dev);
 };
 
-static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv)
-{
-	return container_of(drv, struct virtio_driver, driver);
-}
+#define drv_to_virtio(__drv)	container_of_const(__drv, struct virtio_driver, driver)
 
 /* use a macro to avoid include chaining to get THIS_MODULE */
 #define register_virtio_driver(drv) \
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index fb3399e4cd29..bd1243657c01 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -495,7 +495,7 @@ iscsi_destroy_flashnode_sess(struct iscsi_bus_flash_session *fnode_sess);
 
 extern void iscsi_destroy_all_flashnode(struct Scsi_Host *shost);
 extern int iscsi_flashnode_bus_match(struct device *dev,
-				     struct device_driver *drv);
+				     const struct device_driver *drv);
 extern struct device *
 iscsi_find_flashnode_sess(struct Scsi_Host *shost, void *data,
 			  int (*fn)(struct device *dev, void *data));
diff --git a/include/sound/ac97/codec.h b/include/sound/ac97/codec.h
index 2fc641cb1982..882b849b9255 100644
--- a/include/sound/ac97/codec.h
+++ b/include/sound/ac97/codec.h
@@ -73,10 +73,7 @@ static inline struct ac97_codec_device *to_ac97_device(struct device *d)
 	return container_of(d, struct ac97_codec_device, dev);
 }
 
-static inline struct ac97_codec_driver *to_ac97_driver(struct device_driver *d)
-{
-	return container_of(d, struct ac97_codec_driver, driver);
-}
+#define to_ac97_driver(__drv) container_of_const(__drv, struct ac97_codec_driver, driver)
 
 #if IS_ENABLED(CONFIG_AC97_BUS_NEW)
 int snd_ac97_codec_driver_register(struct ac97_codec_driver *drv);
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index ac22cf08c09f..3f90bdd387b6 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -124,10 +124,7 @@ struct xenbus_driver {
 	void (*reclaim_memory)(struct xenbus_device *dev);
 };
 
-static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv)
-{
-	return container_of(drv, struct xenbus_driver, driver);
-}
+#define to_xenbus_driver(__drv)	container_of_const(__drv, struct xenbus_driver, driver)
 
 int __must_check __xenbus_register_frontend(struct xenbus_driver *drv,
 					    struct module *owner,
-- 
cgit v1.2.3


From 633478695d6b52e0b52f1273d8d11275b627b87d Mon Sep 17 00:00:00 2001
From: Slark Xiao <slark_xiao@163.com>
Date: Mon, 1 Jul 2024 10:12:15 +0800
Subject: bus: mhi: host: Allow controller drivers to specify name for the MHI
 controller

MHI devices usually have a product/device name to identify each device
uniquely. So let's specify that name in 'struct mhi_controller' so that the
client drivers can use this name to uniquely identify the devices and apply
any device specific quirks.

Signed-off-by: Slark Xiao <slark_xiao@163.com>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Link: https://lore.kernel.org/r/20240701021216.17734-2-slark_xiao@163.com
[mani: reworked subject and description]
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 include/linux/mhi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index b573f15762f8..fabd6ed8d258 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -290,6 +290,7 @@ struct mhi_controller_config {
 
 /**
  * struct mhi_controller - Master MHI controller structure
+ * @name: Device name of the MHI controller
  * @cntrl_dev: Pointer to the struct device of physical bus acting as the MHI
  *            controller (required)
  * @mhi_dev: MHI device instance for the controller
@@ -367,6 +368,7 @@ struct mhi_controller_config {
  * they can be populated depending on the usecase.
  */
 struct mhi_controller {
+	const char *name;
 	struct device *cntrl_dev;
 	struct mhi_device *mhi_dev;
 	struct dentry *debugfs_dentry;
-- 
cgit v1.2.3


From 62ce9ef1479729b1a3f8a1b19900e28d68b3625e Mon Sep 17 00:00:00 2001
From: Marco Felsch <m.felsch@pengutronix.de>
Date: Mon, 1 Jul 2024 15:21:24 +0200
Subject: usb: typec: tcpci: add support to set connector orientation

This add the support to set the optional connector orientation bit which
is part of the optional CONFIG_STANDARD_OUTPUT register 0x18 [1]. This
allows system designers to connect the tcpc orientation pin directly to
the 2:1 ss-mux.

[1] https://www.usb.org/sites/default/files/documents/usb-port_controller_specification_rev2.0_v1.0_0.pdf

Signed-off-by: Marco Felsch <m.felsch@pengutronix.de>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20240701132133.3054394-1-m.felsch@pengutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/tcpci.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h
index 47a86b8a4a50..0ab39b6ea205 100644
--- a/include/linux/usb/tcpci.h
+++ b/include/linux/usb/tcpci.h
@@ -47,6 +47,9 @@
 #define TCPC_SINK_FAST_ROLE_SWAP	BIT(0)
 
 #define TCPC_CONFIG_STD_OUTPUT		0x18
+#define TCPC_CONFIG_STD_OUTPUT_ORIENTATION_MASK		BIT(0)
+#define TCPC_CONFIG_STD_OUTPUT_ORIENTATION_NORMAL	0
+#define TCPC_CONFIG_STD_OUTPUT_ORIENTATION_FLIPPED	1
 
 #define TCPC_TCPC_CTRL			0x19
 #define TCPC_TCPC_CTRL_ORIENTATION	BIT(0)
@@ -127,6 +130,7 @@
 #define TCPC_DEV_CAP_2			0x26
 #define TCPC_STD_INPUT_CAP		0x28
 #define TCPC_STD_OUTPUT_CAP		0x29
+#define TCPC_STD_OUTPUT_CAP_ORIENTATION	BIT(0)
 
 #define TCPC_MSG_HDR_INFO		0x2e
 #define TCPC_MSG_HDR_INFO_DATA_ROLE	BIT(3)
@@ -209,6 +213,9 @@ struct tcpci;
  *		swap following Discover Identity on SOP' occurs.
  *		Return true when the TCPM is allowed to request a Vconn swap
  *		after Discovery Identity on SOP.
+ * @set_orientation:
+ *		Optional; Enable setting the connector orientation
+ *		CONFIG_STANDARD_OUTPUT (0x18) bit0.
  */
 struct tcpci_data {
 	struct regmap *regmap;
@@ -216,6 +223,7 @@ struct tcpci_data {
 	unsigned char auto_discharge_disconnect:1;
 	unsigned char vbus_vsafe0v:1;
 	unsigned char cable_comm_capable:1;
+	unsigned char set_orientation:1;
 
 	int (*init)(struct tcpci *tcpci, struct tcpci_data *data);
 	int (*set_vconn)(struct tcpci *tcpci, struct tcpci_data *data,
-- 
cgit v1.2.3


From bab2f5e8fd5d2f759db26b78d9db57412888f187 Mon Sep 17 00:00:00 2001
From: Ekansh Gupta <quic_ekangupt@quicinc.com>
Date: Fri, 28 Jun 2024 12:45:01 +0100
Subject: misc: fastrpc: Restrict untrusted app to attach to privileged PD

Untrusted application with access to only non-secure fastrpc device
node can attach to root_pd or static PDs if it can make the respective
init request. This can cause problems as the untrusted application
can send bad requests to root_pd or static PDs. Add changes to reject
attach to privileged PDs if the request is being made using non-secure
fastrpc device node.

Fixes: 0871561055e6 ("misc: fastrpc: Add support for audiopd")
Cc: stable <stable@kernel.org>
Signed-off-by: Ekansh Gupta <quic_ekangupt@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20240628114501.14310-7-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/misc/fastrpc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
index f33d914d8f46..91583690bddc 100644
--- a/include/uapi/misc/fastrpc.h
+++ b/include/uapi/misc/fastrpc.h
@@ -8,11 +8,14 @@
 #define FASTRPC_IOCTL_ALLOC_DMA_BUFF	_IOWR('R', 1, struct fastrpc_alloc_dma_buf)
 #define FASTRPC_IOCTL_FREE_DMA_BUFF	_IOWR('R', 2, __u32)
 #define FASTRPC_IOCTL_INVOKE		_IOWR('R', 3, struct fastrpc_invoke)
+/* This ioctl is only supported with secure device nodes */
 #define FASTRPC_IOCTL_INIT_ATTACH	_IO('R', 4)
 #define FASTRPC_IOCTL_INIT_CREATE	_IOWR('R', 5, struct fastrpc_init_create)
 #define FASTRPC_IOCTL_MMAP		_IOWR('R', 6, struct fastrpc_req_mmap)
 #define FASTRPC_IOCTL_MUNMAP		_IOWR('R', 7, struct fastrpc_req_munmap)
+/* This ioctl is only supported with secure device nodes */
 #define FASTRPC_IOCTL_INIT_ATTACH_SNS	_IO('R', 8)
+/* This ioctl is only supported with secure device nodes */
 #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static)
 #define FASTRPC_IOCTL_MEM_MAP		_IOWR('R', 10, struct fastrpc_mem_map)
 #define FASTRPC_IOCTL_MEM_UNMAP		_IOWR('R', 11, struct fastrpc_mem_unmap)
-- 
cgit v1.2.3


From f9a748fa5ce0958bea2a03c25f092fbc22e72c67 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Thu, 2 May 2024 16:48:21 +0100
Subject: parport: Remove 'drivers' list

The list has been empty since:
 'commit 3275158fa52a ("parport: remove use of devmodel")'

This also means we can remove the 'list_head' from
struct parport_driver.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Acked-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Link: https://lore.kernel.org/r/20240502154823.67235-2-linux@treblig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/parport.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/parport.h b/include/linux/parport.h
index fff39bc30629..2a4424b60156 100644
--- a/include/linux/parport.h
+++ b/include/linux/parport.h
@@ -258,7 +258,6 @@ struct parport_driver {
 	int (*probe)(struct pardevice *);
 	struct device_driver driver;
 	bool devmodel;
-	struct list_head list;
 };
 
 #define to_parport_driver(n) container_of(n, struct parport_driver, driver)
-- 
cgit v1.2.3


From ed06e054906c5e7853a36d9ddad8fc94dbb8c252 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Thu, 2 May 2024 16:48:22 +0100
Subject: parport: Remove attach function pointer

The attach function pointers haven't actually been called since:
  'commit 3275158fa52a ("parport: remove use of devmodel")'
topped adding entries to the drivers list.

If you're converting a driver, look at the 'match_port' function
pointer instead.

(There are lots of comment references to 'attach' all over, but they
probably need some deeper understanding to check the semantics
to see if they can be replaced by match_port).

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Acked-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Link: https://lore.kernel.org/r/20240502154823.67235-3-linux@treblig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/parport.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/parport.h b/include/linux/parport.h
index 2a4424b60156..190de3569e25 100644
--- a/include/linux/parport.h
+++ b/include/linux/parport.h
@@ -252,7 +252,6 @@ struct parport {
 
 struct parport_driver {
 	const char *name;
-	void (*attach) (struct parport *);
 	void (*detach) (struct parport *);
 	void (*match_port)(struct parport *);
 	int (*probe)(struct pardevice *);
-- 
cgit v1.2.3


From dfd19866d1a3f681cc12aae67ab05011eb3aa3d8 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Thu, 2 May 2024 16:48:23 +0100
Subject: parport: Remove parport_driver.devmodel

'devmodel' hasn't actually been used since:
  'commit 3275158fa52a ("parport: remove use of devmodel")'
and everyone now has it set to true and has been fixed up; remove
the flag.

(There are still comments all over about it)

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Acked-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Link: https://lore.kernel.org/r/20240502154823.67235-4-linux@treblig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/parport.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/parport.h b/include/linux/parport.h
index 190de3569e25..464c2ad28039 100644
--- a/include/linux/parport.h
+++ b/include/linux/parport.h
@@ -256,7 +256,6 @@ struct parport_driver {
 	void (*match_port)(struct parport *);
 	int (*probe)(struct pardevice *);
 	struct device_driver driver;
-	bool devmodel;
 };
 
 #define to_parport_driver(n) container_of(n, struct parport_driver, driver)
@@ -299,9 +298,6 @@ int __must_check __parport_register_driver(struct parport_driver *,
  *	to receive notifications about ports being found in the
  *	system, as well as ports no longer available.
  *
- *	If devmodel is true then the new device model is used
- *	for registration.
- *
  *	The @driver structure is allocated by the caller and must not be
  *	deallocated until after calling parport_unregister_driver().
  *
-- 
cgit v1.2.3


From eb054d67b21a53f6ccf3af49a62fb99397b48fc2 Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.martins@oracle.com>
Date: Wed, 3 Jul 2024 11:16:03 +0100
Subject: iommu/arm-smmu-v3: Add support for dirty tracking in domain alloc

This provides all the infrastructure to enable dirty tracking if the
hardware has the capability and domain alloc request for it.

Also, add a device_iommu_capable() check in iommufd core for
IOMMU_CAP_DIRTY_TRACKING before we request a user domain with dirty
tracking support.

Please note, we still report no support for IOMMU_CAP_DIRTY_TRACKING
as it will finally be enabled in a subsequent patch.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Link: https://lore.kernel.org/r/20240703101604.2576-5-shameerali.kolothum.thodi@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/io-pgtable.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 86cf1f7ae389..f9a81761bfce 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -85,6 +85,8 @@ struct io_pgtable_cfg {
 	 *
 	 * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability
 	 *	attributes set in the TCR for a non-coherent page-table walker.
+	 *
+	 * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable.
 	 */
 	#define IO_PGTABLE_QUIRK_ARM_NS			BIT(0)
 	#define IO_PGTABLE_QUIRK_NO_PERMS		BIT(1)
@@ -92,6 +94,7 @@ struct io_pgtable_cfg {
 	#define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT	BIT(4)
 	#define IO_PGTABLE_QUIRK_ARM_TTBR1		BIT(5)
 	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA		BIT(6)
+	#define IO_PGTABLE_QUIRK_ARM_HD			BIT(7)
 	unsigned long			quirks;
 	unsigned long			pgsize_bitmap;
 	unsigned int			ias;
-- 
cgit v1.2.3


From 58cd0cba82976a5a21bdfc78783952b147837a84 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Fri, 28 Jun 2024 19:44:42 +0300
Subject: drm: Add helpers for q4 fixed point values

Add helpers to convert between q4 fixed point and integer/fraction
values. Also add the format/argument macros required to printk q4 fixed
point variables. The q4 notation is based on the short variant described
by

https://en.wikipedia.org/wiki/Q_(number_format)

where only the number of fraction bits in the fixed point value are
defined, while the full size is deducted from the container type, that
is the size of int for these helpers. Using the fxp_ prefix, which makes
moving these helpers outside of drm to a more generic place easier, if
they prove to be useful.

These are needed by later patches dumping the Display Stream Compression
configuration in DRM core and in the i915 driver to replace the
corresponding bpp_x16 helpers defined locally in the driver.

v2: Use the more generic/descriptive fxp_q4 prefix instead of drm_x16.
   (Jani)

Cc: Jani Nikula <jani.nikula@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240628164451.1177612-2-imre.deak@intel.com
---
 include/drm/drm_fixed.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h
index 81572d32db0c..ef8bc8d72039 100644
--- a/include/drm/drm_fixed.h
+++ b/include/drm/drm_fixed.h
@@ -214,4 +214,27 @@ static inline s64 drm_fixp_exp(s64 x)
 	return sum;
 }
 
+static inline int fxp_q4_from_int(int val_int)
+{
+	return val_int << 4;
+}
+
+static inline int fxp_q4_to_int(int val_q4)
+{
+	return val_q4 >> 4;
+}
+
+static inline int fxp_q4_to_int_roundup(int val_q4)
+{
+	return (val_q4 + 0xf) >> 4;
+}
+
+static inline int fxp_q4_to_frac(int val_q4)
+{
+	return val_q4 & 0xf;
+}
+
+#define FXP_Q4_FMT		"%d.%04d"
+#define FXP_Q4_ARGS(val_q4)	fxp_q4_to_int(val_q4), (fxp_q4_to_frac(val_q4) * 625)
+
 #endif
-- 
cgit v1.2.3


From 74c3f5da233637bdf828e1ce67a5c1083630a47b Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Fri, 28 Jun 2024 19:44:43 +0300
Subject: drm/display/dsc: Add a helper to dump the DSC configuration

Add a helper to dump the Display Stream Compression configuration, taken
into use in the i915 driver by a later patch.

v2:
- Rebase on the s/DRM_X16/FXP_Q4 change.
- s/DSC configration/DSC configuration in the function documentation.

Acked-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240628164451.1177612-3-imre.deak@intel.com
---
 include/drm/display/drm_dsc_helper.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/drm/display/drm_dsc_helper.h b/include/drm/display/drm_dsc_helper.h
index 913aa2071232..2c2b9033f60f 100644
--- a/include/drm/display/drm_dsc_helper.h
+++ b/include/drm/display/drm_dsc_helper.h
@@ -17,6 +17,8 @@ enum drm_dsc_params_type {
 	DRM_DSC_1_2_420,
 };
 
+struct drm_printer;
+
 void drm_dsc_dp_pps_header_init(struct dp_sdp_header *pps_header);
 int drm_dsc_dp_rc_buffer_size(u8 rc_buffer_block_size, u8 rc_buffer_size);
 void drm_dsc_pps_payload_pack(struct drm_dsc_picture_parameter_set *pps_sdp,
@@ -28,6 +30,7 @@ int drm_dsc_compute_rc_parameters(struct drm_dsc_config *vdsc_cfg);
 u8 drm_dsc_initial_scale_value(const struct drm_dsc_config *dsc);
 u32 drm_dsc_flatness_det_thresh(const struct drm_dsc_config *dsc);
 u32 drm_dsc_get_bpp_int(const struct drm_dsc_config *vdsc_cfg);
+void drm_dsc_dump_config(struct drm_printer *p, int indent, const struct drm_dsc_config *cfg);
 
 #endif /* _DRM_DSC_HELPER_H_ */
 
-- 
cgit v1.2.3


From da042a3655151157c06e71a583e883ab2d86d1ff Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 2 Jul 2024 17:10:19 +0200
Subject: block: split integrity support out of bio.h

Split struct bio_integrity_payload and the related prototypes out of
bio.h into a separate bio-integrity.h header so that it is only pulled
in by the few places that need it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240702151047.1746127-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio-integrity.h | 153 +++++++++++++++++++++++++++++++++++++++++
 include/linux/bio.h           | 156 ------------------------------------------
 include/linux/blk-integrity.h |   1 +
 3 files changed, 154 insertions(+), 156 deletions(-)
 create mode 100644 include/linux/bio-integrity.h

(limited to 'include')

diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
new file mode 100644
index 000000000000..70ef19a0dc7e
--- /dev/null
+++ b/include/linux/bio-integrity.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_BIO_INTEGRITY_H
+#define _LINUX_BIO_INTEGRITY_H
+
+#include <linux/bio.h>
+
+enum bip_flags {
+	BIP_BLOCK_INTEGRITY	= 1 << 0, /* block layer owns integrity data */
+	BIP_MAPPED_INTEGRITY	= 1 << 1, /* ref tag has been remapped */
+	BIP_CTRL_NOCHECK	= 1 << 2, /* disable HBA integrity checking */
+	BIP_DISK_NOCHECK	= 1 << 3, /* disable disk integrity checking */
+	BIP_IP_CHECKSUM		= 1 << 4, /* IP checksum */
+	BIP_INTEGRITY_USER	= 1 << 5, /* Integrity payload is user address */
+	BIP_COPY_USER		= 1 << 6, /* Kernel bounce buffer in use */
+};
+
+struct bio_integrity_payload {
+	struct bio		*bip_bio;	/* parent bio */
+
+	struct bvec_iter	bip_iter;
+
+	unsigned short		bip_vcnt;	/* # of integrity bio_vecs */
+	unsigned short		bip_max_vcnt;	/* integrity bio_vec slots */
+	unsigned short		bip_flags;	/* control flags */
+
+	struct bvec_iter	bio_iter;	/* for rewinding parent bio */
+
+	struct work_struct	bip_work;	/* I/O completion */
+
+	struct bio_vec		*bip_vec;
+	struct bio_vec		bip_inline_vecs[];/* embedded bvec array */
+};
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+
+#define bip_for_each_vec(bvl, bip, iter)				\
+	for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter)
+
+#define bio_for_each_integrity_vec(_bvl, _bio, _iter)			\
+	for_each_bio(_bio)						\
+		bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
+
+static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
+{
+	if (bio->bi_opf & REQ_INTEGRITY)
+		return bio->bi_integrity;
+
+	return NULL;
+}
+
+static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
+{
+	struct bio_integrity_payload *bip = bio_integrity(bio);
+
+	if (bip)
+		return bip->bip_flags & flag;
+
+	return false;
+}
+
+static inline sector_t bip_get_seed(struct bio_integrity_payload *bip)
+{
+	return bip->bip_iter.bi_sector;
+}
+
+static inline void bip_set_seed(struct bio_integrity_payload *bip,
+				sector_t seed)
+{
+	bip->bip_iter.bi_sector = seed;
+}
+
+struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp,
+		unsigned int nr);
+int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len,
+		unsigned int offset);
+int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed);
+void bio_integrity_unmap_free_user(struct bio *bio);
+bool bio_integrity_prep(struct bio *bio);
+void bio_integrity_advance(struct bio *bio, unsigned int bytes_done);
+void bio_integrity_trim(struct bio *bio);
+int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask);
+int bioset_integrity_create(struct bio_set *bs, int pool_size);
+void bioset_integrity_free(struct bio_set *bs);
+void bio_integrity_init(void);
+
+#else /* CONFIG_BLK_DEV_INTEGRITY */
+
+static inline void *bio_integrity(struct bio *bio)
+{
+	return NULL;
+}
+
+static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
+{
+	return 0;
+}
+
+static inline void bioset_integrity_free(struct bio_set *bs)
+{
+}
+
+static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
+					 ssize_t len, u32 seed)
+{
+	return -EINVAL;
+}
+
+static inline void bio_integrity_unmap_free_user(struct bio *bio)
+{
+}
+
+static inline bool bio_integrity_prep(struct bio *bio)
+{
+	return true;
+}
+
+static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
+		gfp_t gfp_mask)
+{
+	return 0;
+}
+
+static inline void bio_integrity_advance(struct bio *bio,
+		unsigned int bytes_done)
+{
+}
+
+static inline void bio_integrity_trim(struct bio *bio)
+{
+}
+
+static inline void bio_integrity_init(void)
+{
+}
+
+static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
+{
+	return false;
+}
+
+static inline void *bio_integrity_alloc(struct bio *bio, gfp_t gfp,
+		unsigned int nr)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
+					unsigned int len, unsigned int offset)
+{
+	return 0;
+}
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+#endif /* _LINUX_BIO_INTEGRITY_H */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 818e93612947..a46e2047bea4 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -321,69 +321,6 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio)
 #define bio_for_each_folio_all(fi, bio)				\
 	for (bio_first_folio(&fi, bio, 0); fi.folio; bio_next_folio(&fi, bio))
 
-enum bip_flags {
-	BIP_BLOCK_INTEGRITY	= 1 << 0, /* block layer owns integrity data */
-	BIP_MAPPED_INTEGRITY	= 1 << 1, /* ref tag has been remapped */
-	BIP_CTRL_NOCHECK	= 1 << 2, /* disable HBA integrity checking */
-	BIP_DISK_NOCHECK	= 1 << 3, /* disable disk integrity checking */
-	BIP_IP_CHECKSUM		= 1 << 4, /* IP checksum */
-	BIP_INTEGRITY_USER	= 1 << 5, /* Integrity payload is user address */
-	BIP_COPY_USER		= 1 << 6, /* Kernel bounce buffer in use */
-};
-
-/*
- * bio integrity payload
- */
-struct bio_integrity_payload {
-	struct bio		*bip_bio;	/* parent bio */
-
-	struct bvec_iter	bip_iter;
-
-	unsigned short		bip_vcnt;	/* # of integrity bio_vecs */
-	unsigned short		bip_max_vcnt;	/* integrity bio_vec slots */
-	unsigned short		bip_flags;	/* control flags */
-
-	struct bvec_iter	bio_iter;	/* for rewinding parent bio */
-
-	struct work_struct	bip_work;	/* I/O completion */
-
-	struct bio_vec		*bip_vec;
-	struct bio_vec		bip_inline_vecs[];/* embedded bvec array */
-};
-
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-
-static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
-{
-	if (bio->bi_opf & REQ_INTEGRITY)
-		return bio->bi_integrity;
-
-	return NULL;
-}
-
-static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
-{
-	struct bio_integrity_payload *bip = bio_integrity(bio);
-
-	if (bip)
-		return bip->bip_flags & flag;
-
-	return false;
-}
-
-static inline sector_t bip_get_seed(struct bio_integrity_payload *bip)
-{
-	return bip->bip_iter.bi_sector;
-}
-
-static inline void bip_set_seed(struct bio_integrity_payload *bip,
-				sector_t seed)
-{
-	bip->bip_iter.bi_sector = seed;
-}
-
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
 void bio_trim(struct bio *bio, sector_t offset, sector_t size);
 extern struct bio *bio_split(struct bio *bio, int sectors,
 			     gfp_t gfp, struct bio_set *bs);
@@ -721,99 +658,6 @@ static inline bool bioset_initialized(struct bio_set *bs)
 	return bs->bio_slab != NULL;
 }
 
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-
-#define bip_for_each_vec(bvl, bip, iter)				\
-	for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter)
-
-#define bio_for_each_integrity_vec(_bvl, _bio, _iter)			\
-	for_each_bio(_bio)						\
-		bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
-
-int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed);
-void bio_integrity_unmap_free_user(struct bio *bio);
-extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
-extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
-extern bool bio_integrity_prep(struct bio *);
-extern void bio_integrity_advance(struct bio *, unsigned int);
-extern void bio_integrity_trim(struct bio *);
-extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
-extern int bioset_integrity_create(struct bio_set *, int);
-extern void bioset_integrity_free(struct bio_set *);
-extern void bio_integrity_init(void);
-
-#else /* CONFIG_BLK_DEV_INTEGRITY */
-
-static inline void *bio_integrity(struct bio *bio)
-{
-	return NULL;
-}
-
-static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
-{
-	return 0;
-}
-
-static inline void bioset_integrity_free (struct bio_set *bs)
-{
-	return;
-}
-
-static inline bool bio_integrity_prep(struct bio *bio)
-{
-	return true;
-}
-
-static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
-				      gfp_t gfp_mask)
-{
-	return 0;
-}
-
-static inline void bio_integrity_advance(struct bio *bio,
-					 unsigned int bytes_done)
-{
-	return;
-}
-
-static inline void bio_integrity_trim(struct bio *bio)
-{
-	return;
-}
-
-static inline void bio_integrity_init(void)
-{
-	return;
-}
-
-static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
-{
-	return false;
-}
-
-static inline void *bio_integrity_alloc(struct bio * bio, gfp_t gfp,
-								unsigned int nr)
-{
-	return ERR_PTR(-EINVAL);
-}
-
-static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
-					unsigned int len, unsigned int offset)
-{
-	return 0;
-}
-
-static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
-					 ssize_t len, u32 seed)
-{
-	return -EINVAL;
-}
-static inline void bio_integrity_unmap_free_user(struct bio *bio)
-{
-}
-
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
 /*
  * Mark a bio as polled. Note that for async polled IO, the caller must
  * expect -EWOULDBLOCK if we cannot allocate a request (or other resources).
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index 804f856ed3e5..de98049b7ded 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -3,6 +3,7 @@
 #define _LINUX_BLK_INTEGRITY_H
 
 #include <linux/blk-mq.h>
+#include <linux/bio-integrity.h>
 
 struct request;
 
-- 
cgit v1.2.3


From 21671a1ed1ff22e158ebe9d619943f926f03f5cd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 2 Jul 2024 17:10:20 +0200
Subject: block: also return bio_integrity_payload * from stubs

struct bio_integrity_payload is defined unconditionally. No need to
return void * from bio_integrity() and bio_integrity_alloc().

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240702151047.1746127-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio-integrity.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index 70ef19a0dc7e..cac24dac06ff 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -85,7 +85,7 @@ void bio_integrity_init(void);
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
-static inline void *bio_integrity(struct bio *bio)
+static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
 {
 	return NULL;
 }
@@ -138,8 +138,8 @@ static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
 	return false;
 }
 
-static inline void *bio_integrity_alloc(struct bio *bio, gfp_t gfp,
-		unsigned int nr)
+static inline struct bio_integrity_payload *
+bio_integrity_alloc(struct bio *bio, gfp_t gfp, unsigned int nr)
 {
 	return ERR_PTR(-EINVAL);
 }
-- 
cgit v1.2.3


From 85253bac4d02b1f95d6109c221aeccd7a262ec4d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 2 Jul 2024 17:10:23 +0200
Subject: block: don't free submitter owned integrity payload on I/O completion

Currently __bio_integrity_endio frees the integrity payload unless it is
explicitly marked as user-mapped.  This means in-kernel callers that
allocate their own integrity payload never get to see it on I/O
completion.  The current two users don't need it as they just pre-mapped
PI tuples received over the network, but this limits uses of integrity
data lot.

Change bio_integrity_endio to call __bio_integrity_endio for block layer
generated integrity data only, and leave freeing of submitter
allocated integrity data to bio_uninit which also gets called from
the final bio_put.  This requires that unmapping user mapped or copied
integrity data is now always done by the caller, and the special
BIP_INTEGRITY_USER flag can go away.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240702151047.1746127-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio-integrity.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index cac24dac06ff..3823d9be0d07 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -10,8 +10,7 @@ enum bip_flags {
 	BIP_CTRL_NOCHECK	= 1 << 2, /* disable HBA integrity checking */
 	BIP_DISK_NOCHECK	= 1 << 3, /* disable disk integrity checking */
 	BIP_IP_CHECKSUM		= 1 << 4, /* IP checksum */
-	BIP_INTEGRITY_USER	= 1 << 5, /* Integrity payload is user address */
-	BIP_COPY_USER		= 1 << 6, /* Kernel bounce buffer in use */
+	BIP_COPY_USER		= 1 << 5, /* Kernel bounce buffer in use */
 };
 
 struct bio_integrity_payload {
-- 
cgit v1.2.3


From 74cc150282e41c6c0704cd305c9a4392dc64ef4d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 2 Jul 2024 17:10:24 +0200
Subject: block: don't free the integrity payload in
 bio_integrity_unmap_free_user

Now that the integrity payload is always freed in bio_uninit, don't
bother freeing it a little earlier in bio_integrity_unmap_free_user.
With that the separate bio_integrity_unmap_free_user can go away by
just passing the bio to bio_integrity_unmap_user.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240702151047.1746127-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio-integrity.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index 3823d9be0d07..dd831c269e99 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -73,7 +73,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp,
 int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len,
 		unsigned int offset);
 int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed);
-void bio_integrity_unmap_free_user(struct bio *bio);
+void bio_integrity_unmap_user(struct bio *bio);
 bool bio_integrity_prep(struct bio *bio);
 void bio_integrity_advance(struct bio *bio, unsigned int bytes_done);
 void bio_integrity_trim(struct bio *bio);
@@ -104,7 +104,7 @@ static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
 	return -EINVAL;
 }
 
-static inline void bio_integrity_unmap_free_user(struct bio *bio)
+static inline void bio_integrity_unmap_user(struct bio *bio)
 {
 }
 
-- 
cgit v1.2.3


From 1028f391d5f9d4248e2f49193e6de2516ad630f8 Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Wed, 3 Jul 2024 00:36:46 +0000
Subject: cgroup/misc: Introduce misc.peak

Introduce misc.peak to record the historical maximum usage of the
resource, as in some scenarios the value of misc.max could be
adjusted based on the peak usage of the resource.

Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/misc_cgroup.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h
index d70eab2501ee..618392d41975 100644
--- a/include/linux/misc_cgroup.h
+++ b/include/linux/misc_cgroup.h
@@ -31,11 +31,13 @@ struct misc_cg;
 /**
  * struct misc_res: Per cgroup per misc type resource
  * @max: Maximum limit on the resource.
+ * @watermark: Historical maximum usage of the resource.
  * @usage: Current usage of the resource.
  * @events: Number of times, the resource limit exceeded.
  */
 struct misc_res {
 	u64 max;
+	atomic64_t watermark;
 	atomic64_t usage;
 	atomic64_t events;
 };
-- 
cgit v1.2.3


From d00106bbdfa82732c23cba44491c38f8c410d865 Mon Sep 17 00:00:00 2001
From: Anna-Maria Behnsen <anna-maria@linutronix.de>
Date: Mon, 1 Jul 2024 16:47:55 +0200
Subject: vdso: Add comment about reason for vdso struct ordering

struct vdso_data is optimized for fast access to the often required struct
members. The optimization is not documented in the struct description but
it should be kept in mind, when working with the vdso_data struct.

Add a comment to the struct description.

Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Link: https://lore.kernel.org/r/20240701-vdso-cleanup-v1-2-36eb64e7ece2@linutronix.de
---
 include/vdso/datapage.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h
index d04d394db064..7647e0946f50 100644
--- a/include/vdso/datapage.h
+++ b/include/vdso/datapage.h
@@ -77,6 +77,10 @@ struct vdso_timestamp {
  * vdso_data will be accessed by 64 bit and compat code at the same time
  * so we should be careful before modifying this structure.
  *
+ * The ordering of the struct members is optimized to have fast access to the
+ * often required struct members which are related to CLOCK_REALTIME and
+ * CLOCK_MONOTONIC. This information is stored in the first cache lines.
+ *
  * @basetime is used to store the base time for the system wide time getter
  * VVAR page.
  *
-- 
cgit v1.2.3


From 4ecaf7e98a3ae0c843d67c76649ecc694232834b Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Tue, 2 Jul 2024 15:33:54 -0400
Subject: tracing: Have memmapped ring buffer use ioctl of "R" range 0x20-2F

To prevent conflicts with other ioctl numbers to allow strace to have an
idea of what is happening, add the range of ioctls for the trace buffer
mapping from _IO("T", 0x1) to the range of "R" 0x20 - 0x2F.

Link: https://lore.kernel.org/linux-trace-kernel/20240630105322.GA17573@altlinux.org/
Link: https://lore.kernel.org/linux-trace-kernel/20240630213626.GA23566@altlinux.org/

Cc: Jonathan Corbet <corbet@lwn.net>
Fixes: cf9f0f7c4c5bb ("tracing: Allow user-space mapping of the ring-buffer")
Link: https://lore.kernel.org/20240702153354.367861db@rorschach.local.home
Reported-by: "Dmitry V. Levin" <ldv@strace.io>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/uapi/linux/trace_mmap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h
index bd1066754220..c102ef35d11e 100644
--- a/include/uapi/linux/trace_mmap.h
+++ b/include/uapi/linux/trace_mmap.h
@@ -43,6 +43,6 @@ struct trace_buffer_meta {
 	__u64	Reserved2;
 };
 
-#define TRACE_MMAP_IOCTL_GET_READER		_IO('T', 0x1)
+#define TRACE_MMAP_IOCTL_GET_READER		_IO('R', 0x20)
 
 #endif /* _TRACE_MMAP_H_ */
-- 
cgit v1.2.3


From bf95919fe1917efa8f5da83057ff9fc11130aa55 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Mon, 1 Jul 2024 09:39:36 +0200
Subject: ASoC: dapm: Use unsigned for number of widgets in
 snd_soc_dapm_new_controls()

Number of widgets in array passed to snd_soc_dapm_new_controls() cannot
be negative, so make it explicit by using 'unsigned int', just like
snd_soc_add_component_controls() is doing.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://patch.msgid.link/20240701-b4-qcom-audio-lpass-codec-cleanups-v3-4-6d98d4dd1ef5@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dapm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 667ecd4daa68..12cd7b5a2202 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -457,7 +457,7 @@ int snd_soc_dapm_get_pin_switch(struct snd_kcontrol *kcontrol,
 int snd_soc_dapm_put_pin_switch(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *uncontrol);
 int snd_soc_dapm_new_controls(struct snd_soc_dapm_context *dapm,
-	const struct snd_soc_dapm_widget *widget, int num);
+	const struct snd_soc_dapm_widget *widget, unsigned int num);
 struct snd_soc_dapm_widget *snd_soc_dapm_new_control(struct snd_soc_dapm_context *dapm,
 		const struct snd_soc_dapm_widget *widget);
 struct snd_soc_dapm_widget *snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm,
-- 
cgit v1.2.3


From 526e21a2aa6fa7ac3309a8c87f4d6e5f7e407cb6 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Wed, 3 Jul 2024 07:20:34 +0900
Subject: firewire: ohci: add tracepoints event for data of Self-ID DMA

In 1394 OHCI, the SelfIDComplete event occurs when the hardware has
finished transmitting all of the self ID packets received during the bus
initialization process to the host memory by DMA.

This commit adds a tracepoints event for this event to trace the timing
and packet data of Self-ID DMA. It is the part of following tracepoints
events helpful to debug some events at bus reset; e.g. the issue addressed
at a commit d0b06dc48fb1 ("firewire: core: use long bus reset on gap count
error")[1]:

* firewire_ohci:irqs
* firewire_ohci:self_id_complete
* firewire:bus_reset_handle
* firewire:self_id_sequence

They would be also helpful in the problem about invocation timing of
hardIRQ and process (workqueue) contexts. We can often see this kind of
problem with -rt kernel[2].

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d0b06dc48fb1
[2] https://lore.kernel.org/linux-rt-users/YAwPoaUZ1gTD5y+k@hmbx/

Link: https://lore.kernel.org/r/20240702222034.1378764-6-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire_ohci.h | 54 ++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/firewire_ohci.h b/include/trace/events/firewire_ohci.h
index 483aeeb033af..4f9a7f2577f3 100644
--- a/include/trace/events/firewire_ohci.h
+++ b/include/trace/events/firewire_ohci.h
@@ -9,6 +9,8 @@
 
 #include <linux/tracepoint.h>
 
+// Some macros and helper functions are defined in 'drivers/firewire/ohci.c'.
+
 TRACE_EVENT(irqs,
 	TP_PROTO(unsigned int card_index, u32 events),
 	TP_ARGS(card_index, events),
@@ -42,6 +44,58 @@ TRACE_EVENT(irqs,
 	)
 );
 
+#define QUADLET_SIZE	4
+
+#define SELF_ID_COUNT_IS_ERROR(reg)	\
+	(!!(((reg) & OHCI1394_SelfIDCount_selfIDError_MASK) >> OHCI1394_SelfIDCount_selfIDError_SHIFT))
+
+#define SELF_ID_COUNT_GET_GENERATION(reg)	\
+	(((reg) & OHCI1394_SelfIDCount_selfIDGeneration_MASK) >> OHCI1394_SelfIDCount_selfIDGeneration_SHIFT)
+
+#define SELF_ID_RECEIVE_Q0_GET_GENERATION(quadlet)	\
+	(((quadlet) & OHCI1394_SELF_ID_RECEIVE_Q0_GENERATION_MASK) >> OHCI1394_SELF_ID_RECEIVE_Q0_GENERATION_SHIFT)
+
+#define SELF_ID_RECEIVE_Q0_GET_TIMESTAMP(quadlet)	\
+	(((quadlet) & OHCI1394_SELF_ID_RECEIVE_Q0_TIMESTAMP_MASK) >> OHCI1394_SELF_ID_RECEIVE_Q0_TIMESTAMP_SHIFT)
+
+TRACE_EVENT(self_id_complete,
+	TP_PROTO(unsigned int card_index, u32 reg, const __le32 *self_id_receive, bool has_be_header_quirk),
+	TP_ARGS(card_index, reg, self_id_receive, has_be_header_quirk),
+	TP_STRUCT__entry(
+		__field(u8, card_index)
+		__field(u32, reg)
+		__dynamic_array(u32, self_id_receive, ohci1394_self_id_count_get_size(reg))
+	),
+	TP_fast_assign(
+		__entry->card_index = card_index;
+		__entry->reg = reg;
+		{
+			u32 *ptr = __get_dynamic_array(self_id_receive);
+			int i;
+
+			for (i = 0; i < __get_dynamic_array_len(self_id_receive) / QUADLET_SIZE; ++i)
+				ptr[i] = cond_le32_to_cpu(self_id_receive[i], has_be_header_quirk);
+		}
+	),
+	TP_printk(
+		"card_index=%u is_error=%s generation_at_bus_reset=%u generation_at_completion=%u timestamp=0x%04x packet_data=%s",
+		__entry->card_index,
+		SELF_ID_COUNT_IS_ERROR(__entry->reg) ? "true" : "false",
+		SELF_ID_COUNT_GET_GENERATION(__entry->reg),
+		SELF_ID_RECEIVE_Q0_GET_GENERATION(((const u32 *)__get_dynamic_array(self_id_receive))[0]),
+		SELF_ID_RECEIVE_Q0_GET_TIMESTAMP(((const u32 *)__get_dynamic_array(self_id_receive))[0]),
+		__print_array(((const u32 *)__get_dynamic_array(self_id_receive)) + 1,
+			      (__get_dynamic_array_len(self_id_receive) / QUADLET_SIZE) - 1, QUADLET_SIZE)
+	)
+);
+
+#undef SELF_ID_COUNT_IS_ERROR
+#undef SELF_ID_COUNT_GET_GENERATION
+#undef SELF_ID_RECEIVE_Q0_GET_GENERATION
+#undef SELF_ID_RECEIVE_Q0_GET_TIMESTAMP
+
+#undef QUADLET_SIZE
+
 #endif // _FIREWIRE_OHCI_TRACE_EVENT_H
 
 #include <trace/define_trace.h>
-- 
cgit v1.2.3


From a19621ed4e0ac9e1d296d07dc8ff8c27d5c03b43 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Wed, 15 May 2024 15:07:06 +0800
Subject: mm: add folio_alloc_mpol()

Patch series "mm: convert to folio_alloc_mpol()".


This patch (of 4):

This adds a new folio_alloc_mpol() like folio_alloc() but allocate folio
according to NUMA mempolicy.

Link: https://lkml.kernel.org/r/20240515070709.78529-1-wangkefeng.wang@huawei.com
Link: https://lkml.kernel.org/r/20240515070709.78529-2-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/gfp.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 7f9691d375f0..f53f76e0b17e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -303,6 +303,8 @@ struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order);
 struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order,
 		struct mempolicy *mpol, pgoff_t ilx, int nid);
 struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order);
+struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order,
+		struct mempolicy *mpol, pgoff_t ilx, int nid);
 struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma,
 		unsigned long addr, bool hugepage);
 #else
@@ -319,6 +321,11 @@ static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order)
 {
 	return __folio_alloc_node(gfp, order, numa_node_id());
 }
+static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order,
+		struct mempolicy *mpol, pgoff_t ilx, int nid)
+{
+	return folio_alloc_noprof(gfp, order);
+}
 #define vma_alloc_folio_noprof(gfp, order, vma, addr, hugepage)		\
 	folio_alloc_noprof(gfp, order)
 #endif
@@ -326,6 +333,7 @@ static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order)
 #define alloc_pages(...)			alloc_hooks(alloc_pages_noprof(__VA_ARGS__))
 #define alloc_pages_mpol(...)			alloc_hooks(alloc_pages_mpol_noprof(__VA_ARGS__))
 #define folio_alloc(...)			alloc_hooks(folio_alloc_noprof(__VA_ARGS__))
+#define folio_alloc_mpol(...)			alloc_hooks(folio_alloc_mpol_noprof(__VA_ARGS__))
 #define vma_alloc_folio(...)			alloc_hooks(vma_alloc_folio_noprof(__VA_ARGS__))
 
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
-- 
cgit v1.2.3


From 7f83bf14603ef41a44dc907594d749a283e22c37 Mon Sep 17 00:00:00 2001
From: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Date: Wed, 15 May 2024 10:47:54 +0800
Subject: mm/huge_memory: mark racy access onhuge_anon_orders_always

huge_anon_orders_always is accessed lockless, it is better to use the
READ_ONCE() wrapper.  This is not fixing any visible bug, hopefully this
can cease some KCSAN complains in the future.  Also do that for
huge_anon_orders_madvise.

Link: https://lkml.kernel.org/r/20240515104754889HqrahFPePOIE1UlANHVAh@zte.com.cn
Signed-off-by: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Lu Zhongjun <lu.zhongjun@zte.com.cn>
Reviewed-by: xu xin <xu.xin16@zte.com.cn>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 2aa986a5cd1b..088d66a54643 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -134,8 +134,8 @@ static inline bool hugepage_flags_enabled(void)
 	 * So we don't need to look at huge_anon_orders_inherit.
 	 */
 	return hugepage_global_enabled() ||
-	       huge_anon_orders_always ||
-	       huge_anon_orders_madvise;
+	       READ_ONCE(huge_anon_orders_always) ||
+	       READ_ONCE(huge_anon_orders_madvise);
 }
 
 static inline int highest_order(unsigned long orders)
-- 
cgit v1.2.3


From 564a2ee9f9f66ceef135b7208cff705d48ad76c4 Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong@tencent.com>
Date: Wed, 22 May 2024 01:58:51 +0800
Subject: mm: remove page_file_offset and folio_file_pos

These two helpers were useful for mixed usage of swap cache and page
cache, which help retrieve the corresponding file or swap device offset of
a page or folio.

They were introduced in commit f981c5950fa8 ("mm: methods for teaching
filesystems about PG_swapcache pages") and used in commit d56b4ddf7781
("nfs: teach the NFS client how to treat PG_swapcache pages"), suppose to
be used with direct_IO for swap over fs.

But after commit e1209d3a7a67 ("mm: introduce ->swap_rw and use it for
reads from SWP_FS_OPS swap-space"), swap with direct_IO is no more, and
swap cache mapping is never exposed to fs.

Now we have dropped all users of page_file_offset and folio_file_pos, so
they can be deleted.

Link: https://lkml.kernel.org/r/20240521175854.96038-10-ryncsn@gmail.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Anna Schumaker <anna@kernel.org>
Cc: Barry Song <v-songbaohua@oppo.com>
Cc: Chao Yu <chao@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ilya Dryomov <idryomov@gmail.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Marc Dionne <marc.dionne@auristor.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: NeilBrown <neilb@suse.de>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Xiubo Li <xiubli@redhat.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagemap.h | 17 -----------------
 1 file changed, 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 59f1df0cde5a..78f2cd8c73ff 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -932,11 +932,6 @@ static inline loff_t page_offset(struct page *page)
 	return ((loff_t)page->index) << PAGE_SHIFT;
 }
 
-static inline loff_t page_file_offset(struct page *page)
-{
-	return ((loff_t)page_index(page)) << PAGE_SHIFT;
-}
-
 /**
  * folio_pos - Returns the byte position of this folio in its file.
  * @folio: The folio.
@@ -946,18 +941,6 @@ static inline loff_t folio_pos(struct folio *folio)
 	return page_offset(&folio->page);
 }
 
-/**
- * folio_file_pos - Returns the byte position of this folio in its file.
- * @folio: The folio.
- *
- * This differs from folio_pos() for folios which belong to a swap file.
- * NFS is the only filesystem today which needs to use folio_file_pos().
- */
-static inline loff_t folio_file_pos(struct folio *folio)
-{
-	return page_file_offset(&folio->page);
-}
-
 /*
  * Get the offset in PAGE_SIZE (even for hugetlb folios).
  */
-- 
cgit v1.2.3


From 05b0c7edad9b8a5ccf1b46b01e1b96fcd10b50d8 Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong@tencent.com>
Date: Wed, 22 May 2024 01:58:52 +0800
Subject: mm: drop page_index and simplify folio_index

There are two helpers for retrieving the index within address space for
mixed usage of swap cache and page cache:

- page_index
- folio_index

This commit drops page_index, as we have eliminated all users, and
converts folio_index's helper __page_file_index to use folio to avoid the
page conversion.

Link: https://lkml.kernel.org/r/20240521175854.96038-11-ryncsn@gmail.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Anna Schumaker <anna@kernel.org>
Cc: Barry Song <v-songbaohua@oppo.com>
Cc: Chao Yu <chao@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ilya Dryomov <idryomov@gmail.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Marc Dionne <marc.dionne@auristor.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: NeilBrown <neilb@suse.de>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Xiubo Li <xiubli@redhat.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h      | 13 -------------
 include/linux/pagemap.h |  8 ++++----
 2 files changed, 4 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index eb7c96d24ac0..99174fac3d47 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2295,19 +2295,6 @@ static inline void *folio_address(const struct folio *folio)
 	return page_address(&folio->page);
 }
 
-extern pgoff_t __page_file_index(struct page *page);
-
-/*
- * Return the pagecache index of the passed page.  Regular pagecache pages
- * use ->index whereas swapcache pages use swp_offset(->private)
- */
-static inline pgoff_t page_index(struct page *page)
-{
-	if (unlikely(PageSwapCache(page)))
-		return __page_file_index(page);
-	return page->index;
-}
-
 /*
  * Return true only if the page has been allocated with
  * ALLOC_NO_WATERMARKS and the low watermark was not
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 78f2cd8c73ff..1586b5c21e81 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -792,7 +792,7 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
 			mapping_gfp_mask(mapping));
 }
 
-#define swapcache_index(folio)	__page_file_index(&(folio)->page)
+extern pgoff_t __folio_swap_cache_index(struct folio *folio);
 
 /**
  * folio_index - File index of a folio.
@@ -807,9 +807,9 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
  */
 static inline pgoff_t folio_index(struct folio *folio)
 {
-        if (unlikely(folio_test_swapcache(folio)))
-                return swapcache_index(folio);
-        return folio->index;
+	if (unlikely(folio_test_swapcache(folio)))
+		return __folio_swap_cache_index(folio);
+	return folio->index;
 }
 
 /**
-- 
cgit v1.2.3


From 63818aaf0da8c036d600424ac961620dcdc13ce2 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Mon, 20 May 2024 15:44:07 -0700
Subject: mm/hugetlb: remove {Set,Clear}Hpage macros

All users have been converted to use the folio version of these macros, we
can safely remove the page based interface.

Link: https://lkml.kernel.org/r/20240520224407.110062-1-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 2b3c3a404769..15a58f69782c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -625,18 +625,14 @@ static __always_inline						\
 void folio_set_hugetlb_##flname(struct folio *folio)		\
 	{	void *private = &folio->private;		\
 		set_bit(HPG_##flname, private);			\
-	}							\
-static inline void SetHPage##uname(struct page *page)		\
-	{ set_bit(HPG_##flname, &(page->private)); }
+	}
 
 #define CLEARHPAGEFLAG(uname, flname)				\
 static __always_inline						\
 void folio_clear_hugetlb_##flname(struct folio *folio)		\
 	{	void *private = &folio->private;		\
 		clear_bit(HPG_##flname, private);		\
-	}							\
-static inline void ClearHPage##uname(struct page *page)		\
-	{ clear_bit(HPG_##flname, &(page->private)); }
+	}
 #else
 #define TESTHPAGEFLAG(uname, flname)				\
 static inline bool						\
@@ -648,15 +644,11 @@ static inline int HPage##uname(struct page *page)		\
 #define SETHPAGEFLAG(uname, flname)				\
 static inline void						\
 folio_set_hugetlb_##flname(struct folio *folio) 		\
-	{ }							\
-static inline void SetHPage##uname(struct page *page)		\
 	{ }
 
 #define CLEARHPAGEFLAG(uname, flname)				\
 static inline void						\
 folio_clear_hugetlb_##flname(struct folio *folio)		\
-	{ }							\
-static inline void ClearHPage##uname(struct page *page)		\
 	{ }
 #endif
 
-- 
cgit v1.2.3


From 6ad28e7e52e2bae76b1d3eb4466999d04d50db92 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 22 May 2024 14:57:13 +0200
Subject: mm/rmap: sanity check that zeropages are not passed to RMAP

Using insert_page() we might have previously ended up passing the zeropage
into rmap code.  Make sure that won't happen again.

Note that we won't check the huge zeropage for now, which might still end
up in RMAP code.

Link: https://lkml.kernel.org/r/20240522125713.775114-4-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/rmap.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 7229b9baf20d..5cb0d419a1d7 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -200,6 +200,9 @@ static inline void __folio_rmap_sanity_checks(struct folio *folio,
 	/* hugetlb folios are handled separately. */
 	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
 
+	/* When (un)mapping zeropages, we should never touch ref+mapcount. */
+	VM_WARN_ON_FOLIO(is_zero_folio(folio), folio);
+
 	/*
 	 * TODO: we get driver-allocated folios that have nothing to do with
 	 * the rmap using vm_insert_page(); therefore, we cannot assume that
-- 
cgit v1.2.3


From 23b1b44e6c61295084284aa7d87db863a7802b92 Mon Sep 17 00:00:00 2001
From: Bang Li <libang.li@antgroup.com>
Date: Wed, 22 May 2024 14:12:02 +0800
Subject: mm: add update_mmu_tlb_range()

Patch series "Add update_mmu_tlb_range() to simplify code", v4.

This series of commits mainly adds the update_mmu_tlb_range() to batch
update tlb in an address range and implement update_mmu_tlb() using
update_mmu_tlb_range().

After commit 19eaf44954df ("mm: thp: support allocation of anonymous
multi-size THP"), We may need to batch update tlb of a certain address
range by calling update_mmu_tlb() in a loop.  Using the
update_mmu_tlb_range(), we can simplify the code and possibly reduce the
execution of some unnecessary code in some architectures.


This patch (of 3):

Add update_mmu_tlb_range(), we can batch update tlb of an address range.

Link: https://lkml.kernel.org/r/20240522061204.117421-1-libang.li@antgroup.com
Link: https://lkml.kernel.org/r/20240522061204.117421-2-libang.li@antgroup.com
Signed-off-by: Bang Li <libang.li@antgroup.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgtable.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 18019f037bae..17d1caee39ab 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -729,6 +729,13 @@ static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
  * fault. This function updates TLB only, do nothing with cache or others.
  * It is the difference with function update_mmu_cache.
  */
+#ifndef update_mmu_tlb_range
+static inline void update_mmu_tlb_range(struct vm_area_struct *vma,
+				unsigned long address, pte_t *ptep, unsigned int nr)
+{
+}
+#endif
+
 #ifndef __HAVE_ARCH_UPDATE_MMU_TLB
 static inline void update_mmu_tlb(struct vm_area_struct *vma,
 				unsigned long address, pte_t *ptep)
-- 
cgit v1.2.3


From 8f65aa32239f1c3f11b7a25bd5921223bafc5fed Mon Sep 17 00:00:00 2001
From: Bang Li <libang.li@antgroup.com>
Date: Wed, 22 May 2024 14:12:03 +0800
Subject: mm: implement update_mmu_tlb() using update_mmu_tlb_range()

Let's make update_mmu_tlb() simply a generic wrapper around
update_mmu_tlb_range().  Only the latter can now be overridden by the
architecture.  We can now remove __HAVE_ARCH_UPDATE_MMU_TLB as well.

Link: https://lkml.kernel.org/r/20240522061204.117421-3-libang.li@antgroup.com
Signed-off-by: Bang Li <libang.li@antgroup.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgtable.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 17d1caee39ab..117b807e3f89 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -736,13 +736,11 @@ static inline void update_mmu_tlb_range(struct vm_area_struct *vma,
 }
 #endif
 
-#ifndef __HAVE_ARCH_UPDATE_MMU_TLB
 static inline void update_mmu_tlb(struct vm_area_struct *vma,
 				unsigned long address, pte_t *ptep)
 {
+	update_mmu_tlb_range(vma, address, ptep, 1);
 }
-#define __HAVE_ARCH_UPDATE_MMU_TLB
-#endif
 
 /*
  * Some architectures may be able to avoid expensive synchronization
-- 
cgit v1.2.3


From b8b9488d50b7150bd4830dfff487e8d4ef6589ba Mon Sep 17 00:00:00 2001
From: Jane Chu <jane.chu@oracle.com>
Date: Fri, 24 May 2024 15:53:04 -0600
Subject: mm/memory-failure: improve memory failure action_result messages

Added two explicit MF_MSG messages describing failure in
get_hwpoison_page.  Attemped to document the definition of various action
names, and made a few adjustment to the action_result() calls.

Link: https://lkml.kernel.org/r/20240524215306.2705454-4-jane.chu@oracle.com
Signed-off-by: Jane Chu <jane.chu@oracle.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Acked-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Oscar Salvador <oalvador@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h      | 2 ++
 include/ras/ras_event.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 99174fac3d47..f4bf94ca99e3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4096,6 +4096,7 @@ enum mf_action_page_type {
 	MF_MSG_DIFFERENT_COMPOUND,
 	MF_MSG_HUGE,
 	MF_MSG_FREE_HUGE,
+	MF_MSG_GET_HWPOISON,
 	MF_MSG_UNMAP_FAILED,
 	MF_MSG_DIRTY_SWAPCACHE,
 	MF_MSG_CLEAN_SWAPCACHE,
@@ -4109,6 +4110,7 @@ enum mf_action_page_type {
 	MF_MSG_BUDDY,
 	MF_MSG_DAX,
 	MF_MSG_UNSPLIT_THP,
+	MF_MSG_ALREADY_POISONED,
 	MF_MSG_UNKNOWN,
 };
 
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 7c47151d5c72..cf7f19b7ce64 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -360,6 +360,7 @@ TRACE_EVENT(aer_event,
 	EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \
 	EM ( MF_MSG_HUGE, "huge page" )					\
 	EM ( MF_MSG_FREE_HUGE, "free huge page" )			\
+	EM ( MF_MSG_GET_HWPOISON, "get hwpoison page" )			\
 	EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" )		\
 	EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" )		\
 	EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" )		\
@@ -373,6 +374,7 @@ TRACE_EVENT(aer_event,
 	EM ( MF_MSG_BUDDY, "free buddy page" )				\
 	EM ( MF_MSG_DAX, "dax page" )					\
 	EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" )			\
+	EM ( MF_MSG_ALREADY_POISONED, "already poisoned" )		\
 	EMe ( MF_MSG_UNKNOWN, "unknown page" )
 
 /*
-- 
cgit v1.2.3


From 188f87f2648b13f5de17d5e068f18d317e0c1f98 Mon Sep 17 00:00:00 2001
From: Eric Chanudet <echanude@redhat.com>
Date: Wed, 22 May 2024 16:38:01 -0400
Subject: mm/mm_init: use node's number of cpus in
 deferred_page_init_max_threads

x86_64 is already using the node's cpu as maximum threads.  Make that the
default for all archs setting DEFERRED_STRUCT_PAGE_INIT.

This returns to the behavior prior making the function arch-specific with
commit ecd096506922 ("mm: make deferred init's max threads
arch-specific").

Setting DEFERRED_STRUCT_PAGE_INIT and testing on a few arm64 platforms
shows faster deferred_init_memmap completions:

|         | x13s        | SA8775p-ride | Ampere R137-P31 | Ampere HR330 |
|         | Metal, 32GB | VM, 36GB     | VM, 58GB        | Metal, 128GB |
|         | 8cpus       | 8cpus        | 8cpus           | 32cpus       |
|---------|-------------|--------------|-----------------|--------------|
| threads |  ms     (%) | ms       (%) |  ms         (%) |  ms      (%) |
|---------|-------------|--------------|-----------------|--------------|
| 1       | 108    (0%) | 72      (0%) | 224        (0%) | 324     (0%) |
| cpus    |  24  (-77%) | 36    (-50%) |  40      (-82%) |  56   (-82%) |

Michael Ellerman reported:

: On a machine here (1TB, 40 cores, 4KB pages) the existing code gives:
:
:   [    0.500124] node 2 deferred pages initialised in 210ms
:   [    0.515790] node 3 deferred pages initialised in 230ms
:   [    0.516061] node 0 deferred pages initialised in 230ms
:   [    0.516522] node 7 deferred pages initialised in 230ms
:   [    0.516672] node 4 deferred pages initialised in 230ms
:   [    0.516798] node 6 deferred pages initialised in 230ms
:   [    0.517051] node 5 deferred pages initialised in 230ms
:   [    0.523887] node 1 deferred pages initialised in 240ms
:
: vs with the patch:
:
:   [    0.379613] node 0 deferred pages initialised in 90ms
:   [    0.380388] node 1 deferred pages initialised in 90ms
:   [    0.380540] node 4 deferred pages initialised in 100ms
:   [    0.390239] node 6 deferred pages initialised in 100ms
:   [    0.390249] node 2 deferred pages initialised in 100ms
:   [    0.390786] node 3 deferred pages initialised in 110ms
:   [    0.396721] node 5 deferred pages initialised in 110ms
:   [    0.397095] node 7 deferred pages initialised in 110ms
:
: Which is a nice speedup.

[echanude@redhat.com: v3]
  Link: https://lkml.kernel.org/r/20240528185455.643227-4-echanude@redhat.com
Link: https://lkml.kernel.org/r/20240522203758.626932-4-echanude@redhat.com
Signed-off-by: Eric Chanudet <echanude@redhat.com>
Tested-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc)
Reviewed-by: Baoquan He <bhe@redhat.com>
Acked-by: Alexander Gordeev <agordeev@linux.ibm.com>
Acked-by: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memblock.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index e2082240586d..40c62aca36ec 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -335,8 +335,6 @@ void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
 	for (; i != U64_MAX;					  \
 	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
 
-int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask);
-
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 /**
-- 
cgit v1.2.3


From ffc3c8a631eeadd0de63605cecfb6ba544245352 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Fri, 24 May 2024 09:49:50 +0800
Subject: mm: memcontrol: remove page_memcg()

The page_memcg() only called by mod_memcg_page_state(), so squash it to
cleanup page_memcg().

Link: https://lkml.kernel.org/r/20240524014950.187805-1-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 030d34e9d117..3d1599146afe 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -443,11 +443,6 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio)
 	return __folio_memcg(folio);
 }
 
-static inline struct mem_cgroup *page_memcg(struct page *page)
-{
-	return folio_memcg(page_folio(page));
-}
-
 /**
  * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio.
  * @folio: Pointer to the folio.
@@ -1014,7 +1009,7 @@ static inline void mod_memcg_page_state(struct page *page,
 		return;
 
 	rcu_read_lock();
-	memcg = page_memcg(page);
+	memcg = folio_memcg(page_folio(page));
 	if (memcg)
 		mod_memcg_state(memcg, idx, val);
 	rcu_read_unlock();
@@ -1133,11 +1128,6 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio)
 	return NULL;
 }
 
-static inline struct mem_cgroup *page_memcg(struct page *page)
-{
-	return NULL;
-}
-
 static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
 {
 	WARN_ON_ONCE(!rcu_read_lock_held());
@@ -1636,7 +1626,7 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
 	spin_unlock_irqrestore(&lruvec->lru_lock, flags);
 }
 
-/* Test requires a stable page->memcg binding, see page_memcg() */
+/* Test requires a stable folio->memcg binding, see folio_memcg() */
 static inline bool folio_matches_lruvec(struct folio *folio,
 		struct lruvec *lruvec)
 {
-- 
cgit v1.2.3


From 06668257a355a05483c188e35a18c80471d06987 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 24 May 2024 19:18:10 +0100
Subject: mm: remove page_mapping()

All callers are now converted, delete this compatibility wrapper.  Also
fix up some comments which referred to page_mapping.

Link: https://lkml.kernel.org/r/20240423225552.4113447-7-willy@infradead.org
Link: https://lkml.kernel.org/r/20240524181813.698813-1-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/buffer_head.h |  2 +-
 include/linux/page-flags.h  | 23 ++++++++++++-----------
 include/linux/pagemap.h     |  1 -
 3 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index e022e40b099e..14acf1bbe0ce 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -53,7 +53,7 @@ typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
  * filesystem and block layers.  Nowadays the basic I/O unit
  * is the bio, and buffer_heads are used for extracting block
  * mappings (via a get_block_t call), for tracking state within
- * a page (via a page_mapping) and for wrapping bio submission
+ * a folio (via a folio_mapping) and for wrapping bio submission
  * for backward compatibility reasons (e.g. submit_bh).
  */
 struct buffer_head {
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b9e914e1face..813eea2efe26 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -655,27 +655,28 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
 #endif
 
 /*
- * On an anonymous page mapped into a user virtual memory area,
- * page->mapping points to its anon_vma, not to a struct address_space;
+ * On an anonymous folio mapped into a user virtual memory area,
+ * folio->mapping points to its anon_vma, not to a struct address_space;
  * with the PAGE_MAPPING_ANON bit set to distinguish it.  See rmap.h.
  *
  * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled,
  * the PAGE_MAPPING_MOVABLE bit may be set along with the PAGE_MAPPING_ANON
- * bit; and then page->mapping points, not to an anon_vma, but to a private
+ * bit; and then folio->mapping points, not to an anon_vma, but to a private
  * structure which KSM associates with that merged page.  See ksm.h.
  *
  * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable
- * page and then page->mapping points to a struct movable_operations.
+ * page and then folio->mapping points to a struct movable_operations.
  *
- * Please note that, confusingly, "page_mapping" refers to the inode
- * address_space which maps the page from disk; whereas "page_mapped"
- * refers to user virtual address space into which the page is mapped.
+ * Please note that, confusingly, "folio_mapping" refers to the inode
+ * address_space which maps the folio from disk; whereas "folio_mapped"
+ * refers to user virtual address space into which the folio is mapped.
  *
  * For slab pages, since slab reuses the bits in struct page to store its
- * internal states, the page->mapping does not exist as such, nor do these
- * flags below.  So in order to avoid testing non-existent bits, please
- * make sure that PageSlab(page) actually evaluates to false before calling
- * the following functions (e.g., PageAnon).  See mm/slab.h.
+ * internal states, the folio->mapping does not exist as such, nor do
+ * these flags below.  So in order to avoid testing non-existent bits,
+ * please make sure that folio_test_slab(folio) actually evaluates to
+ * false before calling the following functions (e.g., folio_test_anon).
+ * See mm/slab.h.
  */
 #define PAGE_MAPPING_ANON	0x1
 #define PAGE_MAPPING_MOVABLE	0x2
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 1586b5c21e81..e37e16ebff7a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -426,7 +426,6 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping)
 #endif
 }
 
-struct address_space *page_mapping(struct page *);
 struct address_space *folio_mapping(struct folio *);
 struct address_space *swapcache_mapping(struct folio *);
 
-- 
cgit v1.2.3


From 1df07243483c1a32c8f2f93b06dc52a583a4dd1c Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Fri, 24 May 2024 13:36:18 +0800
Subject: rmap: remove DEFINE_PAGE_VMA_WALK()

This are no users since commit 40d707f33db5 ("mm/ksm: use folio in
write_protect_page"), so remove DEFINE_PAGE_VMA_WALK().

Link: https://lkml.kernel.org/r/20240524053618.208895-1-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Alex Shi <alexs@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/rmap.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 5cb0d419a1d7..bb53e5920b88 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -684,16 +684,6 @@ struct page_vma_mapped_walk {
 	unsigned int flags;
 };
 
-#define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags)	\
-	struct page_vma_mapped_walk name = {				\
-		.pfn = page_to_pfn(_page),				\
-		.nr_pages = compound_nr(_page),				\
-		.pgoff = page_to_pgoff(_page),				\
-		.vma = _vma,						\
-		.address = _address,					\
-		.flags = _flags,					\
-	}
-
 #define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags)	\
 	struct page_vma_mapped_walk name = {				\
 		.pfn = folio_pfn(_folio),				\
-- 
cgit v1.2.3


From 940d6683c79950b21b3762124eabfa9b2f6fee96 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Fri, 24 May 2024 13:28:42 +0800
Subject: mm: migrate: remove migrate_folio_extra()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

migrate_folio_extra() is only called in migrate.c now, convert it a static
function and take a new src_private argument which could be shared by
migrate_folio() and filemap_migrate_folio() to simplify code a bit.

Link: https://lkml.kernel.org/r/20240524052843.182275-5-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Jane Chu <jane.chu@oracle.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jiaqi Yan <jiaqiyan@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/migrate.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 2ce13e8a309b..517f70b70620 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -63,8 +63,6 @@ extern const char *migrate_reason_names[MR_TYPES];
 #ifdef CONFIG_MIGRATION
 
 void putback_movable_pages(struct list_head *l);
-int migrate_folio_extra(struct address_space *mapping, struct folio *dst,
-		struct folio *src, enum migrate_mode mode, int extra_count);
 int migrate_folio(struct address_space *mapping, struct folio *dst,
 		struct folio *src, enum migrate_mode mode);
 int migrate_pages(struct list_head *l, new_folio_t new, free_folio_t free,
-- 
cgit v1.2.3


From 906632843d00a4a42072b19c423b30a9e7adef3c Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Fri, 24 May 2024 13:28:43 +0800
Subject: mm: remove MIGRATE_SYNC_NO_COPY mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 2916ecc0f9d4 ("mm/migrate: new migrate mode MIGRATE_SYNC_NO_COPY")
introduce a new MIGRATE_SYNC_NO_COPY mode to allow to offload the copy to
a device DMA engine, which is only used __migrate_device_pages() to decide
whether or not copy the old page, and the MIGRATE_SYNC_NO_COPY mode only
set in hmm, as the MIGRATE_SYNC_NO_COPY set is removed by previous
cleanup, it seems that we could remove the unnecessary
MIGRATE_SYNC_NO_COPY.

Link: https://lkml.kernel.org/r/20240524052843.182275-6-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Jane Chu <jane.chu@oracle.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jiaqi Yan <jiaqiyan@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/migrate_mode.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h
index f37cc03f9369..9fb482bb7323 100644
--- a/include/linux/migrate_mode.h
+++ b/include/linux/migrate_mode.h
@@ -7,16 +7,11 @@
  *	on most operations but not ->writepage as the potential stall time
  *	is too significant
  * MIGRATE_SYNC will block when migrating pages
- * MIGRATE_SYNC_NO_COPY will block when migrating pages but will not copy pages
- *	with the CPU. Instead, page copy happens outside the migratepage()
- *	callback and is likely using a DMA engine. See migrate_vma() and HMM
- *	(mm/hmm.c) for users of this mode.
  */
 enum migrate_mode {
 	MIGRATE_ASYNC,
 	MIGRATE_SYNC_LIGHT,
 	MIGRATE_SYNC,
-	MIGRATE_SYNC_NO_COPY,
 };
 
 enum migrate_reason {
-- 
cgit v1.2.3


From ebfba0045176cb013f49cb3e5bd9f0b16eba203c Mon Sep 17 00:00:00 2001
From: Chuanhua Han <hanchuanhua@oppo.com>
Date: Wed, 29 May 2024 20:28:19 +1200
Subject: mm: swap: introduce swap_free_nr() for batched swap_free()

Patch series "large folios swap-in: handle refault cases first", v5.

This patchset is extracted from the large folio swapin series[1],
primarily addressing the handling of scenarios involving large folios in
the swap cache.  Currently, it is particularly focused on addressing the
refaulting of mTHP, which is still undergoing reclamation.  This approach
aims to streamline code review and expedite the integration of this
segment into the MM tree.

It relies on Ryan's swap-out series[2], leveraging the helper function
swap_pte_batch() introduced by that series.

Presently, do_swap_page only encounters a large folio in the swap cache
before the large folio is released by vmscan.  However, the code should
remain equally useful once we support large folio swap-in via
swapin_readahead().  This approach can effectively reduce page faults and
eliminate most redundant checks and early exits for MTE restoration in
recent MTE patchset[3].

The large folio swap-in for SWP_SYNCHRONOUS_IO and swapin_readahead() will
be split into separate patch sets and sent at a later time.

[1] https://lore.kernel.org/linux-mm/20240304081348.197341-1-21cnbao@gmail.com/
[2] https://lore.kernel.org/linux-mm/20240408183946.2991168-1-ryan.roberts@arm.com/
[3] https://lore.kernel.org/linux-mm/20240322114136.61386-1-21cnbao@gmail.com/


This patch (of 6):

While swapping in a large folio, we need to free swaps related to the
whole folio.  To avoid frequently acquiring and releasing swap locks, it
is better to introduce an API for batched free.  Furthermore, this new
function, swap_free_nr(), is designed to efficiently handle various
scenarios for releasing a specified number, nr, of swap entries.

Link: https://lkml.kernel.org/r/20240529082824.150954-1-21cnbao@gmail.com
Link: https://lkml.kernel.org/r/20240529082824.150954-2-21cnbao@gmail.com
Signed-off-by: Chuanhua Han <hanchuanhua@oppo.com>
Co-developed-by: Barry Song <v-songbaohua@oppo.com>
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Acked-by: Chris Li <chrisl@kernel.org>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Gao Xiang <xiang@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kairui Song <kasong@tencent.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Khalid Aziz <khalid.aziz@oracle.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/swap.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index bd450023b9a4..0f41fe49c9dc 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -478,6 +478,7 @@ extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t);
 extern int swapcache_prepare(swp_entry_t);
 extern void swap_free(swp_entry_t);
+extern void swap_free_nr(swp_entry_t entry, int nr_pages);
 extern void swapcache_free_entries(swp_entry_t *entries, int n);
 extern void free_swap_and_cache_nr(swp_entry_t entry, int nr);
 int swap_type_of(dev_t device, sector_t offset);
@@ -559,6 +560,10 @@ static inline void swap_free(swp_entry_t swp)
 {
 }
 
+static inline void swap_free_nr(swp_entry_t entry, int nr_pages)
+{
+}
+
 static inline void put_swap_folio(struct folio *folio, swp_entry_t swp)
 {
 }
-- 
cgit v1.2.3


From 54f7a49c20ebb5189980c53e6e66709d22bee572 Mon Sep 17 00:00:00 2001
From: Barry Song <v-songbaohua@oppo.com>
Date: Wed, 29 May 2024 20:28:20 +1200
Subject: mm: remove the implementation of swap_free() and always use
 swap_free_nr()

To streamline maintenance efforts, we propose removing the implementation
of swap_free().  Instead, we can simply invoke swap_free_nr() with nr set
to 1.  swap_free_nr() is designed with a bitmap consisting of only one
long, resulting in overhead that can be ignored for cases where nr equals
1.

A prime candidate for leveraging swap_free_nr() lies within
kernel/power/swap.c.  Implementing this change facilitates the adoption of
batch processing for hibernation.

Link: https://lkml.kernel.org/r/20240529082824.150954-3-21cnbao@gmail.com
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Suggested-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Acked-by: Chris Li <chrisl@kernel.org>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Len Brown <len.brown@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Chuanhua Han <hanchuanhua@oppo.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Gao Xiang <xiang@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kairui Song <kasong@tencent.com>
Cc: Khalid Aziz <khalid.aziz@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/swap.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0f41fe49c9dc..d33ce740b695 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -477,7 +477,6 @@ extern int add_swap_count_continuation(swp_entry_t, gfp_t);
 extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t);
 extern int swapcache_prepare(swp_entry_t);
-extern void swap_free(swp_entry_t);
 extern void swap_free_nr(swp_entry_t entry, int nr_pages);
 extern void swapcache_free_entries(swp_entry_t *entries, int n);
 extern void free_swap_and_cache_nr(swp_entry_t entry, int nr);
@@ -556,10 +555,6 @@ static inline int swapcache_prepare(swp_entry_t swp)
 	return 0;
 }
 
-static inline void swap_free(swp_entry_t swp)
-{
-}
-
 static inline void swap_free_nr(swp_entry_t entry, int nr_pages)
 {
 }
@@ -608,6 +603,11 @@ static inline void free_swap_and_cache(swp_entry_t entry)
 	free_swap_and_cache_nr(entry, 1);
 }
 
+static inline void swap_free(swp_entry_t entry)
+{
+	swap_free_nr(entry, 1);
+}
+
 #ifdef CONFIG_MEMCG
 static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
 {
-- 
cgit v1.2.3


From 29f252cdc293f4a50b5d3dcbed53701d8444614d Mon Sep 17 00:00:00 2001
From: Barry Song <v-songbaohua@oppo.com>
Date: Wed, 29 May 2024 20:28:22 +1200
Subject: mm: introduce arch_do_swap_page_nr() which allows restore metadata
 for nr pages

Should do_swap_page() have the capability to directly map a large folio,
metadata restoration becomes necessary for a specified number of pages
denoted as nr.  It's important to highlight that metadata restoration is
solely required by the SPARC platform, which, however, does not enable
THP_SWAP.  Consequently, in the present kernel configuration, there exists
no practical scenario where users necessitate the restoration of nr
metadata.  Platforms implementing THP_SWAP might invoke this function with
nr values exceeding 1, subsequent to do_swap_page() successfully mapping
an entire large folio.  Nonetheless, their arch_do_swap_page_nr()
functions remain empty.

Link: https://lkml.kernel.org/r/20240529082824.150954-5-21cnbao@gmail.com
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: Khalid Aziz <khalid.aziz@oracle.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Chuanhua Han <hanchuanhua@oppo.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Gao Xiang <xiang@kernel.org>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kairui Song <kasong@tencent.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgtable.h | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 117b807e3f89..2f32eaccf0b9 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1089,6 +1089,15 @@ static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b)
 })
 
 #ifndef __HAVE_ARCH_DO_SWAP_PAGE
+static inline void arch_do_swap_page_nr(struct mm_struct *mm,
+				     struct vm_area_struct *vma,
+				     unsigned long addr,
+				     pte_t pte, pte_t oldpte,
+				     int nr)
+{
+
+}
+#else
 /*
  * Some architectures support metadata associated with a page. When a
  * page is being swapped out, this metadata must be saved so it can be
@@ -1097,12 +1106,17 @@ static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b)
  * page as metadata for the page. arch_do_swap_page() can restore this
  * metadata when a page is swapped back in.
  */
-static inline void arch_do_swap_page(struct mm_struct *mm,
-				     struct vm_area_struct *vma,
-				     unsigned long addr,
-				     pte_t pte, pte_t oldpte)
-{
-
+static inline void arch_do_swap_page_nr(struct mm_struct *mm,
+					struct vm_area_struct *vma,
+					unsigned long addr,
+					pte_t pte, pte_t oldpte,
+					int nr)
+{
+	for (int i = 0; i < nr; i++) {
+		arch_do_swap_page(vma->vm_mm, vma, addr + i * PAGE_SIZE,
+				pte_advance_pfn(pte, i),
+				pte_advance_pfn(oldpte, i));
+	}
 }
 #endif
 
-- 
cgit v1.2.3


From 16540dae959d862909716d5c854e9b3aee285609 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Thu, 30 May 2024 10:14:27 -0700
Subject: mm/hugetlb: mm/memory_hotplug: use a folio in scan_movable_pages()

By using a folio in scan_movable_pages() we convert the last user of the
page-based hugetlb information macro functions to the folio version.
After this conversion, we can safely remove the page-based definitions
from include/linux/hugetlb.h.

Link: https://lkml.kernel.org/r/20240530171427.242018-1-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 15a58f69782c..279aca379b95 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -616,9 +616,7 @@ static __always_inline						\
 bool folio_test_hugetlb_##flname(struct folio *folio)		\
 	{	void *private = &folio->private;		\
 		return test_bit(HPG_##flname, private);		\
-	}							\
-static inline int HPage##uname(struct page *page)		\
-	{ return test_bit(HPG_##flname, &(page->private)); }
+	}
 
 #define SETHPAGEFLAG(uname, flname)				\
 static __always_inline						\
@@ -637,8 +635,6 @@ void folio_clear_hugetlb_##flname(struct folio *folio)		\
 #define TESTHPAGEFLAG(uname, flname)				\
 static inline bool						\
 folio_test_hugetlb_##flname(struct folio *folio)		\
-	{ return 0; }						\
-static inline int HPage##uname(struct page *page)		\
 	{ return 0; }
 
 #define SETHPAGEFLAG(uname, flname)				\
-- 
cgit v1.2.3


From fefc6e6631ff43427e81f08c8e49f7787ff0213a Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeel.butt@linux.dev>
Date: Tue, 28 May 2024 09:40:50 -0700
Subject: memcg: rearrange fields of mem_cgroup_per_node

Kernel test robot reported [1] performance regression for will-it-scale
test suite's page_fault2 test case for the commit 70a64b7919cb ("memcg:
dynamically allocate lruvec_stats").  After inspection it seems like the
commit has unintentionally introduced false cache sharing.

After the commit the fields of mem_cgroup_per_node which get read on the
performance critical path share the cacheline with the fields which get
updated often on LRU page allocations or deallocations.  This has caused
contention on that cacheline and the workloads which manipulates a lot of
LRU pages are regressed as reported by the test report.

The solution is to rearrange the fields of mem_cgroup_per_node such that
the false sharing is eliminated.  Let's move all the read only pointers at
the start of the struct, followed by memcg-v1 only fields and at the end
fields which get updated often.

Experiment setup: Ran fallocate1, fallocate2, page_fault1, page_fault2 and
page_fault3 from the will-it-scale test suite inside a three level memcg
with /tmp mounted as tmpfs on two different machines, one a single numa
node and the other one, two node machine.

 $ ./[testcase]_processes -t $NR_CPUS -s 50

Results for single node, 52 CPU machine:

Testcase        base        with-patch

fallocate1      1031081     1431291  (38.80 %)
fallocate2      1029993     1421421  (38.00 %)
page_fault1     2269440     3405788  (50.07 %)
page_fault2     2375799     3572868  (50.30 %)
page_fault3     28641143    28673950 ( 0.11 %)

Results for dual node, 80 CPU machine:

Testcase        base        with-patch

fallocate1      2976288     3641185  (22.33 %)
fallocate2      2979366     3638181  (22.11 %)
page_fault1     6221790     7748245  (24.53 %)
page_fault2     6482854     7847698  (21.05 %)
page_fault3     28804324    28991870 ( 0.65 %)

Link: https://lkml.kernel.org/r/20240528164050.2625718-1-shakeel.butt@linux.dev
Fixes: 70a64b7919cb ("memcg: dynamically allocate lruvec_stats")
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Reported-by: kernel test robot <oliver.sang@intel.com>
Reviewed-by: Yosry Ahmed <yosryahmed@google.com>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Feng Tang <feng.tang@intel.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Yin Fengwei <fengwei.yin@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3d1599146afe..7403dd5926eb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -96,23 +96,29 @@ struct mem_cgroup_reclaim_iter {
  * per-node information in memory controller.
  */
 struct mem_cgroup_per_node {
-	struct lruvec		lruvec;
+	/* Keep the read-only fields at the start */
+	struct mem_cgroup	*memcg;		/* Back pointer, we cannot */
+						/* use container_of	   */
 
 	struct lruvec_stats_percpu __percpu	*lruvec_stats_percpu;
 	struct lruvec_stats			*lruvec_stats;
-
-	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
-
-	struct mem_cgroup_reclaim_iter	iter;
-
 	struct shrinker_info __rcu	*shrinker_info;
 
+	/*
+	 * Memcg-v1 only stuff in middle as buffer between read mostly fields
+	 * and update often fields to avoid false sharing. Once v1 stuff is
+	 * moved in a separate struct, an explicit padding is needed.
+	 */
+
 	struct rb_node		tree_node;	/* RB tree node */
 	unsigned long		usage_in_excess;/* Set to the value by which */
 						/* the soft limit is exceeded*/
 	bool			on_tree;
-	struct mem_cgroup	*memcg;		/* Back pointer, we cannot */
-						/* use container_of	   */
+
+	/* Fields which get updated often at the end. */
+	struct lruvec		lruvec;
+	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
+	struct mem_cgroup_reclaim_iter	iter;
 };
 
 struct mem_cgroup_threshold {
-- 
cgit v1.2.3


From 21664442be1bf79094dd9d21b8833acbfbd80ea7 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Tue, 28 May 2024 16:43:13 +0200
Subject: percpu: add __this_cpu_try_cmpxchg()

Add __this_cpu_try_cmpxchg() version of the percpu op.

Link: https://lkml.kernel.org/r/20240528144345.5980-1-ubizjak@gmail.com
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Acked-by: Dennis Zhou <dennis@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/percpu-defs.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index ec3573119923..8efce7414fad 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -475,6 +475,12 @@ do {									\
 	raw_cpu_cmpxchg(pcp, oval, nval);				\
 })
 
+#define __this_cpu_try_cmpxchg(pcp, ovalp, nval)			\
+({									\
+	__this_cpu_preempt_check("try_cmpxchg");			\
+	raw_cpu_try_cmpxchg(pcp, ovalp, nval);				\
+})
+
 #define __this_cpu_sub(pcp, val)	__this_cpu_add(pcp, -(typeof(pcp))(val))
 #define __this_cpu_inc(pcp)		__this_cpu_add(pcp, 1)
 #define __this_cpu_dec(pcp)		__this_cpu_sub(pcp, 1)
-- 
cgit v1.2.3


From 4b98995530b77a97912230d8e1564ba7738db19c Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Tue, 11 Jun 2024 18:11:07 +0800
Subject: mm: shmem: add multi-size THP sysfs interface for anonymous shmem

To support the use of mTHP with anonymous shmem, add a new sysfs interface
'shmem_enabled' in the '/sys/kernel/mm/transparent_hugepage/hugepages-kB/'
directory for each mTHP to control whether shmem is enabled for that mTHP,
with a value similar to the top level 'shmem_enabled', which can be set
to: "always", "inherit (to inherit the top level setting)", "within_size",
"advise", "never".  An 'inherit' option is added to ensure compatibility
with these global settings, and the options 'force' and 'deny' are
dropped, which are rather testing artifacts from the old ages.

By default, PMD-sized hugepages have enabled="inherit" and all other
hugepage sizes have enabled="never" for
'/sys/kernel/mm/transparent_hugepage/hugepages-xxkB/shmem_enabled'.

In addition, if top level value is 'force', then only PMD-sized hugepages
have enabled="inherit", otherwise configuration will be failed and vice
versa.  That means now we will avoid using non-PMD sized THP to override
the global huge allocation.

[baolin.wang@linux.alibaba.com: fix transhuge.rst indentation]
  Link: https://lkml.kernel.org/r/b189d815-998b-4dfd-ba89-218ff51313f8@linux.alibaba.com
[akpm@linux-foundation.org: reflow transhuge.rst addition to 80 cols]
[baolin.wang@linux.alibaba.com: move huge_shmem_orders_lock under CONFIG_SYSFS]
  Link: https://lkml.kernel.org/r/eb34da66-7f12-44f3-a39e-2bcc90c33354@linux.alibaba.com
[akpm@linux-foundation.org: huge_memory.c needs mm_types.h]
Link: https://lkml.kernel.org/r/ffddfa8b3cb4266ff963099ab78cfd7184c57ac7.1718090413.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <v-songbaohua@oppo.com>
Cc: Daniel Gomez <da.gomez@samsung.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Pankaj Raghav <p.raghav@samsung.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 088d66a54643..6a1edd752968 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -6,6 +6,7 @@
 #include <linux/mm_types.h>
 
 #include <linux/fs.h> /* only for vma_is_dax() */
+#include <linux/kobject.h>
 
 vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -63,6 +64,7 @@ ssize_t single_hugepage_flag_show(struct kobject *kobj,
 				  struct kobj_attribute *attr, char *buf,
 				  enum transparent_hugepage_flag flag);
 extern struct kobj_attribute shmem_enabled_attr;
+extern struct kobj_attribute thpsize_shmem_enabled_attr;
 
 /*
  * Mask of all large folio orders supported for anonymous THP; all orders up to
@@ -265,6 +267,14 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
 	return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders);
 }
 
+struct thpsize {
+	struct kobject kobj;
+	struct list_head node;
+	int order;
+};
+
+#define to_thpsize(kobj) container_of(kobj, struct thpsize, kobj)
+
 enum mthp_stat_item {
 	MTHP_STAT_ANON_FAULT_ALLOC,
 	MTHP_STAT_ANON_FAULT_FALLBACK,
-- 
cgit v1.2.3


From e7a2ab7b3bb5d87f99f2ea3d4481d52fc5ceb52d Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Tue, 11 Jun 2024 18:11:08 +0800
Subject: mm: shmem: add mTHP support for anonymous shmem

Commit 19eaf44954df adds multi-size THP (mTHP) for anonymous pages, that
can allow THP to be configured through the sysfs interface located at
'/sys/kernel/mm/transparent_hugepage/hugepage-XXkb/enabled'.

However, the anonymous shmem will ignore the anonymous mTHP rule
configured through the sysfs interface, and can only use the PMD-mapped
THP, that is not reasonable.  Users expect to apply the mTHP rule for all
anonymous pages, including the anonymous shmem, in order to enjoy the
benefits of mTHP.  For example, lower latency than PMD-mapped THP, smaller
memory bloat than PMD-mapped THP, contiguous PTEs on ARM architecture to
reduce TLB miss etc.  In addition, the mTHP interfaces can be extended to
support all shmem/tmpfs scenarios in the future, especially for the shmem
mmap() case.

The primary strategy is similar to supporting anonymous mTHP.  Introduce a
new interface '/mm/transparent_hugepage/hugepage-XXkb/shmem_enabled',
which can have almost the same values as the top-level
'/sys/kernel/mm/transparent_hugepage/shmem_enabled', with adding a new
additional "inherit" option and dropping the testing options 'force' and
'deny'.  By default all sizes will be set to "never" except PMD size,
which is set to "inherit".  This ensures backward compatibility with the
anonymous shmem enabled of the top level, meanwhile also allows
independent control of anonymous shmem enabled for each mTHP.

Link: https://lkml.kernel.org/r/65796c1e72e51e15f3410195b5c2d5b6c160d411.1718090413.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <v-songbaohua@oppo.com>
Cc: Daniel Gomez <da.gomez@samsung.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Pankaj Raghav <p.raghav@samsung.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 6a1edd752968..53b7a137f460 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -560,6 +560,16 @@ static inline bool thp_migration_supported(void)
 {
 	return false;
 }
+
+static inline int highest_order(unsigned long orders)
+{
+	return 0;
+}
+
+static inline int next_order(unsigned long *orders, int prev)
+{
+	return 0;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 static inline int split_folio_to_list_to_order(struct folio *folio,
-- 
cgit v1.2.3


From 66f44583f9b617d74ffa2487e75a9c3adf344ddb Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Tue, 11 Jun 2024 18:11:10 +0800
Subject: mm: shmem: add mTHP counters for anonymous shmem

Add mTHP counters for anonymous shmem.

[baolin.wang@linux.alibaba.com: update Documentation/admin-guide/mm/transhuge.rst]
  Link: https://lkml.kernel.org/r/d86e2e7f-4141-432b-b2ba-c6691f36ef0b@linux.alibaba.com
Link: https://lkml.kernel.org/r/4fd9e467d49ae4a747e428bcd821c7d13125ae67.1718090413.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Lance Yang <ioworker0@gmail.com>
Cc: Barry Song <v-songbaohua@oppo.com>
Cc: Daniel Gomez <da.gomez@samsung.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Pankaj Raghav <p.raghav@samsung.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 53b7a137f460..7ad41de5eaea 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -281,6 +281,9 @@ enum mthp_stat_item {
 	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
 	MTHP_STAT_SWPOUT,
 	MTHP_STAT_SWPOUT_FALLBACK,
+	MTHP_STAT_FILE_ALLOC,
+	MTHP_STAT_FILE_FALLBACK,
+	MTHP_STAT_FILE_FALLBACK_CHARGE,
 	__MTHP_STAT_COUNT
 };
 
-- 
cgit v1.2.3


From cdd9a571b7d8465353fe2e2d8d1edd8a58d82021 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 7 Jun 2024 14:23:56 +0200
Subject: fs/proc: move page_mapcount() to fs/proc/internal.h

...  and rename it to folio_precise_page_mapcount().  fs/proc is the last
remaining user, and that should stay that way.

While at it, cleanup kpagecount_read() a bit: there are still some legacy
leftovers -- when the interface was introduced it returned the page
refcount, but was changed briefly afterwards to return the page mapcount.
Further, some simple folio conversion.

Once we stop using the per-page mapcounts of large folios, all
folio_precise_page_mapcount() users will have to implement an alternative
way to achieve what they are trying to achieve, possibly in a less precise
way.

[dan.carpenter@linaro.org: fix uninitialized variable in pagemap_pmd_range()]
  Link: https://lkml.kernel.org/r/9d6eaba7-92f8-4a70-8765-38a519680a87@moroto.mountain
Link: https://lkml.kernel.org/r/20240607122357.115423-6-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Oscar Salvador <osalvador@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 27 +--------------------------
 1 file changed, 1 insertion(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f4bf94ca99e3..7f864d5f52b7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1202,8 +1202,7 @@ static inline int is_vmalloc_or_module_addr(const void *x)
 /*
  * How many times the entire folio is mapped as a single unit (eg by a
  * PMD or PUD entry).  This is probably not what you want, except for
- * debugging purposes - it does not include PTE-mapped sub-pages; look
- * at folio_mapcount() or page_mapcount() instead.
+ * debugging purposes or implementation of other core folio_*() primitives.
  */
 static inline int folio_entire_mapcount(const struct folio *folio)
 {
@@ -1221,30 +1220,6 @@ static inline void page_mapcount_reset(struct page *page)
 	atomic_set(&(page)->_mapcount, -1);
 }
 
-/**
- * page_mapcount() - Number of times this precise page is mapped.
- * @page: The page.
- *
- * The number of times this page is mapped.  If this page is part of
- * a large folio, it includes the number of times this page is mapped
- * as part of that folio.
- *
- * Will report 0 for pages which cannot be mapped into userspace, eg
- * slab, page tables and similar.
- */
-static inline int page_mapcount(struct page *page)
-{
-	int mapcount = atomic_read(&page->_mapcount) + 1;
-
-	/* Handle page_has_type() pages */
-	if (mapcount < PAGE_MAPCOUNT_RESERVE + 1)
-		mapcount = 0;
-	if (unlikely(PageCompound(page)))
-		mapcount += folio_entire_mapcount(page_folio(page));
-
-	return mapcount;
-}
-
 static inline int folio_large_mapcount(const struct folio *folio)
 {
 	VM_WARN_ON_FOLIO(!folio_test_large(folio), folio);
-- 
cgit v1.2.3


From 7a581204b1fa4fed233ed3b7ffcf6847cc383dbc Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 7 Jun 2024 10:37:10 +0200
Subject: mm/highmem: reimplement totalhigh_pages() by walking zones

Patch series "mm/highmem: don't track highmem pages manually".

Let's remove highmem special-casing from adjust_managed_page_count(), to
result in less confusion why memblock manually adjusts totalram_pages, and
__free_pages_core() only adjusts the zone's managed pages -- what about
the highmem pages that adjust_managed_page_count() updates?

Now, we only maintain totalram_pages and a zone's managed pages
independent of highmem support.  We can derive the number of highmem pages
simply by looking at the relevant zone's managed pages.  I don't think
there is any particular fast path that needs a maximum-efficient
totalhigh_pages() implementation.

Note that highmem memory is currently initialized using
free_highmem_page()->free_reserved_page(), not __free_pages_core().  In
the future we might want to also use __free_pages_core() to initialize
highmem memory, to make that less special, and consider moving
totalram_pages updates into __free_pages_core() [1], so we can just use
adjust_managed_page_count() in there as well.

Booting a simple kernel in QEMU reveals no highmem accounting change:

Before:
  Memory: 3095448K/3145208K available (14802K kernel code, 2073K rwdata,
  5000K rodata, 740K init, 556K bss, 49760K reserved, 0K cma-reserved,
  2244488K highmem)

After:
  Memory: 3095276K/3145208K available (14802K kernel code, 2073K rwdata,
  5000K rodata, 740K init, 556K bss, 49932K reserved, 0K cma-reserved,
  2244488K highmem)

[1] https://lkml.kernel.org/r/20240601133402.2675-1-richard.weiyang@gmail.com


This patch (of 2):

Can we get rid of the highmem ifdef in adjust_managed_page_count()?
Likely yes: we don't have that many totalhigh_pages() users, and they all
don't seem to be very performance critical.

So let's implement totalhigh_pages() like nr_free_highpages(), collecting
information from all zones.  This is now similar to what we do in
si_meminfo_node() to collect the per-node highmem page count.

In the common case (single node, 3-4 zones), we really shouldn't care.  We
could optimize a bit further (only walk ZONE_HIGHMEM and ZONE_MOVABLE if
required), but there doesn't seem a real need for that.

[david@redhat.com: fix build bot complaint]
  Link: https://lkml.kernel.org/r/b57e5bc4-eb72-40e3-add4-57dfa6e03df6@redhat.com
Link: https://lkml.kernel.org/r/20240607083711.62833-1-david@redhat.com
Link: https://lkml.kernel.org/r/20240607083711.62833-2-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/highmem-internal.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index a3028e400a9c..65f865fbbac0 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -132,7 +132,7 @@ static inline void __kunmap_atomic(const void *addr)
 }
 
 unsigned int __nr_free_highpages(void);
-extern atomic_long_t _totalhigh_pages;
+unsigned long __totalhigh_pages(void);
 
 static inline unsigned int nr_free_highpages(void)
 {
@@ -141,12 +141,7 @@ static inline unsigned int nr_free_highpages(void)
 
 static inline unsigned long totalhigh_pages(void)
 {
-	return (unsigned long)atomic_long_read(&_totalhigh_pages);
-}
-
-static inline void totalhigh_pages_add(long count)
-{
-	atomic_long_add(count, &_totalhigh_pages);
+	return __totalhigh_pages();
 }
 
 static inline bool is_kmap_addr(const void *x)
-- 
cgit v1.2.3


From 90b8fab5cdc441735d388e286c715da7c85b24f1 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 7 Jun 2024 10:37:11 +0200
Subject: mm/highmem: make nr_free_highpages() return "unsigned long"

It looks rather weird that totalhigh_pages() returns an "unsigned long"
but nr_free_highpages() returns an "unsigned int".

Let's return an "unsigned long" from nr_free_highpages() to be consistent.

While at it, use a plain "0" instead of a "0UL" in the !CONFIG_HIGHMEM
totalhigh_pages() implementation, to make these look alike as well.

Link: https://lkml.kernel.org/r/20240607083711.62833-3-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/highmem-internal.h | 8 ++++----
 include/linux/highmem.h          | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index 65f865fbbac0..dd100e849f5e 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -131,10 +131,10 @@ static inline void __kunmap_atomic(const void *addr)
 		preempt_enable();
 }
 
-unsigned int __nr_free_highpages(void);
+unsigned long __nr_free_highpages(void);
 unsigned long __totalhigh_pages(void);
 
-static inline unsigned int nr_free_highpages(void)
+static inline unsigned long nr_free_highpages(void)
 {
 	return __nr_free_highpages();
 }
@@ -234,8 +234,8 @@ static inline void __kunmap_atomic(const void *addr)
 		preempt_enable();
 }
 
-static inline unsigned int nr_free_highpages(void) { return 0; }
-static inline unsigned long totalhigh_pages(void) { return 0UL; }
+static inline unsigned long nr_free_highpages(void) { return 0; }
+static inline unsigned long totalhigh_pages(void) { return 0; }
 
 static inline bool is_kmap_addr(const void *x)
 {
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 00341b56d291..fa6891e06316 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -179,7 +179,7 @@ static inline void *kmap_local_folio(struct folio *folio, size_t offset);
 static inline void *kmap_atomic(struct page *page);
 
 /* Highmem related interfaces for management code */
-static inline unsigned int nr_free_highpages(void);
+static inline unsigned long nr_free_highpages(void);
 static inline unsigned long totalhigh_pages(void);
 
 #ifndef ARCH_HAS_FLUSH_ANON_PAGE
-- 
cgit v1.2.3


From 29e847d2ade3cdff36afe095fdbeb9b5f71a197a Mon Sep 17 00:00:00 2001
From: Lance Yang <ioworker0@gmail.com>
Date: Fri, 14 Jun 2024 09:51:37 +0800
Subject: mm/rmap: integrate PMD-mapped folio splitting into pagewalk loop

In preparation for supporting try_to_unmap_one() to unmap PMD-mapped
folios, start the pagewalk first, then call split_huge_pmd_address() to
split the folio.

Link: https://lkml.kernel.org/r/20240614015138.31461-3-ioworker0@gmail.com
Signed-off-by: Lance Yang <ioworker0@gmail.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Suggested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Cc: Bang Li <libang.li@antgroup.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Fangrui Song <maskray@google.com>
Cc: Jeff Xie <xiehuan09@gmail.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yin Fengwei <fengwei.yin@intel.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h |  6 ++++++
 include/linux/rmap.h    | 24 ++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7ad41de5eaea..9f720b0731c4 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -428,6 +428,9 @@ static inline bool thp_migration_supported(void)
 	return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
 }
 
+void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
+			   pmd_t *pmd, bool freeze, struct folio *folio);
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 static inline bool folio_test_pmd_mappable(struct folio *folio)
@@ -490,6 +493,9 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long address, bool freeze, struct folio *folio) {}
 static inline void split_huge_pmd_address(struct vm_area_struct *vma,
 		unsigned long address, bool freeze, struct folio *folio) {}
+static inline void split_huge_pmd_locked(struct vm_area_struct *vma,
+					 unsigned long address, pmd_t *pmd,
+					 bool freeze, struct folio *folio) {}
 
 #define split_huge_pud(__vma, __pmd, __address)	\
 	do { } while (0)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bb53e5920b88..bf46787c8eba 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -703,6 +703,30 @@ static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
 		spin_unlock(pvmw->ptl);
 }
 
+/**
+ * page_vma_mapped_walk_restart - Restart the page table walk.
+ * @pvmw: Pointer to struct page_vma_mapped_walk.
+ *
+ * It restarts the page table walk when changes occur in the page
+ * table, such as splitting a PMD. Ensures that the PTL held during
+ * the previous walk is released and resets the state to allow for
+ * a new walk starting at the current address stored in pvmw->address.
+ */
+static inline void
+page_vma_mapped_walk_restart(struct page_vma_mapped_walk *pvmw)
+{
+	WARN_ON_ONCE(!pvmw->pmd && !pvmw->pte);
+
+	if (likely(pvmw->ptl))
+		spin_unlock(pvmw->ptl);
+	else
+		WARN_ON_ONCE(1);
+
+	pvmw->ptl = NULL;
+	pvmw->pmd = NULL;
+	pvmw->pte = NULL;
+}
+
 bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
 
 /*
-- 
cgit v1.2.3


From 735ecdfaf4e802209caf34b7ac45adf448c54ccc Mon Sep 17 00:00:00 2001
From: Lance Yang <ioworker0@gmail.com>
Date: Fri, 14 Jun 2024 09:51:38 +0800
Subject: mm/vmscan: avoid split lazyfree THP during shrink_folio_list()

When the user no longer requires the pages, they would use
madvise(MADV_FREE) to mark the pages as lazy free.  Subsequently, they
typically would not re-write to that memory again.

During memory reclaim, if we detect that the large folio and its PMD are
both still marked as clean and there are no unexpected references (such as
GUP), so we can just discard the memory lazily, improving the efficiency
of memory reclamation in this case.

On an Intel i5 CPU, reclaiming 1GiB of lazyfree THPs using
mem_cgroup_force_empty() results in the following runtimes in seconds
(shorter is better):

--------------------------------------------
|     Old       |      New       |  Change  |
--------------------------------------------
|   0.683426    |    0.049197    |  -92.80% |
--------------------------------------------

[ioworker0@gmail.com: minor changes per David]
  Link: https://lkml.kernel.org/r/20240622100057.3352-1-ioworker0@gmail.com
Link: https://lkml.kernel.org/r/20240614015138.31461-4-ioworker0@gmail.com
Signed-off-by: Lance Yang <ioworker0@gmail.com>
Suggested-by: Zi Yan <ziy@nvidia.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Cc: Bang Li <libang.li@antgroup.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Fangrui Song <maskray@google.com>
Cc: Jeff Xie <xiehuan09@gmail.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yin Fengwei <fengwei.yin@intel.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 9f720b0731c4..212cca384d7e 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -430,6 +430,8 @@ static inline bool thp_migration_supported(void)
 
 void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
 			   pmd_t *pmd, bool freeze, struct folio *folio);
+bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr,
+			   pmd_t *pmdp, struct folio *folio);
 
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 
@@ -497,6 +499,13 @@ static inline void split_huge_pmd_locked(struct vm_area_struct *vma,
 					 unsigned long address, pmd_t *pmd,
 					 bool freeze, struct folio *folio) {}
 
+static inline bool unmap_huge_pmd_locked(struct vm_area_struct *vma,
+					 unsigned long addr, pmd_t *pmdp,
+					 struct folio *folio)
+{
+	return false;
+}
+
 #define split_huge_pud(__vma, __pmd, __address)	\
 	do { } while (0)
 
-- 
cgit v1.2.3


From 2b33a97c94bc44468fc1d54b745269c0cf0b7bb2 Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed@google.com>
Date: Tue, 11 Jun 2024 02:45:14 +0000
Subject: mm: zswap: rename is_zswap_enabled() to zswap_is_enabled()

In preparation for introducing a similar function, rename
is_zswap_enabled() to use zswap_* prefix like other zswap functions.

Link: https://lkml.kernel.org/r/20240611024516.1375191-1-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chris Li <chrisl@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/zswap.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index 2a85b941db97..ce5e7bfe8f1e 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -35,7 +35,7 @@ void zswap_swapoff(int type);
 void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg);
 void zswap_lruvec_state_init(struct lruvec *lruvec);
 void zswap_folio_swapin(struct folio *folio);
-bool is_zswap_enabled(void);
+bool zswap_is_enabled(void);
 #else
 
 struct zswap_lruvec_state {};
@@ -60,7 +60,7 @@ static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {}
 static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {}
 static inline void zswap_folio_swapin(struct folio *folio) {}
 
-static inline bool is_zswap_enabled(void)
+static inline bool zswap_is_enabled(void)
 {
 	return false;
 }
-- 
cgit v1.2.3


From 2d4d2b1cfb85cc07f6d5619acb882d8b11e55cf4 Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed@google.com>
Date: Tue, 11 Jun 2024 02:45:15 +0000
Subject: mm: zswap: add zswap_never_enabled()

Add zswap_never_enabled() to skip the xarray lookup in zswap_load() if
zswap was never enabled on the system.  It is implemented using static
branches for efficiency, as enabling zswap should be a rare event.  This
could shave some cycles off zswap_load() when CONFIG_ZSWAP is used but
zswap is never enabled.

However, the real motivation behind this patch is two-fold:
- Incoming large folio swapin work will need to fallback to order-0
  folios if zswap was ever enabled, because any part of the folio could be
  in zswap, until proper handling of large folios with zswap is added.

- A warning and recovery attempt will be added in a following change in
  case the above was not done incorrectly.  Zswap will fail the read if
  the folio is large and it was ever enabled.

Expose zswap_never_enabled() in the header for the swapin work to use
it later.

[yosryahmed@google.com: expose zswap_never_enabled() in the header]
  Link: https://lkml.kernel.org/r/Zmjf0Dr8s9xSW41X@google.com
Link: https://lkml.kernel.org/r/20240611024516.1375191-2-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Chris Li <chrisl@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/zswap.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index ce5e7bfe8f1e..bf83ae5e285d 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -36,6 +36,7 @@ void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg);
 void zswap_lruvec_state_init(struct lruvec *lruvec);
 void zswap_folio_swapin(struct folio *folio);
 bool zswap_is_enabled(void);
+bool zswap_never_enabled(void);
 #else
 
 struct zswap_lruvec_state {};
@@ -65,6 +66,11 @@ static inline bool zswap_is_enabled(void)
 	return false;
 }
 
+static inline bool zswap_never_enabled(void)
+{
+	return false;
+}
+
 #endif
 
 #endif /* _LINUX_ZSWAP_H */
-- 
cgit v1.2.3


From 15995a35247442aefa0ffe36a6dad51cb46b0918 Mon Sep 17 00:00:00 2001
From: Sourav Panda <souravpanda@google.com>
Date: Wed, 5 Jun 2024 22:27:51 +0000
Subject: mm: report per-page metadata information

Today, we do not have any observability of per-page metadata and how much
it takes away from the machine capacity.  Thus, we want to describe the
amount of memory that is going towards per-page metadata, which can vary
depending on build configuration, machine architecture, and system use.

This patch adds 2 fields to /proc/vmstat that can used as shown below:

Accounting per-page metadata allocated by boot-allocator:
	/proc/vmstat:nr_memmap_boot * PAGE_SIZE

Accounting per-page metadata allocated by buddy-allocator:
	/proc/vmstat:nr_memmap * PAGE_SIZE

Accounting total Perpage metadata allocated on the machine:
	(/proc/vmstat:nr_memmap_boot +
	 /proc/vmstat:nr_memmap) * PAGE_SIZE

Utility for userspace:

Observability: Describe the amount of memory overhead that is going to
per-page metadata on the system at any given time since this overhead is
not currently observable.

Debugging: Tracking the changes or absolute value in struct pages can help
detect anomalies as they can be correlated with other metrics in the
machine (e.g., memtotal, number of huge pages, etc).

page_ext overheads: Some kernel features such as page_owner
page_table_check that use page_ext can be optionally enabled via kernel
parameters.  Having the total per-page metadata information helps users
precisely measure impact.  Furthermore, page-metadata metrics will reflect
the amount of struct pages reliquished (or overhead reduced) when
hugetlbfs pages are reserved which will vary depending on whether hugetlb
vmemmap optimization is enabled or not.

For background and results see:
lore.kernel.org/all/20240220214558.3377482-1-souravpanda@google.com

Link: https://lkml.kernel.org/r/20240605222751.1406125-1-souravpanda@google.com
Signed-off-by: Sourav Panda <souravpanda@google.com>
Acked-by: David Rientjes <rientjes@google.com>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Chen Linxuan <chenlinxuan@uniontech.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ivan Babrou <ivan@cloudflare.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tomas Mudrunka <tomas.mudrunka@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Xu <weixugc@google.com>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmzone.h | 2 ++
 include/linux/vmstat.h | 4 ++++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 586a8f0104d7..cb7f265c2b96 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -220,6 +220,8 @@ enum node_stat_item {
 	PGDEMOTE_KSWAPD,
 	PGDEMOTE_DIRECT,
 	PGDEMOTE_KHUGEPAGED,
+	NR_MEMMAP, /* page metadata allocated through buddy allocator */
+	NR_MEMMAP_BOOT, /* page metadata allocated through boot allocator */
 	NR_VM_NODE_STAT_ITEMS
 };
 
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 735eae6e272c..16b0cfa80502 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -624,4 +624,8 @@ static inline void lruvec_stat_sub_folio(struct folio *folio,
 {
 	lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
 }
+
+void __meminit mod_node_early_perpage_metadata(int nid, long delta);
+void __meminit store_early_perpage_metadata(void);
+
 #endif /* _LINUX_VMSTAT_H */
-- 
cgit v1.2.3


From 47179fe03588caa13a9bae642b058901709ddc55 Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Wed, 12 Jun 2024 09:24:08 +0000
Subject: mm/hugetlb_cgroup: prepare cftypes based on template

Unlike other cgroup subsystems, the hugetlb cgroup does not provide a
static array of cftype that explicitly displays the properties, handling
functions, etc., of each file.  Instead, it dynamically creates the
attribute of cftypes based on the hstate during the startup procedure.
This reduces the readability of the code.

To fix this issue, introduce two templates of cftypes, and rebuild the
attributes according to the hstate to make it ready to be added to cgroup
framework.

Link: https://lkml.kernel.org/r/20240612092409.2027592-3-xiujianfeng@huawei.com
Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: kernel test robot <oliver.sang@intel.com>
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Subject: mm/hugetlb_cgroup: register lockdep key for cftype
Date: Tue, 18 Jun 2024 07:19:22 +0000

When CONFIG_DEBUG_LOCK_ALLOC is enabled, the following commands can
trigger a bug,

mount -t cgroup2 none /sys/fs/cgroup
cd /sys/fs/cgroup
echo "+hugetlb" > cgroup.subtree_control

The log is as below:

BUG: key ffff8880046d88d8 has not been registered!
------------[ cut here ]------------
DEBUG_LOCKS_WARN_ON(1)
WARNING: CPU: 3 PID: 226 at kernel/locking/lockdep.c:4945 lockdep_init_map_type+0x185/0x220
Modules linked in:
CPU: 3 PID: 226 Comm: bash Not tainted 6.10.0-rc4-next-20240617-g76db4c64526c #544
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
RIP: 0010:lockdep_init_map_type+0x185/0x220
Code: 00 85 c0 0f 84 6c ff ff ff 8b 3d 6a d1 85 01 85 ff 0f 85 5e ff ff ff 48 c7 c6 21 99 4a 82 48 c7 c7 60 29 49 82 e8 3b 2e f5
RSP: 0018:ffffc9000083fc30 EFLAGS: 00000282
RAX: 0000000000000000 RBX: ffffffff828dd820 RCX: 0000000000000027
RDX: ffff88803cd9cac8 RSI: 0000000000000001 RDI: ffff88803cd9cac0
RBP: ffff88800674fbb0 R08: ffffffff828ce248 R09: 00000000ffffefff
R10: ffffffff8285e260 R11: ffffffff828b8eb8 R12: ffff8880046d88d8
R13: 0000000000000000 R14: 0000000000000000 R15: ffff8880067281c0
FS:  00007f68601ea740(0000) GS:ffff88803cd80000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00005614f3ebc740 CR3: 000000000773a000 CR4: 00000000000006f0
Call Trace:
 <TASK>
 ? __warn+0x77/0xd0
 ? lockdep_init_map_type+0x185/0x220
 ? report_bug+0x189/0x1a0
 ? handle_bug+0x3c/0x70
 ? exc_invalid_op+0x18/0x70
 ? asm_exc_invalid_op+0x1a/0x20
 ? lockdep_init_map_type+0x185/0x220
 __kernfs_create_file+0x79/0x100
 cgroup_addrm_files+0x163/0x380
 ? find_held_lock+0x2b/0x80
 ? find_held_lock+0x2b/0x80
 ? find_held_lock+0x2b/0x80
 css_populate_dir+0x73/0x180
 cgroup_apply_control_enable+0x12f/0x3a0
 cgroup_subtree_control_write+0x30b/0x440
 kernfs_fop_write_iter+0x13a/0x1f0
 vfs_write+0x341/0x450
 ksys_write+0x64/0xe0
 do_syscall_64+0x4b/0x110
 entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f68602d9833
Code: 8b 15 61 26 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 64 8b 04 25 18 00 00 00 85 c0 75 14 b8 01 00 00 00 08
RSP: 002b:00007fff9bbdf8e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000000009 RCX: 00007f68602d9833
RDX: 0000000000000009 RSI: 00005614f3ebc740 RDI: 0000000000000001
RBP: 00005614f3ebc740 R08: 000000000000000a R09: 0000000000000008
R10: 00005614f3db6ba0 R11: 0000000000000246 R12: 0000000000000009
R13: 00007f68603bd6a0 R14: 0000000000000009 R15: 00007f68603b8880

For lockdep, there is a sanity check in lockdep_init_map_type(), the
lock-class key must either have been allocated statically or must
have been registered as a dynamic key. However the commit e18df2889ff9
("mm/hugetlb_cgroup: prepare cftypes based on template") has changed
the cftypes from static allocated objects to dynamic allocated objects,
so the cft->lockdep_key must be registered proactively.

[xiujianfeng@huawei.com: fix BUG()]
  Link: https://lkml.kernel.org/r/20240619015527.2212698-1-xiujianfeng@huawei.com
Link: https://lkml.kernel.org/r/20240618071922.2127289-1-xiujianfeng@huawei.com
Link: https://lore.kernel.org/all/602186b3-5ce3-41b3-90a3-134792cc2a48@samsung.com/
Fixes: e18df2889ff9 ("mm/hugetlb_cgroup: prepare cftypes based on template")
Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202406181046.8d8b2492-oliver.sang@intel.com
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: SeongJae Park <sj@kernel.org>
Closes: https://lore.kernel.org/20240618233608.400367-1-sj@kernel.org
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/cgroup-defs.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index ea48c861cd36..dc151cb0ef09 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -676,9 +676,7 @@ struct cftype {
 	__poll_t (*poll)(struct kernfs_open_file *of,
 			 struct poll_table_struct *pt);
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lock_class_key	lockdep_key;
-#endif
 };
 
 /*
-- 
cgit v1.2.3


From b79d715c43354010775862cfcd2d01cb04d0de47 Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Wed, 12 Jun 2024 09:24:09 +0000
Subject: mm/hugetlb_cgroup: switch to the new cftypes

The previous patch has already reconstructed the cftype attributes based
on the templates and saved them in dfl_cftypes and legacy_cftypes.  then
remove the old procedure and switch to the new cftypes.

Link: https://lkml.kernel.org/r/20240612092409.2027592-4-xiujianfeng@huawei.com
Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 279aca379b95..a951c0d06061 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -686,11 +686,6 @@ struct hstate {
 	unsigned int nr_huge_pages_node[MAX_NUMNODES];
 	unsigned int free_huge_pages_node[MAX_NUMNODES];
 	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
-#ifdef CONFIG_CGROUP_HUGETLB
-	/* cgroup control files */
-	struct cftype cgroup_files_dfl[8];
-	struct cftype cgroup_files_legacy[10];
-#endif
 	char name[HSTATE_NAME_LEN];
 };
 
-- 
cgit v1.2.3


From ceb32d6aa93ce3282a724f3a0828b4b84d5035f1 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Wed, 12 Jun 2024 15:18:24 +0800
Subject: mm/memory-failure: remove MF_MSG_SLAB

Since commit 46df8e73a4a3 ("mm: free up PG_slab"), MF_MSG_SLAB becomes
unused.  Remove it.  No functional change intended.

Link: https://lkml.kernel.org/r/20240612071835.157004-3-linmiaohe@huawei.com
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h      | 1 -
 include/ras/ras_event.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7f864d5f52b7..8a38adaf5480 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4067,7 +4067,6 @@ enum mf_result {
 enum mf_action_page_type {
 	MF_MSG_KERNEL,
 	MF_MSG_KERNEL_HIGH_ORDER,
-	MF_MSG_SLAB,
 	MF_MSG_DIFFERENT_COMPOUND,
 	MF_MSG_HUGE,
 	MF_MSG_FREE_HUGE,
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index cf7f19b7ce64..9bc707fe8819 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -356,7 +356,6 @@ TRACE_EVENT(aer_event,
 #define MF_PAGE_TYPE		\
 	EM ( MF_MSG_KERNEL, "reserved kernel page" )			\
 	EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" )	\
-	EM ( MF_MSG_SLAB, "kernel slab page" )				\
 	EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \
 	EM ( MF_MSG_HUGE, "huge page" )					\
 	EM ( MF_MSG_FREE_HUGE, "free huge page" )			\
-- 
cgit v1.2.3


From 3a78f77fd1fb82c32cb7971a8a97c5cbc83ab69e Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Wed, 12 Jun 2024 15:18:32 +0800
Subject: mm/memory-failure: move some function declarations into internal.h

There are some functions only used inside mm.  Move them into internal.h.
No functional change intended.

Link: https://lkml.kernel.org/r/20240612071835.157004-11-linmiaohe@huawei.com
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202405251049.hxjwX7zO-lkp@intel.com/
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h         | 7 -------
 include/linux/page-flags.h | 5 -----
 include/linux/rmap.h       | 2 --
 3 files changed, 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8a38adaf5480..51fe207026f8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4000,7 +4000,6 @@ extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 					bool *migratable_cleared);
 void num_poisoned_pages_inc(unsigned long pfn);
 void num_poisoned_pages_sub(unsigned long pfn, long i);
-struct task_struct *task_early_kill(struct task_struct *tsk, int force_early);
 #else
 static inline void memory_failure_queue(unsigned long pfn, int flags)
 {
@@ -4021,12 +4020,6 @@ static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
 }
 #endif
 
-#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_KSM)
-void add_to_kill_ksm(struct task_struct *tsk, struct page *p,
-		     struct vm_area_struct *vma, struct list_head *to_kill,
-		     unsigned long ksm_addr);
-#endif
-
 #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
 extern void memblk_nr_poison_inc(unsigned long pfn);
 extern void memblk_nr_poison_sub(unsigned long pfn, long i);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 813eea2efe26..b041bc058fb1 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -616,11 +616,6 @@ PAGEFLAG_FALSE(Uncached, uncached)
 PAGEFLAG(HWPoison, hwpoison, PF_ANY)
 TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
 #define __PG_HWPOISON (1UL << PG_hwpoison)
-#define MAGIC_HWPOISON	0x48575053U	/* HWPS */
-extern void SetPageHWPoisonTakenOff(struct page *page);
-extern void ClearPageHWPoisonTakenOff(struct page *page);
-extern bool take_page_off_buddy(struct page *page);
-extern bool put_page_back_buddy(struct page *page);
 #else
 PAGEFLAG_FALSE(HWPoison, hwpoison)
 #define __PG_HWPOISON 0
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bf46787c8eba..694d3d886245 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -747,8 +747,6 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
 
 void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked);
 
-unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
-
 /*
  * rmap_walk_control: To control rmap traversing for specific needs
  *
-- 
cgit v1.2.3


From e36287c6e12d00f2087dc0c4899d8a9c54e22e1c Mon Sep 17 00:00:00 2001
From: Hyeongtak Ji <hyeongtak.ji@sk.com>
Date: Fri, 14 Jun 2024 12:00:05 +0900
Subject: mm/damon/sysfs-schemes: add target_nid on sysfs-schemes

This patch adds target_nid under
  /sys/kernel/mm/damon/admin/kdamonds/<N>/contexts/<N>/schemes/<N>/

The 'target_nid' can be used as the destination node for DAMOS actions
such as DAMOS_MIGRATE_{HOT,COLD} in the follow up patches.

[sj@kernel.org: document target_nid file]
  Link: https://lkml.kernel.org/r/20240618213630.84846-3-sj@kernel.org
Link: https://lkml.kernel.org/r/20240614030010.751-4-honggyu.kim@sk.com
Signed-off-by: Hyeongtak Ji <hyeongtak.ji@sk.com>
Signed-off-by: Honggyu Kim <honggyu.kim@sk.com>
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Gregory Price <gregory.price@memverge.com>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/damon.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index f7da65e1ac04..21d6b69a015c 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -374,6 +374,7 @@ struct damos_access_pattern {
  * @apply_interval_us:	The time between applying the @action.
  * @quota:		Control the aggressiveness of this scheme.
  * @wmarks:		Watermarks for automated (in)activation of this scheme.
+ * @target_nid:		Destination node if @action is "migrate_{hot,cold}".
  * @filters:		Additional set of &struct damos_filter for &action.
  * @stat:		Statistics of this scheme.
  * @list:		List head for siblings.
@@ -389,6 +390,10 @@ struct damos_access_pattern {
  * monitoring context are inactive, DAMON stops monitoring either, and just
  * repeatedly checks the watermarks.
  *
+ * @target_nid is used to set the migration target node for migrate_hot or
+ * migrate_cold actions, which means it's only meaningful when @action is either
+ * "migrate_hot" or "migrate_cold".
+ *
  * Before applying the &action to a memory region, &struct damon_operations
  * implementation could check pages of the region and skip &action to respect
  * &filters
@@ -410,6 +415,9 @@ struct damos {
 /* public: */
 	struct damos_quota quota;
 	struct damos_watermarks wmarks;
+	union {
+		int target_nid;
+	};
 	struct list_head filters;
 	struct damos_stat stat;
 	struct list_head list;
@@ -726,7 +734,8 @@ struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
 			enum damos_action action,
 			unsigned long apply_interval_us,
 			struct damos_quota *quota,
-			struct damos_watermarks *wmarks);
+			struct damos_watermarks *wmarks,
+			int target_nid);
 void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
 void damon_destroy_scheme(struct damos *s);
 
-- 
cgit v1.2.3


From ced816a76b8fd1288d48523cc48ed308964d12ed Mon Sep 17 00:00:00 2001
From: Honggyu Kim <honggyu.kim@sk.com>
Date: Fri, 14 Jun 2024 12:00:06 +0900
Subject: mm/migrate: add MR_DAMON to migrate_reason

The current patch series introduces DAMON based migration across NUMA
nodes so it'd be better to have a new migrate_reason in trace events.

Link: https://lkml.kernel.org/r/20240614030010.751-5-honggyu.kim@sk.com
Signed-off-by: Honggyu Kim <honggyu.kim@sk.com>
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Gregory Price <gregory.price@memverge.com>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Hyeongtak Ji <hyeongtak.ji@sk.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/migrate_mode.h   | 1 +
 include/trace/events/migrate.h | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h
index 9fb482bb7323..265c4328b36a 100644
--- a/include/linux/migrate_mode.h
+++ b/include/linux/migrate_mode.h
@@ -24,6 +24,7 @@ enum migrate_reason {
 	MR_CONTIG_RANGE,
 	MR_LONGTERM_PIN,
 	MR_DEMOTION,
+	MR_DAMON,
 	MR_TYPES
 };
 
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index 0190ef725b43..cd01dd7b3640 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -22,7 +22,8 @@
 	EM( MR_NUMA_MISPLACED,	"numa_misplaced")		\
 	EM( MR_CONTIG_RANGE,	"contig_range")			\
 	EM( MR_LONGTERM_PIN,	"longterm_pin")			\
-	EMe(MR_DEMOTION,	"demotion")
+	EM( MR_DEMOTION,	"demotion")			\
+	EMe(MR_DAMON,		"damon")
 
 /*
  * First define the enums in the above macros to be exported to userspace
-- 
cgit v1.2.3


From b51820ebea656be3b48bb16dcdc5ad3f203c4fd7 Mon Sep 17 00:00:00 2001
From: Honggyu Kim <honggyu.kim@sk.com>
Date: Fri, 14 Jun 2024 12:00:07 +0900
Subject: mm/damon/paddr: introduce DAMOS_MIGRATE_COLD action for demotion

This patch introduces DAMOS_MIGRATE_COLD action, which is similar to
DAMOS_PAGEOUT, but migrate folios to the given 'target_nid' in the sysfs
instead of swapping them out.

The 'target_nid' sysfs knob informs the migration target node ID.

Here is one of the example usage of this 'migrate_cold' action.

  $ cd /sys/kernel/mm/damon/admin/kdamonds/<N>
  $ cat contexts/<N>/schemes/<N>/action
  migrate_cold
  $ echo 2 > contexts/<N>/schemes/<N>/target_nid
  $ echo commit > state
  $ numactl -p 0 ./hot_cold 500M 600M &
  $ numastat -c -p hot_cold

  Per-node process memory usage (in MBs)
  PID             Node 0 Node 1 Node 2 Total
  --------------  ------ ------ ------ -----
  701 (hot_cold)     501      0    601  1101

Since there are some common routines with pageout, many functions have
similar logics between pageout and migrate cold.

damon_pa_migrate_folio_list() is a minimized version of
shrink_folio_list().

Link: https://lkml.kernel.org/r/20240614030010.751-6-honggyu.kim@sk.com
Signed-off-by: Honggyu Kim <honggyu.kim@sk.com>
Signed-off-by: Hyeongtak Ji <hyeongtak.ji@sk.com>
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Gregory Price <gregory.price@memverge.com>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/damon.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 21d6b69a015c..56714b6eb0d7 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -105,6 +105,7 @@ struct damon_target {
  * @DAMOS_NOHUGEPAGE:	Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
  * @DAMOS_LRU_PRIO:	Prioritize the region on its LRU lists.
  * @DAMOS_LRU_DEPRIO:	Deprioritize the region on its LRU lists.
+ * @DAMOS_MIGRATE_COLD:	Migrate the regions prioritizing colder regions.
  * @DAMOS_STAT:		Do nothing but count the stat.
  * @NR_DAMOS_ACTIONS:	Total number of DAMOS actions
  *
@@ -122,6 +123,7 @@ enum damos_action {
 	DAMOS_NOHUGEPAGE,
 	DAMOS_LRU_PRIO,
 	DAMOS_LRU_DEPRIO,
+	DAMOS_MIGRATE_COLD,
 	DAMOS_STAT,		/* Do nothing but only record the stat */
 	NR_DAMOS_ACTIONS,
 };
-- 
cgit v1.2.3


From b696722d784fb3610183281d2fd514b0efe97e3b Mon Sep 17 00:00:00 2001
From: Hyeongtak Ji <hyeongtak.ji@sk.com>
Date: Fri, 14 Jun 2024 12:00:08 +0900
Subject: mm/damon/paddr: introduce DAMOS_MIGRATE_HOT action for promotion

This patch introduces DAMOS_MIGRATE_HOT action, which is similar to
DAMOS_MIGRATE_COLD, but proritizes hot pages.

It migrates pages inside the given region to the 'target_nid' NUMA node
in the sysfs.

Here is one of the example usage of this 'migrate_hot' action.

  $ cd /sys/kernel/mm/damon/admin/kdamonds/<N>
  $ cat contexts/<N>/schemes/<N>/action
  migrate_hot
  $ echo 0 > contexts/<N>/schemes/<N>/target_nid
  $ echo commit > state
  $ numactl -p 2 ./hot_cold 500M 600M &
  $ numastat -c -p hot_cold

  Per-node process memory usage (in MBs)
  PID             Node 0 Node 1 Node 2 Total
  --------------  ------ ------ ------ -----
  701 (hot_cold)     501      0    601  1101

Link: https://lkml.kernel.org/r/20240614030010.751-7-honggyu.kim@sk.com
Signed-off-by: Hyeongtak Ji <hyeongtak.ji@sk.com>
Signed-off-by: Honggyu Kim <honggyu.kim@sk.com>
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Gregory Price <gregory.price@memverge.com>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/damon.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 56714b6eb0d7..3d62d98d6359 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -105,6 +105,7 @@ struct damon_target {
  * @DAMOS_NOHUGEPAGE:	Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
  * @DAMOS_LRU_PRIO:	Prioritize the region on its LRU lists.
  * @DAMOS_LRU_DEPRIO:	Deprioritize the region on its LRU lists.
+ * @DAMOS_MIGRATE_HOT:  Migrate the regions prioritizing warmer regions.
  * @DAMOS_MIGRATE_COLD:	Migrate the regions prioritizing colder regions.
  * @DAMOS_STAT:		Do nothing but count the stat.
  * @NR_DAMOS_ACTIONS:	Total number of DAMOS actions
@@ -123,6 +124,7 @@ enum damos_action {
 	DAMOS_NOHUGEPAGE,
 	DAMOS_LRU_PRIO,
 	DAMOS_LRU_DEPRIO,
+	DAMOS_MIGRATE_HOT,
 	DAMOS_MIGRATE_COLD,
 	DAMOS_STAT,		/* Do nothing but only record the stat */
 	NR_DAMOS_ACTIONS,
-- 
cgit v1.2.3


From 3ad1dce6c30175dfd3da29d76853a69affbf7489 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 18 Jun 2024 11:17:58 -0700
Subject: mm/damon/core: implement DAMOS quota goals online commit function

Patch series "mm/damon: introduce DAMON parameters online commit function".

DAMON context struct (damon_ctx) contains user requests (parameters),
internal status, and operation results.  For flexible usages, DAMON API
users are encouraged to manually manipulate the struct.  That works well
for simple use cases.  However, it has turned out that it is not that
simple at least for online parameters udpate.  It is easy to forget
properly maintaining internal status and operation results.  Also, such
manual manipulation for online tuning is implemented multiple times on
DAMON API users including DAMON sysfs interface, DAMON_RECLAIM and
DAMON_LRU_SORT.  As a result, we have multiple sources of bugs for same
problem.  Actually we found and fixed a few bugs from online parameter
updating of DAMON API users.

Implement a function for online DAMON parameters update in core layer, and
replace DAMON API users' manual manipulation code for the use case.  The
core layer function could still have bugs, but this change reduces the
source of bugs for the problem to one place.


This patch (of 12):

Implement functions for supporting online DAMOS quota goals parameters
update.  The function receives two DAMOS quota structs.  One is the struct
that currently being used by a kdamond and therefore to be updated.  The
other one contains the parameters to be applied to the first one.  The
function applies the new parameters to the destination struct while
keeping/updating the internal status.  The function should be called from
parameters-update safe place, like DAMON callbacks.

Link: https://lkml.kernel.org/r/20240618181809.82078-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20240618181809.82078-2-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/damon.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 3d62d98d6359..ce12c9f1b4e4 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -742,6 +742,7 @@ struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
 			int target_nid);
 void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
 void damon_destroy_scheme(struct damos *s);
+int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src);
 
 struct damon_target *damon_new_target(void);
 void damon_add_target(struct damon_ctx *ctx, struct damon_target *t);
-- 
cgit v1.2.3


From 9cb3d0b9dfce6a3258d91e6d69e418d0b4cce46a Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 18 Jun 2024 11:17:59 -0700
Subject: mm/damon/core: implement DAMON context commit function

Implement functions for supporting online DAMON context level parameters
update.  The function receives two DAMON context structs.  One is the
struct that currently being used by a kdamond and therefore to be updated.
The other one contains the parameters to be applied to the first one.
The function applies the new parameters to the destination struct while
keeping/updating the internal status and operation results.  The function
should be called from DAMON context-update-safe place, like DAMON
callbacks.

Link: https://lkml.kernel.org/r/20240618181809.82078-3-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/damon.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index ce12c9f1b4e4..27c546bfc6d4 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -756,6 +756,7 @@ void damon_destroy_ctx(struct damon_ctx *ctx);
 int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs);
 void damon_set_schemes(struct damon_ctx *ctx,
 			struct damos **schemes, ssize_t nr_schemes);
+int damon_commit_ctx(struct damon_ctx *old_ctx, struct damon_ctx *new_ctx);
 int damon_nr_running_ctxs(void);
 bool damon_is_registered_ops(enum damon_ops_id id);
 int damon_register_ops(struct damon_operations *ops);
-- 
cgit v1.2.3


From 6d21dde7adc0a2deb9e0c6b06f42be3f88ca180d Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2024 13:18:59 +0200
Subject: mm: update _mapcount and page_type documentation

Patch series "mm: page_type, zsmalloc and page_mapcount_reset()", v2.

Wanting to remove the remaining abuser of _mapcount/page_type along with
page_mapcount_reset(), I stumbled over zsmalloc, which is yet to be
converted away from "struct page" [1].

Unfortunately, we cannot stop using the page_type field in zsmalloc code
completely for its own purposes.  All other fields in "struct page" are
used one way or the other.  Could we simply store a 2-byte offset value at
the beginning of each page?  Likely, but that will require a bit more
work; and once we have memdesc we might want to move the offset in there
(struct zsalloc?) again.

...  but we can limit the abuse to 16 bit, glue it to a page type that
must be set, and document it.  page_has_type() will always successfully
indicate such zsmalloc pages, and such zsmalloc pages only.

We lose zsmalloc support for PAGE_SIZE > 64KB, which should be tolerable.
We could use more bits from the page type, but 16 bit sounds like a good
idea for now.

So clarify the _mapcount/page_type documentation, use a proper page_type
for zsmalloc, and remove page_mapcount_reset().

[1] https://lore.kernel.org/all/20231130101242.2590384-1-42.hyeyoo@gmail.com/


This patch (of 6):

Let's make it clearer that _mapcount must no longer be used for own
purposes, and how _mapcount and page_type behaves nowadays (also in the
context of hugetlb folios, which are typed folios that will be mapped to
user space).

Move the documentation regarding "-1" over from page_mapcount_reset(),
which we will remove next.  Move "page_type" before "mapcount", to make it
clearer what typed folios are.

Link: https://lkml.kernel.org/r/20240529111904.2069608-1-david@redhat.com
Link: https://lkml.kernel.org/r/20240529111904.2069608-2-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Tested-by: Sergey Senozhatsky <senozhatsky@chromium.org>	[zram/zsmalloc workloads]
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h       |  5 -----
 include/linux/mm_types.h | 28 +++++++++++++++++-----------
 2 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 51fe207026f8..61a7f680f0ba 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1210,11 +1210,6 @@ static inline int folio_entire_mapcount(const struct folio *folio)
 	return atomic_read(&folio->_entire_mapcount) + 1;
 }
 
-/*
- * The atomic page->_mapcount, starts from -1: so that transitions
- * both from it and to it can be tracked, using atomic_inc_and_test
- * and atomic_add_negative(-1).
- */
 static inline void page_mapcount_reset(struct page *page)
 {
 	atomic_set(&(page)->_mapcount, -1);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index af3a0256fa93..278eade2ad4c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -46,9 +46,7 @@ struct mem_cgroup;
  * which is guaranteed to be aligned.  If you use the same storage as
  * page->mapping, you must restore it to NULL before freeing the page.
  *
- * If your page will not be mapped to userspace, you can also use the four
- * bytes in the mapcount union, but you must call page_mapcount_reset()
- * before freeing it.
+ * The mapcount field must not be used for own purposes.
  *
  * If you want to use the refcount field, it must be used in such a way
  * that other CPUs temporarily incrementing and then decrementing the
@@ -152,18 +150,26 @@ struct page {
 
 	union {		/* This union is 4 bytes in size. */
 		/*
-		 * If the page can be mapped to userspace, encodes the number
-		 * of times this page is referenced by a page table.
+		 * For head pages of typed folios, the value stored here
+		 * allows for determining what this page is used for. The
+		 * tail pages of typed folios will not store a type
+		 * (page_type == _mapcount == -1).
+		 *
+		 * See page-flags.h for a list of page types which are currently
+		 * stored here.
 		 */
-		atomic_t _mapcount;
+		unsigned int page_type;
 
 		/*
-		 * If the page is neither PageSlab nor mappable to userspace,
-		 * the value stored here may help determine what this page
-		 * is used for.  See page-flags.h for a list of page types
-		 * which are currently stored here.
+		 * For pages that are part of non-typed folios for which mappings
+		 * are tracked via the RMAP, encodes the number of times this page
+		 * is directly referenced by a page table.
+		 *
+		 * Note that the mapcount is always initialized to -1, so that
+		 * transitions both from it and to it can be tracked, using
+		 * atomic_inc_and_test() and atomic_add_negative(-1).
 		 */
-		unsigned int page_type;
+		atomic_t _mapcount;
 	};
 
 	/* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */
-- 
cgit v1.2.3


From 8db00ad5646171880239b7f10e333278f63d8fcf Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2024 13:19:00 +0200
Subject: mm: allow reuse of the lower 16 bit of the page type with an actual
 type

As long as the owner sets a page type first, we can allow reuse of the
lower 16 bit: sufficient to store an offset into a 64 KiB page, which is
the maximum base page size in *common* configurations (ignoring the 256
KiB variant).  Restrict it to the head page.

We'll use that for zsmalloc next, to set a proper type while still reusing
that field to store information (offset into a base page) that cannot go
elsewhere for now.

Let's reserve the lower 16 bit for that purpose and for catching mapcount
underflows, and let's reduce PAGE_TYPE_BASE to a single bit.

Note that we will still have to overflow the mapcount quite a lot until we
would actually indicate a valid page type.

Start handing out the type bits from highest to lowest, to make it clearer
how many bits for types we have left.  Out of 15 bit we can use for types,
we currently use 6.  If we run out of bits before we have better typing
(e.g., memdesc), we can always investigate storing a value instead [1].

[1] https://lore.kernel.org/all/00ba1dff-7c05-46e8-b0d9-a78ac1cfc198@redhat.com/

[akpm@linux-foundation.org: fix PG_hugetlb typo, per David]
Link: https://lkml.kernel.org/r/20240529111904.2069608-3-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Tested-by: Sergey Senozhatsky <senozhatsky@chromium.org>	[zram/zsmalloc workloads]
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_types.h   |  5 +++++
 include/linux/page-flags.h | 25 +++++++++++++++----------
 2 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 278eade2ad4c..ef09c4eef6d3 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -157,6 +157,11 @@ struct page {
 		 *
 		 * See page-flags.h for a list of page types which are currently
 		 * stored here.
+		 *
+		 * Owners of typed folios may reuse the lower 16 bit of the
+		 * head page page_type field after setting the page type,
+		 * but must reset these 16 bit to -1 before clearing the
+		 * page type.
 		 */
 		unsigned int page_type;
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b041bc058fb1..d221a6a14764 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -941,16 +941,21 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
  */
 
 enum pagetype {
-	PG_buddy	= 0x00000080,
-	PG_offline	= 0x00000100,
-	PG_table	= 0x00000200,
-	PG_guard	= 0x00000400,
-	PG_hugetlb	= 0x00000800,
-	PG_slab		= 0x00001000,
-
-	PAGE_TYPE_BASE	= 0xf0000000,
-	/* Reserve 0x0000007f to catch underflows of _mapcount */
-	PAGE_MAPCOUNT_RESERVE	= -128,
+	PG_buddy	= 0x40000000,
+	PG_offline	= 0x20000000,
+	PG_table	= 0x10000000,
+	PG_guard	= 0x08000000,
+	PG_hugetlb	= 0x04000000,
+	PG_slab		= 0x02000000,
+
+	PAGE_TYPE_BASE	= 0x80000000,
+
+	/*
+	 * Reserve 0xffff0000 - 0xfffffffe to catch _mapcount underflows and
+	 * allow owners that set a type to reuse the lower 16 bit for their own
+	 * purposes.
+	 */
+	PAGE_MAPCOUNT_RESERVE	= ~0x0000ffff,
 };
 
 #define PageType(page, flag)						\
-- 
cgit v1.2.3


From 43d746dc49bb4c82034fce01a92fe67344d664cf Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2024 13:19:01 +0200
Subject: mm/zsmalloc: use a proper page type

Let's clean it up: use a proper page type and store our data (offset into
a page) in the lower 16 bit as documented.

We won't be able to support 256 KiB base pages, which is acceptable.
Teach Kconfig to handle that cleanly using a new CONFIG_HAVE_ZSMALLOC.

Based on this, we should do a proper "struct zsdesc" conversion, as
proposed in [1].

This removes the last _mapcount/page_type offender.

[1] https://lore.kernel.org/all/20231130101242.2590384-1-42.hyeyoo@gmail.com/

Link: https://lkml.kernel.org/r/20240529111904.2069608-4-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Tested-by: Sergey Senozhatsky <senozhatsky@chromium.org>	[zram/zsmalloc workloads]
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index d221a6a14764..0f7c7320391e 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -947,6 +947,7 @@ enum pagetype {
 	PG_guard	= 0x08000000,
 	PG_hugetlb	= 0x04000000,
 	PG_slab		= 0x02000000,
+	PG_zsmalloc	= 0x01000000,
 
 	PAGE_TYPE_BASE	= 0x80000000,
 
@@ -1071,6 +1072,8 @@ FOLIO_TYPE_OPS(hugetlb, hugetlb)
 FOLIO_TEST_FLAG_FALSE(hugetlb)
 #endif
 
+PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
+
 /**
  * PageHuge - Determine if the page belongs to hugetlbfs
  * @page: The page to test.
-- 
cgit v1.2.3


From 11d5401b011e3557894d824dac210f0b18cc3911 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 29 May 2024 13:19:04 +0200
Subject: mm/mm_init: initialize page->_mapcount directly in
 __init_single_page()

Let's simply reinitialize the page->_mapcount directly.  We can now get
rid of page_mapcount_reset().

Link: https://lkml.kernel.org/r/20240529111904.2069608-7-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Tested-by: Sergey Senozhatsky <senozhatsky@chromium.org>	[zram/zsmalloc workloads]
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 61a7f680f0ba..4f75fee497f1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1210,11 +1210,6 @@ static inline int folio_entire_mapcount(const struct folio *folio)
 	return atomic_read(&folio->_entire_mapcount) + 1;
 }
 
-static inline void page_mapcount_reset(struct page *page)
-{
-	atomic_set(&(page)->_mapcount, -1);
-}
-
 static inline int folio_large_mapcount(const struct folio *folio)
 {
 	VM_WARN_ON_FOLIO(!folio_test_large(folio), folio);
-- 
cgit v1.2.3


From 2669324b81e5f67d409d3ad988da77c7c2a09045 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Tue, 4 Jun 2024 19:48:20 +0800
Subject: mm: remove page_maybe_dma_pinned()

After the last user of page_maybe_dma_pinned() is converted to
folio_maybe_dma_pinned(), remove page_maybe_dma_pinned() and update the
document and comment.

[wangkefeng.wang@huawei.com: fix pin_user_pages.rst underlining]
  Link: https://lkml.kernel.org/r/61b256c7-4989-44ec-83db-f34a1bd4be2d@huawei.com
Link: https://lkml.kernel.org/r/20240604114822.2089819-3-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Helge Deller <deller@gmx.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4f75fee497f1..cb4aa725252e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1918,8 +1918,8 @@ static inline struct folio *pfn_folio(unsigned long pfn)
  *
  * For more information, please see Documentation/core-api/pin_user_pages.rst.
  *
- * Return: True, if it is likely that the page has been "dma-pinned".
- * False, if the page is definitely not dma-pinned.
+ * Return: True, if it is likely that the folio has been "dma-pinned".
+ * False, if the folio is definitely not dma-pinned.
  */
 static inline bool folio_maybe_dma_pinned(struct folio *folio)
 {
@@ -1938,11 +1938,6 @@ static inline bool folio_maybe_dma_pinned(struct folio *folio)
 		GUP_PIN_COUNTING_BIAS;
 }
 
-static inline bool page_maybe_dma_pinned(struct page *page)
-{
-	return folio_maybe_dma_pinned(page_folio(page));
-}
-
 /*
  * This should most likely only be called during fork() to see whether we
  * should break the cow immediately for an anon page on the src mm.
-- 
cgit v1.2.3


From a929e0d10f3db1a53668f6b9845db27d7fb63759 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Tue, 4 Jun 2024 19:48:22 +0800
Subject: mm: remove page_mkclean()

There are no more users of page_mkclean(), remove it and update the
document and comment.

Link: https://lkml.kernel.org/r/20240604114822.2089819-5-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Helge Deller <deller@gmx.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h   | 2 +-
 include/linux/rmap.h | 4 ----
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index cb4aa725252e..101945baffc7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1577,7 +1577,7 @@ static inline void put_page(struct page *page)
  * issue.
  *
  * Locking: the lockless algorithm described in folio_try_get_rcu()
- * provides safe operation for get_user_pages(), page_mkclean() and
+ * provides safe operation for get_user_pages(), folio_mkclean() and
  * other calls that race to set up page table entries.
  */
 #define GUP_PIN_COUNTING_BIAS (1U << 10)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 694d3d886245..980fa5d75d69 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -802,8 +802,4 @@ static inline int folio_mkclean(struct folio *folio)
 }
 #endif	/* CONFIG_MMU */
 
-static inline int page_mkclean(struct page *page)
-{
-	return folio_mkclean(page_folio(page));
-}
 #endif	/* _LINUX_RMAP_H */
-- 
cgit v1.2.3


From 503b158fc30f203a1854c87183ca3467c6466001 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 7 Jun 2024 11:09:37 +0200
Subject: mm/memory_hotplug: initialize memmap of !ZONE_DEVICE with
 PageOffline() instead of PageReserved()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We currently initialize the memmap such that PG_reserved is set and the
refcount of the page is 1.  In virtio-mem code, we have to manually clear
that PG_reserved flag to make memory offlining with partially hotplugged
memory blocks possible: has_unmovable_pages() would otherwise bail out on
such pages.

We want to avoid PG_reserved where possible and move to typed pages
instead.  Further, we want to further enlighten memory offlining code
about PG_offline: offline pages in an online memory section.  One example
is handling managed page count adjustments in a cleaner way during memory
offlining.

So let's initialize the pages with PG_offline instead of PG_reserved.
generic_online_page()->__free_pages_core() will now clear that flag before
handing that memory to the buddy.

Note that the page refcount is still 1 and would forbid offlining of such
memory except when special care is take during GOING_OFFLINE as currently
only implemented by virtio-mem.

With this change, we can now get non-PageReserved() pages in the XEN
balloon list.  From what I can tell, that can already happen via
decrease_reservation(), so that should be fine.

HV-balloon should not really observe a change: partial online memory
blocks still cannot get surprise-offlined, because the refcount of these
PageOffline() pages is 1.

Update virtio-mem, HV-balloon and XEN-balloon code to be aware that
hotplugged pages are now PageOffline() instead of PageReserved() before
they are handed over to the buddy.

We'll leave the ZONE_DEVICE case alone for now.

Note that self-hosted vmemmap pages will no longer be marked as
reserved.  This matches ordinary vmemmap pages allocated from the buddy
during memory hotplug.  Now, really only vmemmap pages allocated from
memblock during early boot will be marked reserved.  Existing
PageReserved() checks seem to be handling all relevant cases correctly
even after this change.

Link: https://lkml.kernel.org/r/20240607090939.89524-3-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Oscar Salvador <osalvador@suse.de> [generic memory-hotplug bits]
Cc: Alexander Potapenko <glider@google.com>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eugenio Pérez <eperezma@redhat.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Marco Elver <elver@google.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags.h | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 0f7c7320391e..b23772b08cc1 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -30,16 +30,11 @@
  * - Pages falling into physical memory gaps - not IORESOURCE_SYSRAM. Trying
  *   to read/write these pages might end badly. Don't touch!
  * - The zero page(s)
- * - Pages not added to the page allocator when onlining a section because
- *   they were excluded via the online_page_callback() or because they are
- *   PG_hwpoison.
  * - Pages allocated in the context of kexec/kdump (loaded kernel image,
  *   control pages, vmcoreinfo)
  * - MMIO/DMA pages. Some architectures don't allow to ioremap pages that are
  *   not marked PG_reserved (as they might be in use by somebody else who does
  *   not respect the caching strategy).
- * - Pages part of an offline section (struct pages of offline sections should
- *   not be trusted as they will be initialized when first onlined).
  * - MCA pages on ia64
  * - Pages holding CPU notes for POWER Firmware Assisted Dump
  * - Device memory (e.g. PMEM, DAX, HMM)
@@ -1020,6 +1015,10 @@ PAGE_TYPE_OPS(Buddy, buddy, buddy)
  * The content of these pages is effectively stale. Such pages should not
  * be touched (read/write/dump/save) except by their owner.
  *
+ * When a memory block gets onlined, all pages are initialized with a
+ * refcount of 1 and PageOffline(). generic_online_page() will
+ * take care of clearing PageOffline().
+ *
  * If a driver wants to allow to offline unmovable PageOffline() pages without
  * putting them back to the buddy, it can do so via the memory notifier by
  * decrementing the reference count in MEM_GOING_OFFLINE and incrementing the
@@ -1027,8 +1026,7 @@ PAGE_TYPE_OPS(Buddy, buddy, buddy)
  * pages (now with a reference count of zero) are treated like free pages,
  * allowing the containing memory block to get offlined. A driver that
  * relies on this feature is aware that re-onlining the memory block will
- * require to re-set the pages PageOffline() and not giving them to the
- * buddy via online_page_callback_t.
+ * require not giving them to the buddy via generic_online_page().
  *
  * There are drivers that mark a page PageOffline() and expect there won't be
  * any further access to page content. PFN walkers that read content of random
-- 
cgit v1.2.3


From 50625744220c101705a989d7c57a6c16e945f3b1 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 7 Jun 2024 11:09:38 +0200
Subject: mm/memory_hotplug: skip adjust_managed_page_count() for PageOffline()
 pages when offlining
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We currently have a hack for virtio-mem in place to handle memory
offlining with PageOffline pages for which we already adjusted the managed
page count.

Let's enlighten memory offlining code so we can get rid of that hack, and
document the situation.

Link: https://lkml.kernel.org/r/20240607090939.89524-4-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Oscar Salvador <osalvador@suse.de>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eugenio Pérez <eperezma@redhat.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Marco Elver <elver@google.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memory_hotplug.h | 4 ++--
 include/linux/page-flags.h     | 8 ++++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 7a9ff464608d..ebe876930e78 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -175,8 +175,8 @@ extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
 extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages);
 extern int online_pages(unsigned long pfn, unsigned long nr_pages,
 			struct zone *zone, struct memory_group *group);
-extern void __offline_isolated_pages(unsigned long start_pfn,
-				     unsigned long end_pfn);
+extern unsigned long __offline_isolated_pages(unsigned long start_pfn,
+		unsigned long end_pfn);
 
 typedef void (*online_page_callback_t)(struct page *page, unsigned int order);
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b23772b08cc1..1d441908b726 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -1023,11 +1023,15 @@ PAGE_TYPE_OPS(Buddy, buddy, buddy)
  * putting them back to the buddy, it can do so via the memory notifier by
  * decrementing the reference count in MEM_GOING_OFFLINE and incrementing the
  * reference count in MEM_CANCEL_OFFLINE. When offlining, the PageOffline()
- * pages (now with a reference count of zero) are treated like free pages,
- * allowing the containing memory block to get offlined. A driver that
+ * pages (now with a reference count of zero) are treated like free (unmanaged)
+ * pages, allowing the containing memory block to get offlined. A driver that
  * relies on this feature is aware that re-onlining the memory block will
  * require not giving them to the buddy via generic_online_page().
  *
+ * Memory offlining code will not adjust the managed page count for any
+ * PageOffline() pages, treating them like they were never exposed to the
+ * buddy using generic_online_page().
+ *
  * There are drivers that mark a page PageOffline() and expect there won't be
  * any further access to page content. PFN walkers that read content of random
  * pages should check PageOffline() and synchronize with such drivers using
-- 
cgit v1.2.3


From 15bde4abab734c687c1f81704886aba3a70c268e Mon Sep 17 00:00:00 2001
From: Barry Song <v-songbaohua@oppo.com>
Date: Tue, 18 Jun 2024 11:11:35 +1200
Subject: mm: extend rmap flags arguments for folio_add_new_anon_rmap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "mm: clarify folio_add_new_anon_rmap() and
__folio_add_anon_rmap()", v2.

This patchset is preparatory work for mTHP swapin.

folio_add_new_anon_rmap() assumes that new anon rmaps are always
exclusive.  However, this assumption doesn’t hold true for cases like
do_swap_page(), where a new anon might be added to the swapcache and is
not necessarily exclusive.

The patchset extends the rmap flags to allow folio_add_new_anon_rmap() to
handle both exclusive and non-exclusive new anon folios.  The
do_swap_page() function is updated to use this extended API with rmap
flags.  Consequently, all new anon folios now consistently use
folio_add_new_anon_rmap().  The special case for !folio_test_anon() in
__folio_add_anon_rmap() can be safely removed.

In conclusion, new anon folios always use folio_add_new_anon_rmap(),
regardless of exclusivity.  Old anon folios continue to use
__folio_add_anon_rmap() via folio_add_anon_rmap_pmd() and
folio_add_anon_rmap_ptes().


This patch (of 3):

In the case of a swap-in, a new anonymous folio is not necessarily
exclusive.  This patch updates the rmap flags to allow a new anonymous
folio to be treated as either exclusive or non-exclusive.  To maintain the
existing behavior, we always use EXCLUSIVE as the default setting.

[akpm@linux-foundation.org: cleanup and constifications per David and akpm]
[v-songbaohua@oppo.com: fix missing doc for flags of folio_add_new_anon_rmap()]
  Link: https://lkml.kernel.org/r/20240619210641.62542-1-21cnbao@gmail.com
[v-songbaohua@oppo.com: enhance doc for extend rmap flags arguments for folio_add_new_anon_rmap]
  Link: https://lkml.kernel.org/r/20240622030256.43775-1-21cnbao@gmail.com
Link: https://lkml.kernel.org/r/20240617231137.80726-1-21cnbao@gmail.com
Link: https://lkml.kernel.org/r/20240617231137.80726-2-21cnbao@gmail.com
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Tested-by: Shuai Yuan <yuanshuai@oppo.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/rmap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 980fa5d75d69..0978c64f49d8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -244,7 +244,7 @@ void folio_add_anon_rmap_ptes(struct folio *, struct page *, int nr_pages,
 void folio_add_anon_rmap_pmd(struct folio *, struct page *,
 		struct vm_area_struct *, unsigned long address, rmap_t flags);
 void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
-		unsigned long address);
+		unsigned long address, rmap_t flags);
 void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages,
 		struct vm_area_struct *);
 #define folio_add_file_rmap_pte(folio, page, vma) \
-- 
cgit v1.2.3


From 78fefd04c123493bbf28434768fa577b2153c79b Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Tue, 18 Jun 2024 17:12:39 +0800
Subject: mm: memory: convert clear_huge_page() to folio_zero_user()

Patch series "mm: improve clear and copy user folio", v2.

Some folio conversions.  An improvement is to move address alignment into
the caller as it is only needed if we don't know which address will be
accessed when clearing/copying user folios.


This patch (of 4):

Replace clear_huge_page() with folio_zero_user(), and take a folio
instead of a page. Directly get number of pages by folio_nr_pages()
to remove pages_per_huge_page argument, furthermore, move the address
alignment from folio_zero_user() to the callers since the alignment
is only needed when we don't know which address will be accessed.

Link: https://lkml.kernel.org/r/20240618091242.2140164-1-wangkefeng.wang@huawei.com
Link: https://lkml.kernel.org/r/20240618091242.2140164-2-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 101945baffc7..e2140ea6ae98 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4067,9 +4067,7 @@ enum mf_action_page_type {
 };
 
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
-extern void clear_huge_page(struct page *page,
-			    unsigned long addr_hint,
-			    unsigned int pages_per_huge_page);
+void folio_zero_user(struct folio *folio, unsigned long addr_hint);
 int copy_user_large_folio(struct folio *dst, struct folio *src,
 			  unsigned long addr_hint,
 			  struct vm_area_struct *vma);
-- 
cgit v1.2.3


From dc9e6f7053dd540db2c9fba30c31a07de5edab01 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 31 May 2024 14:56:16 +0200
Subject: mm: read page_type using READ_ONCE

KCSAN complains about possible data races: while we check for a page_type
-- for example for sanity checks -- we might concurrently modify the
mapcount that overlays page_type.

Let's use READ_ONCE to avoid load tearing (shouldn't make a difference)
and to make KCSAN happy.

Likely, we might also want to use WRITE_ONCE for the writer side of
page_type, if KCSAN ever complains about that.  But we'll not mess with
that for now.

Note: nothing should really be broken besides wrong KCSAN complaints.  The
sanity check that triggers this was added in commit 68f0320824fa
("mm/rmap: convert folio_add_file_rmap_range() into
folio_add_file_rmap_[pte|ptes|pmd]()").  Even before that similar races
likely where possible, ever since we added page_type in commit
6e292b9be7f4 ("mm: split page_type out from _mapcount").

Link: https://lkml.kernel.org/r/20240531125616.2850153-1-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202405281431.c46a3be9-lkp@intel.com
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page-flags.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 1d441908b726..5769fe6e4950 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -955,9 +955,9 @@ enum pagetype {
 };
 
 #define PageType(page, flag)						\
-	((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
+	((READ_ONCE(page->page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
 #define folio_test_type(folio, flag)					\
-	((folio->page.page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
+	((READ_ONCE(folio->page.page_type) & (PAGE_TYPE_BASE | flag))  == PAGE_TYPE_BASE)
 
 static inline int page_type_has_type(unsigned int page_type)
 {
@@ -966,7 +966,7 @@ static inline int page_type_has_type(unsigned int page_type)
 
 static inline int page_has_type(const struct page *page)
 {
-	return page_type_has_type(page->page_type);
+	return page_type_has_type(READ_ONCE(page->page_type));
 }
 
 #define FOLIO_TYPE_OPS(lname, fname)					\
-- 
cgit v1.2.3


From 6b1709d4b7fce27c6c79fc78c17c458f9848a4d8 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 21 Jun 2024 13:34:53 +0200
Subject: kmsan: expose kmsan_get_metadata()

Each s390 CPU has lowcore pages associated with it.  Each CPU sees its own
lowcore at virtual address 0 through a hardware mechanism called
prefixing.  Additionally, all lowcores are mapped to non-0 virtual
addresses stored in the lowcore_ptr[] array.

When lowcore is accessed through virtual address 0, one needs to resolve
metadata for lowcore_ptr[raw_smp_processor_id()].

Expose kmsan_get_metadata() to make it possible to do this from the arch
code.

Link: https://lkml.kernel.org/r/20240621113706.315500-10-iii@linux.ibm.com
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: <kasan-dev@googlegroups.com>
Cc: Marco Elver <elver@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kmsan.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h
index e0c23a32cdf0..fe6c2212bdb1 100644
--- a/include/linux/kmsan.h
+++ b/include/linux/kmsan.h
@@ -230,6 +230,15 @@ void kmsan_handle_urb(const struct urb *urb, bool is_out);
  */
 void kmsan_unpoison_entry_regs(const struct pt_regs *regs);
 
+/**
+ * kmsan_get_metadata() - Return a pointer to KMSAN shadow or origins.
+ * @addr:      kernel address.
+ * @is_origin: whether to return origins or shadow.
+ *
+ * Return NULL if metadata cannot be found.
+ */
+void *kmsan_get_metadata(void *addr, bool is_origin);
+
 #else
 
 static inline void kmsan_init_shadow(void)
-- 
cgit v1.2.3


From ec3e837d8fd96c68599b2861dd412094b7bc335c Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 21 Jun 2024 13:34:55 +0200
Subject: kmsan: allow disabling KMSAN checks for the current task

Like for KASAN, it's useful to temporarily disable KMSAN checks around,
e.g., redzone accesses.  Introduce kmsan_disable_current() and
kmsan_enable_current(), which are similar to their KASAN counterparts.

Make them reentrant in order to handle memory allocations in interrupt
context.  Repurpose the allow_reporting field for this.

Link: https://lkml.kernel.org/r/20240621113706.315500-12-iii@linux.ibm.com
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: <kasan-dev@googlegroups.com>
Cc: Marco Elver <elver@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kmsan.h       | 24 ++++++++++++++++++++++++
 include/linux/kmsan_types.h |  2 +-
 2 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h
index fe6c2212bdb1..14b5ea6d3a43 100644
--- a/include/linux/kmsan.h
+++ b/include/linux/kmsan.h
@@ -239,6 +239,22 @@ void kmsan_unpoison_entry_regs(const struct pt_regs *regs);
  */
 void *kmsan_get_metadata(void *addr, bool is_origin);
 
+/**
+ * kmsan_enable_current(): Enable KMSAN for the current task.
+ *
+ * Each kmsan_enable_current() current call must be preceded by a
+ * kmsan_disable_current() call. These call pairs may be nested.
+ */
+void kmsan_enable_current(void);
+
+/**
+ * kmsan_disable_current(): Disable KMSAN for the current task.
+ *
+ * Each kmsan_disable_current() current call must be followed by a
+ * kmsan_enable_current() call. These call pairs may be nested.
+ */
+void kmsan_disable_current(void);
+
 #else
 
 static inline void kmsan_init_shadow(void)
@@ -338,6 +354,14 @@ static inline void kmsan_unpoison_entry_regs(const struct pt_regs *regs)
 {
 }
 
+static inline void kmsan_enable_current(void)
+{
+}
+
+static inline void kmsan_disable_current(void)
+{
+}
+
 #endif
 
 #endif /* _LINUX_KMSAN_H */
diff --git a/include/linux/kmsan_types.h b/include/linux/kmsan_types.h
index 929287981afe..dfc59918b3c0 100644
--- a/include/linux/kmsan_types.h
+++ b/include/linux/kmsan_types.h
@@ -31,7 +31,7 @@ struct kmsan_context_state {
 struct kmsan_ctx {
 	struct kmsan_context_state cstate;
 	int kmsan_in_runtime;
-	bool allow_reporting;
+	unsigned int depth;
 };
 
 #endif /* _LINUX_KMSAN_TYPES_H */
-- 
cgit v1.2.3


From 1fdb3c7006d9914e4b070f7eee98dfbdf743ee16 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 21 Jun 2024 13:34:56 +0200
Subject: kmsan: introduce memset_no_sanitize_memory()

Add a wrapper for memset() that prevents unpoisoning.  This is useful for
filling memory allocator redzones.

Link: https://lkml.kernel.org/r/20240621113706.315500-13-iii@linux.ibm.com
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: <kasan-dev@googlegroups.com>
Cc: Marco Elver <elver@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kmsan.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h
index 14b5ea6d3a43..7109644f4c19 100644
--- a/include/linux/kmsan.h
+++ b/include/linux/kmsan.h
@@ -255,6 +255,19 @@ void kmsan_enable_current(void);
  */
 void kmsan_disable_current(void);
 
+/**
+ * memset_no_sanitize_memory(): Fill memory without KMSAN instrumentation.
+ * @s: address of kernel memory to fill.
+ * @c: constant byte to fill the memory with.
+ * @n: number of bytes to fill.
+ *
+ * This is like memset(), but without KMSAN instrumentation.
+ */
+static inline void *memset_no_sanitize_memory(void *s, int c, size_t n)
+{
+	return __memset(s, c, n);
+}
+
 #else
 
 static inline void kmsan_init_shadow(void)
@@ -362,6 +375,11 @@ static inline void kmsan_disable_current(void)
 {
 }
 
+static inline void *memset_no_sanitize_memory(void *s, int c, size_t n)
+{
+	return memset(s, c, n);
+}
+
 #endif
 
 #endif /* _LINUX_KMSAN_H */
-- 
cgit v1.2.3


From e6553e2f79b2673b239c3dd47a88451119eb82d9 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 21 Jun 2024 13:35:00 +0200
Subject: kmsan: expose KMSAN_WARN_ON()

KMSAN_WARN_ON() is required for implementing s390-specific KMSAN
functions, but right now it's available only to the KMSAN internal
functions.  Expose it to subsystems through <linux/kmsan.h>.

Link: https://lkml.kernel.org/r/20240621113706.315500-17-iii@linux.ibm.com
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: <kasan-dev@googlegroups.com>
Cc: Marco Elver <elver@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kmsan.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h
index 7109644f4c19..2b1432cc16d5 100644
--- a/include/linux/kmsan.h
+++ b/include/linux/kmsan.h
@@ -268,6 +268,29 @@ static inline void *memset_no_sanitize_memory(void *s, int c, size_t n)
 	return __memset(s, c, n);
 }
 
+extern bool kmsan_enabled;
+extern int panic_on_kmsan;
+
+/*
+ * KMSAN performs a lot of consistency checks that are currently enabled by
+ * default. BUG_ON is normally discouraged in the kernel, unless used for
+ * debugging, but KMSAN itself is a debugging tool, so it makes little sense to
+ * recover if something goes wrong.
+ */
+#define KMSAN_WARN_ON(cond)                                           \
+	({                                                            \
+		const bool __cond = WARN_ON(cond);                    \
+		if (unlikely(__cond)) {                               \
+			WRITE_ONCE(kmsan_enabled, false);             \
+			if (panic_on_kmsan) {                         \
+				/* Can't call panic() here because */ \
+				/* of uaccess checks. */              \
+				BUG();                                \
+			}                                             \
+		}                                                     \
+		__cond;                                               \
+	})
+
 #else
 
 static inline void kmsan_init_shadow(void)
@@ -380,6 +403,8 @@ static inline void *memset_no_sanitize_memory(void *s, int c, size_t n)
 	return memset(s, c, n);
 }
 
+#define KMSAN_WARN_ON WARN_ON
+
 #endif
 
 #endif /* _LINUX_KMSAN_H */
-- 
cgit v1.2.3


From ee86814b0562f18255b55c5e6a01a022895994cf Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Thu, 20 Jun 2024 23:29:35 +0200
Subject: mm/migrate: move NUMA hinting fault folio isolation + checks under
 PTL

Currently we always take a folio reference even if migration will not even
be tried or isolation failed, requiring us to grab+drop an additional
reference.

Further, we end up calling folio_likely_mapped_shared() while the folio
might have already been unmapped, because after we dropped the PTL, that
can easily happen.  We want to stop touching mapcounts and friends from
such context, and only call folio_likely_mapped_shared() while the folio
is still mapped: mapcount information is pretty much stale and unreliable
otherwise.

So let's move checks into numamigrate_isolate_folio(), rename that
function to migrate_misplaced_folio_prepare(), and call that function from
callsites where we call migrate_misplaced_folio(), but still with the PTL
held.

We can now stop taking temporary folio references, and really only take a
reference if folio isolation succeeded.  Doing the
folio_likely_mapped_shared() + folio isolation under PT lock is now
similar to how we handle MADV_PAGEOUT.

While at it, combine the folio_is_file_lru() checks.

[david@redhat.com: fix list_del() corruption]
  Link: https://lkml.kernel.org/r/8f85c31a-e603-4578-bf49-136dae0d4b69@redhat.com
  Link: https://lkml.kernel.org/r/20240626191129.658CFC32782@smtp.kernel.org
Link: https://lkml.kernel.org/r/20240620212935.656243-3-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Tested-by: Donet Tom <donettom@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/migrate.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 517f70b70620..af2579ae93f2 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -140,9 +140,16 @@ const struct movable_operations *page_movable_ops(struct page *page)
 }
 
 #ifdef CONFIG_NUMA_BALANCING
+int migrate_misplaced_folio_prepare(struct folio *folio,
+		struct vm_area_struct *vma, int node);
 int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma,
 			   int node);
 #else
+static inline int migrate_misplaced_folio_prepare(struct folio *folio,
+		struct vm_area_struct *vma, int node)
+{
+	return -EAGAIN; /* can't migrate now */
+}
 static inline int migrate_misplaced_folio(struct folio *folio,
 					 struct vm_area_struct *vma, int node)
 {
-- 
cgit v1.2.3


From bb82ac31ddcedd6a1e6ee30b7afec7acdef811e1 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 Jun 2024 12:18:55 +0200
Subject: readahead: drop index argument of page_cache_async_readahead()

The index argument of page_cache_async_readahead() is just folio->index so
there's no point in passing is separately.  Drop it.

Link: https://lkml.kernel.org/r/20240625101909.12234-5-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Tested-by: Zhang Peng <zhangpengpeng0808@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagemap.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e37e16ebff7a..eef99375dcf1 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1293,8 +1293,7 @@ void page_cache_sync_readahead(struct address_space *mapping,
  * @mapping: address_space which holds the pagecache and I/O vectors
  * @ra: file_ra_state which holds the readahead state
  * @file: Used by the filesystem for authentication.
- * @folio: The folio at @index which triggered the readahead call.
- * @index: Index of first page to be read.
+ * @folio: The folio which triggered the readahead call.
  * @req_count: Total number of pages being read by the caller.
  *
  * page_cache_async_readahead() should be called when a page is used which
@@ -1305,9 +1304,9 @@ void page_cache_sync_readahead(struct address_space *mapping,
 static inline
 void page_cache_async_readahead(struct address_space *mapping,
 		struct file_ra_state *ra, struct file *file,
-		struct folio *folio, pgoff_t index, unsigned long req_count)
+		struct folio *folio, unsigned long req_count)
 {
-	DEFINE_READAHEAD(ractl, file, ra, mapping, index);
+	DEFINE_READAHEAD(ractl, file, ra, mapping, folio->index);
 	page_cache_async_ra(&ractl, folio, req_count);
 }
 
-- 
cgit v1.2.3


From af48f95492dc1af36d9636a750ec492035c0ed7d Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Mon, 1 Jul 2024 15:40:48 +0300
Subject: RDMA/core: Introduce "name_assign_type" for an IB device

The name_assign_type indicates how the name is provided. Currently
these types are supported:
- RDMA_NAME_ASSIGN_TYPE_UNKNOWN: Unknown or not set;
- RDMA_NAME_ASSIGN_TYPE_USER: Name is provided by the user; The
  user-created sub device, rxe and siw device has this type.

When filling nl device info, it is set in the new attribute
RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE. User-space tools like udev
"rdma_rename" could check this attribute to determine if this
device needs to be renamed or not.

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Link: https://lore.kernel.org/r/522591bef9a369cc8e5dcb77787e017bffee37fe.1719837610.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/rdma/ib_verbs.h          | 8 ++++++++
 include/uapi/rdma/rdma_netlink.h | 9 +++++++++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e09d4f09b602..6c5712ae559d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2794,6 +2794,8 @@ struct ib_device {
 	enum rdma_nl_dev_type type;
 	struct ib_device *parent;
 	struct list_head subdev_list;
+
+	enum rdma_nl_name_assign_type name_assign_type;
 };
 
 static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size,
@@ -4867,4 +4869,10 @@ int ib_add_sub_device(struct ib_device *parent,
  * Return 0 on success, an error code otherwise
  */
 int ib_del_sub_device_and_put(struct ib_device *sub);
+
+static inline void ib_mark_name_assigned_by_user(struct ib_device *ibdev)
+{
+	ibdev->name_assign_type = RDMA_NAME_ASSIGN_TYPE_USER;
+}
+
 #endif /* IB_VERBS_H */
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 4b69242d7848..2f37568f5556 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -572,6 +572,8 @@ enum rdma_nldev_attr {
 
 	RDMA_NLDEV_ATTR_PARENT_NAME,		/* string */
 
+	RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,	/* u8 */
+
 	/*
 	 * Always the end
 	 */
@@ -615,4 +617,11 @@ enum rdma_nl_counter_mask {
 enum rdma_nl_dev_type {
 	RDMA_DEVICE_TYPE_SMI = 1,
 };
+
+/* RDMA device name assignment types */
+enum rdma_nl_name_assign_type {
+	RDMA_NAME_ASSIGN_TYPE_UNKNOWN = 0,
+	RDMA_NAME_ASSIGN_TYPE_USER = 1, /* Provided by user-space */
+};
+
 #endif /* _UAPI_RDMA_NETLINK_H */
-- 
cgit v1.2.3


From fa2690af573dfefb47ba6eef888797a64b6b5f3c Mon Sep 17 00:00:00 2001
From: Yang Shi <yang@os.amperecomputing.com>
Date: Tue, 25 Jun 2024 13:53:50 -0700
Subject: mm: page_ref: remove folio_try_get_rcu()

The below bug was reported on a non-SMP kernel:

[  275.267158][ T4335] ------------[ cut here ]------------
[  275.267949][ T4335] kernel BUG at include/linux/page_ref.h:275!
[  275.268526][ T4335] invalid opcode: 0000 [#1] KASAN PTI
[  275.269001][ T4335] CPU: 0 PID: 4335 Comm: trinity-c3 Not tainted 6.7.0-rc4-00061-gefa7df3e3bb5 #1
[  275.269787][ T4335] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[  275.270679][ T4335] RIP: 0010:try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3))
[  275.272813][ T4335] RSP: 0018:ffffc90005dcf650 EFLAGS: 00010202
[  275.273346][ T4335] RAX: 0000000000000246 RBX: ffffea00066e0000 RCX: 0000000000000000
[  275.274032][ T4335] RDX: fffff94000cdc007 RSI: 0000000000000004 RDI: ffffea00066e0034
[  275.274719][ T4335] RBP: ffffea00066e0000 R08: 0000000000000000 R09: fffff94000cdc006
[  275.275404][ T4335] R10: ffffea00066e0037 R11: 0000000000000000 R12: 0000000000000136
[  275.276106][ T4335] R13: ffffea00066e0034 R14: dffffc0000000000 R15: ffffea00066e0008
[  275.276790][ T4335] FS:  00007fa2f9b61740(0000) GS:ffffffff89d0d000(0000) knlGS:0000000000000000
[  275.277570][ T4335] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  275.278143][ T4335] CR2: 00007fa2f6c00000 CR3: 0000000134b04000 CR4: 00000000000406f0
[  275.278833][ T4335] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  275.279521][ T4335] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  275.280201][ T4335] Call Trace:
[  275.280499][ T4335]  <TASK>
[ 275.280751][ T4335] ? die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434 arch/x86/kernel/dumpstack.c:447)
[ 275.281087][ T4335] ? do_trap (arch/x86/kernel/traps.c:112 arch/x86/kernel/traps.c:153)
[ 275.281463][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3))
[ 275.281884][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3))
[ 275.282300][ T4335] ? do_error_trap (arch/x86/kernel/traps.c:174)
[ 275.282711][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3))
[ 275.283129][ T4335] ? handle_invalid_op (arch/x86/kernel/traps.c:212)
[ 275.283561][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3))
[ 275.283990][ T4335] ? exc_invalid_op (arch/x86/kernel/traps.c:264)
[ 275.284415][ T4335] ? asm_exc_invalid_op (arch/x86/include/asm/idtentry.h:568)
[ 275.284859][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3))
[ 275.285278][ T4335] try_grab_folio (mm/gup.c:148)
[ 275.285684][ T4335] __get_user_pages (mm/gup.c:1297 (discriminator 1))
[ 275.286111][ T4335] ? __pfx___get_user_pages (mm/gup.c:1188)
[ 275.286579][ T4335] ? __pfx_validate_chain (kernel/locking/lockdep.c:3825)
[ 275.287034][ T4335] ? mark_lock (kernel/locking/lockdep.c:4656 (discriminator 1))
[ 275.287416][ T4335] __gup_longterm_locked (mm/gup.c:1509 mm/gup.c:2209)
[ 275.288192][ T4335] ? __pfx___gup_longterm_locked (mm/gup.c:2204)
[ 275.288697][ T4335] ? __pfx_lock_acquire (kernel/locking/lockdep.c:5722)
[ 275.289135][ T4335] ? __pfx___might_resched (kernel/sched/core.c:10106)
[ 275.289595][ T4335] pin_user_pages_remote (mm/gup.c:3350)
[ 275.290041][ T4335] ? __pfx_pin_user_pages_remote (mm/gup.c:3350)
[ 275.290545][ T4335] ? find_held_lock (kernel/locking/lockdep.c:5244 (discriminator 1))
[ 275.290961][ T4335] ? mm_access (kernel/fork.c:1573)
[ 275.291353][ T4335] process_vm_rw_single_vec+0x142/0x360
[ 275.291900][ T4335] ? __pfx_process_vm_rw_single_vec+0x10/0x10
[ 275.292471][ T4335] ? mm_access (kernel/fork.c:1573)
[ 275.292859][ T4335] process_vm_rw_core+0x272/0x4e0
[ 275.293384][ T4335] ? hlock_class (arch/x86/include/asm/bitops.h:227 arch/x86/include/asm/bitops.h:239 include/asm-generic/bitops/instrumented-non-atomic.h:142 kernel/locking/lockdep.c:228)
[ 275.293780][ T4335] ? __pfx_process_vm_rw_core+0x10/0x10
[ 275.294350][ T4335] process_vm_rw (mm/process_vm_access.c:284)
[ 275.294748][ T4335] ? __pfx_process_vm_rw (mm/process_vm_access.c:259)
[ 275.295197][ T4335] ? __task_pid_nr_ns (include/linux/rcupdate.h:306 (discriminator 1) include/linux/rcupdate.h:780 (discriminator 1) kernel/pid.c:504 (discriminator 1))
[ 275.295634][ T4335] __x64_sys_process_vm_readv (mm/process_vm_access.c:291)
[ 275.296139][ T4335] ? syscall_enter_from_user_mode (kernel/entry/common.c:94 kernel/entry/common.c:112)
[ 275.296642][ T4335] do_syscall_64 (arch/x86/entry/common.c:51 (discriminator 1) arch/x86/entry/common.c:82 (discriminator 1))
[ 275.297032][ T4335] ? __task_pid_nr_ns (include/linux/rcupdate.h:306 (discriminator 1) include/linux/rcupdate.h:780 (discriminator 1) kernel/pid.c:504 (discriminator 1))
[ 275.297470][ T4335] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4300 kernel/locking/lockdep.c:4359)
[ 275.297988][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97)
[ 275.298389][ T4335] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4300 kernel/locking/lockdep.c:4359)
[ 275.298906][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97)
[ 275.299304][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97)
[ 275.299703][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97)
[ 275.300115][ T4335] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129)

This BUG is the VM_BUG_ON(!in_atomic() && !irqs_disabled()) assertion in
folio_ref_try_add_rcu() for non-SMP kernel.

The process_vm_readv() calls GUP to pin the THP. An optimization for
pinning THP instroduced by commit 57edfcfd3419 ("mm/gup: accelerate thp
gup even for "pages != NULL"") calls try_grab_folio() to pin the THP,
but try_grab_folio() is supposed to be called in atomic context for
non-SMP kernel, for example, irq disabled or preemption disabled, due to
the optimization introduced by commit e286781d5f2e ("mm: speculative
page references").

The commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP
boundaries") is not actually the root cause although it was bisected to.
It just makes the problem exposed more likely.

The follow up discussion suggested the optimization for non-SMP kernel
may be out-dated and not worth it anymore [1].  So removing the
optimization to silence the BUG.

However calling try_grab_folio() in GUP slow path actually is
unnecessary, so the following patch will clean this up.

[1] https://lore.kernel.org/linux-mm/821cf1d6-92b9-4ac4-bacc-d8f2364ac14f@paulmck-laptop/

Link: https://lkml.kernel.org/r/20240625205350.1777481-1-yang@os.amperecomputing.com
Fixes: 57edfcfd3419 ("mm/gup: accelerate thp gup even for "pages != NULL"")
Signed-off-by: Yang Shi <yang@os.amperecomputing.com>
Reported-by: kernel test robot <oliver.sang@intel.com>
Tested-by: Oliver Sang <oliver.sang@intel.com>
Acked-by: Peter Xu <peterx@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Vivek Kasireddy <vivek.kasireddy@intel.com>
Cc: <stable@vger.kernel.org>	[6.6+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page_ref.h | 49 ++----------------------------------------------
 1 file changed, 2 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 1acf5bac7f50..490d0ad6e56d 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -258,54 +258,9 @@ static inline bool folio_try_get(struct folio *folio)
 	return folio_ref_add_unless(folio, 1, 0);
 }
 
-static inline bool folio_ref_try_add_rcu(struct folio *folio, int count)
-{
-#ifdef CONFIG_TINY_RCU
-	/*
-	 * The caller guarantees the folio will not be freed from interrupt
-	 * context, so (on !SMP) we only need preemption to be disabled
-	 * and TINY_RCU does that for us.
-	 */
-# ifdef CONFIG_PREEMPT_COUNT
-	VM_BUG_ON(!in_atomic() && !irqs_disabled());
-# endif
-	VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio);
-	folio_ref_add(folio, count);
-#else
-	if (unlikely(!folio_ref_add_unless(folio, count, 0))) {
-		/* Either the folio has been freed, or will be freed. */
-		return false;
-	}
-#endif
-	return true;
-}
-
-/**
- * folio_try_get_rcu - Attempt to increase the refcount on a folio.
- * @folio: The folio.
- *
- * This is a version of folio_try_get() optimised for non-SMP kernels.
- * If you are still holding the rcu_read_lock() after looking up the
- * page and know that the page cannot have its refcount decreased to
- * zero in interrupt context, you can use this instead of folio_try_get().
- *
- * Example users include get_user_pages_fast() (as pages are not unmapped
- * from interrupt context) and the page cache lookups (as pages are not
- * truncated from interrupt context).  We also know that pages are not
- * frozen in interrupt context for the purposes of splitting or migration.
- *
- * You can also use this function if you're holding a lock that prevents
- * pages being frozen & removed; eg the i_pages lock for the page cache
- * or the mmap_lock or page table lock for page tables.  In this case,
- * it will always succeed, and you could have used a plain folio_get(),
- * but it's sometimes more convenient to have a common function called
- * from both locked and RCU-protected contexts.
- *
- * Return: True if the reference count was successfully incremented.
- */
-static inline bool folio_try_get_rcu(struct folio *folio)
+static inline bool folio_ref_try_add(struct folio *folio, int count)
 {
-	return folio_ref_try_add_rcu(folio, 1);
+	return folio_ref_add_unless(folio, count, 0);
 }
 
 static inline int page_ref_freeze(struct page *page, int count)
-- 
cgit v1.2.3


From 82f0b6f041fad768c28b4ad05a683065412c226e Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Tue, 25 Jun 2024 20:16:39 -0400
Subject: mm: prevent derefencing NULL ptr in pfn_section_valid()

Commit 5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing
memory_section->usage") changed pfn_section_valid() to add a READ_ONCE()
call around "ms->usage" to fix a race with section_deactivate() where
ms->usage can be cleared.  The READ_ONCE() call, by itself, is not enough
to prevent NULL pointer dereference.  We need to check its value before
dereferencing it.

Link: https://lkml.kernel.org/r/20240626001639.1350646-1-longman@redhat.com
Fixes: 5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing memory_section->usage")
Signed-off-by: Waiman Long <longman@redhat.com>
Cc: Charan Teja Kalla <quic_charante@quicinc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmzone.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 586a8f0104d7..1dc6248feb83 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1979,8 +1979,9 @@ static inline int subsection_map_index(unsigned long pfn)
 static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
 {
 	int idx = subsection_map_index(pfn);
+	struct mem_section_usage *usage = READ_ONCE(ms->usage);
 
-	return test_bit(idx, READ_ONCE(ms->usage)->subsection_map);
+	return usage ? test_bit(idx, usage->subsection_map) : 0;
 }
 #else
 static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
-- 
cgit v1.2.3


From 099d90642a711caae377f53309abfe27e8724a8b Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 27 Jun 2024 10:39:49 +1000
Subject: mm/filemap: make MAX_PAGECACHE_ORDER acceptable to xarray

Patch series "mm/filemap: Limit page cache size to that supported by
xarray", v2.

Currently, xarray can't support arbitrary page cache size.  More details
can be found from the WARN_ON() statement in xas_split_alloc().  In our
test whose code is attached below, we hit the WARN_ON() on ARM64 system
where the base page size is 64KB and huge page size is 512MB.  The issue
was reported long time ago and some discussions on it can be found here
[1].

[1] https://www.spinics.net/lists/linux-xfs/msg75404.html

In order to fix the issue, we need to adjust MAX_PAGECACHE_ORDER to one
supported by xarray and avoid PMD-sized page cache if needed.  The code
changes are suggested by David Hildenbrand.

PATCH[1] adjusts MAX_PAGECACHE_ORDER to that supported by xarray
PATCH[2-3] avoids PMD-sized page cache in the synchronous readahead path
PATCH[4] avoids PMD-sized page cache for shmem files if needed

Test program
============
# cat test.c
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/syscall.h>
#include <sys/mman.h>

#define TEST_XFS_FILENAME	"/tmp/data"
#define TEST_SHMEM_FILENAME	"/dev/shm/data"
#define TEST_MEM_SIZE		0x20000000

int main(int argc, char **argv)
{
	const char *filename;
	int fd = 0;
	void *buf = (void *)-1, *p;
	int pgsize = getpagesize();
	int ret;

	if (pgsize != 0x10000) {
		fprintf(stderr, "64KB base page size is required\n");
		return -EPERM;
	}

	system("echo force > /sys/kernel/mm/transparent_hugepage/shmem_enabled");
	system("rm -fr /tmp/data");
	system("rm -fr /dev/shm/data");
	system("echo 1 > /proc/sys/vm/drop_caches");

	/* Open xfs or shmem file */
	filename = TEST_XFS_FILENAME;
	if (argc > 1 && !strcmp(argv[1], "shmem"))
		filename = TEST_SHMEM_FILENAME;

	fd = open(filename, O_CREAT | O_RDWR | O_TRUNC);
	if (fd < 0) {
		fprintf(stderr, "Unable to open <%s>\n", filename);
		return -EIO;
	}

	/* Extend file size */
	ret = ftruncate(fd, TEST_MEM_SIZE);
	if (ret) {
		fprintf(stderr, "Error %d to ftruncate()\n", ret);
		goto cleanup;
	}

	/* Create VMA */
	buf = mmap(NULL, TEST_MEM_SIZE,
		   PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (buf == (void *)-1) {
		fprintf(stderr, "Unable to mmap <%s>\n", filename);
		goto cleanup;
	}

	fprintf(stdout, "mapped buffer at 0x%p\n", buf);
	ret = madvise(buf, TEST_MEM_SIZE, MADV_HUGEPAGE);
        if (ret) {
		fprintf(stderr, "Unable to madvise(MADV_HUGEPAGE)\n");
		goto cleanup;
	}

	/* Populate VMA */
	ret = madvise(buf, TEST_MEM_SIZE, MADV_POPULATE_WRITE);
	if (ret) {
		fprintf(stderr, "Error %d to madvise(MADV_POPULATE_WRITE)\n", ret);
		goto cleanup;
	}

	/* Punch the file to enforce xarray split */
	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
        		TEST_MEM_SIZE - pgsize, pgsize);
	if (ret)
		fprintf(stderr, "Error %d to fallocate()\n", ret);

cleanup:
	if (buf != (void *)-1)
		munmap(buf, TEST_MEM_SIZE);
	if (fd > 0)
		close(fd);

	return 0;
}

# gcc test.c -o test
# cat /proc/1/smaps | grep KernelPageSize | head -n 1
KernelPageSize:       64 kB
# ./test shmem
   :
------------[ cut here ]------------
WARNING: CPU: 17 PID: 5253 at lib/xarray.c:1025 xas_split_alloc+0xf8/0x128
Modules linked in: nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib  \
nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct    \
nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4    \
ip_set nf_tables rfkill nfnetlink vfat fat virtio_balloon          \
drm fuse xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64  \
virtio_net sha1_ce net_failover failover virtio_console virtio_blk \
dimlib virtio_mmio
CPU: 17 PID: 5253 Comm: test Kdump: loaded Tainted: G W 6.10.0-rc5-gavin+ #12
Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-1.el9 05/24/2024
pstate: 83400005 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
pc : xas_split_alloc+0xf8/0x128
lr : split_huge_page_to_list_to_order+0x1c4/0x720
sp : ffff80008a92f5b0
x29: ffff80008a92f5b0 x28: ffff80008a92f610 x27: ffff80008a92f728
x26: 0000000000000cc0 x25: 000000000000000d x24: ffff0000cf00c858
x23: ffff80008a92f610 x22: ffffffdfc0600000 x21: 0000000000000000
x20: 0000000000000000 x19: ffffffdfc0600000 x18: 0000000000000000
x17: 0000000000000000 x16: 0000018000000000 x15: 3374004000000000
x14: 0000e00000000000 x13: 0000000000002000 x12: 0000000000000020
x11: 3374000000000000 x10: 3374e1c0ffff6000 x9 : ffffb463a84c681c
x8 : 0000000000000003 x7 : 0000000000000000 x6 : ffff00011c976ce0
x5 : ffffb463aa47e378 x4 : 0000000000000000 x3 : 0000000000000cc0
x2 : 000000000000000d x1 : 000000000000000c x0 : 0000000000000000
Call trace:
 xas_split_alloc+0xf8/0x128
 split_huge_page_to_list_to_order+0x1c4/0x720
 truncate_inode_partial_folio+0xdc/0x160
 shmem_undo_range+0x2bc/0x6a8
 shmem_fallocate+0x134/0x430
 vfs_fallocate+0x124/0x2e8
 ksys_fallocate+0x4c/0xa0
 __arm64_sys_fallocate+0x24/0x38
 invoke_syscall.constprop.0+0x7c/0xd8
 do_el0_svc+0xb4/0xd0
 el0_svc+0x44/0x1d8
 el0t_64_sync_handler+0x134/0x150
 el0t_64_sync+0x17c/0x180


This patch (of 4):

The largest page cache order can be HPAGE_PMD_ORDER (13) on ARM64 with
64KB base page size.  The xarray entry with this order can't be split as
the following error messages indicate.

------------[ cut here ]------------
WARNING: CPU: 35 PID: 7484 at lib/xarray.c:1025 xas_split_alloc+0xf8/0x128
Modules linked in: nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib  \
nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct    \
nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4    \
ip_set rfkill nf_tables nfnetlink vfat fat virtio_balloon drm      \
fuse xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64      \
sha1_ce virtio_net net_failover virtio_console virtio_blk failover \
dimlib virtio_mmio
CPU: 35 PID: 7484 Comm: test Kdump: loaded Tainted: G W 6.10.0-rc5-gavin+ #9
Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-1.el9 05/24/2024
pstate: 83400005 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
pc : xas_split_alloc+0xf8/0x128
lr : split_huge_page_to_list_to_order+0x1c4/0x720
sp : ffff800087a4f6c0
x29: ffff800087a4f6c0 x28: ffff800087a4f720 x27: 000000001fffffff
x26: 0000000000000c40 x25: 000000000000000d x24: ffff00010625b858
x23: ffff800087a4f720 x22: ffffffdfc0780000 x21: 0000000000000000
x20: 0000000000000000 x19: ffffffdfc0780000 x18: 000000001ff40000
x17: 00000000ffffffff x16: 0000018000000000 x15: 51ec004000000000
x14: 0000e00000000000 x13: 0000000000002000 x12: 0000000000000020
x11: 51ec000000000000 x10: 51ece1c0ffff8000 x9 : ffffbeb961a44d28
x8 : 0000000000000003 x7 : ffffffdfc0456420 x6 : ffff0000e1aa6eb8
x5 : 20bf08b4fe778fca x4 : ffffffdfc0456420 x3 : 0000000000000c40
x2 : 000000000000000d x1 : 000000000000000c x0 : 0000000000000000
Call trace:
 xas_split_alloc+0xf8/0x128
 split_huge_page_to_list_to_order+0x1c4/0x720
 truncate_inode_partial_folio+0xdc/0x160
 truncate_inode_pages_range+0x1b4/0x4a8
 truncate_pagecache_range+0x84/0xa0
 xfs_flush_unmap_range+0x70/0x90 [xfs]
 xfs_file_fallocate+0xfc/0x4d8 [xfs]
 vfs_fallocate+0x124/0x2e8
 ksys_fallocate+0x4c/0xa0
 __arm64_sys_fallocate+0x24/0x38
 invoke_syscall.constprop.0+0x7c/0xd8
 do_el0_svc+0xb4/0xd0
 el0_svc+0x44/0x1d8
 el0t_64_sync_handler+0x134/0x150
 el0t_64_sync+0x17c/0x180

Fix it by decreasing MAX_PAGECACHE_ORDER to the largest supported order
by xarray. For this specific case, MAX_PAGECACHE_ORDER is dropped from
13 to 11 when CONFIG_BASE_SMALL is disabled.

Link: https://lkml.kernel.org/r/20240627003953.1262512-1-gshan@redhat.com
Link: https://lkml.kernel.org/r/20240627003953.1262512-2-gshan@redhat.com
Fixes: 793917d997df ("mm/readahead: Add large folio readahead")
Signed-off-by: Gavin Shan <gshan@redhat.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Darrick J. Wong <djwong@kernel.org>
Cc: Don Dutile <ddutile@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: William Kucharski <william.kucharski@oracle.com>
Cc: Zhenyu Zhang <zhenyzha@redhat.com>
Cc: <stable@vger.kernel.org>	[5.18+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagemap.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 59f1df0cde5a..a0a026d2d244 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -354,11 +354,18 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
  * a good order (that's 1MB if you're using 4kB pages)
  */
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
+#define PREFERRED_MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
 #else
-#define MAX_PAGECACHE_ORDER	8
+#define PREFERRED_MAX_PAGECACHE_ORDER	8
 #endif
 
+/*
+ * xas_split_alloc() does not support arbitrary orders. This implies no
+ * 512MB THP on ARM64 with 64KB base page size.
+ */
+#define MAX_XAS_ORDER		(XA_CHUNK_SHIFT * 2 - 1)
+#define MAX_PAGECACHE_ORDER	min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER)
+
 /**
  * mapping_set_large_folios() - Indicate the file supports large folios.
  * @mapping: The file.
-- 
cgit v1.2.3


From 5a4d8944d6b1e1aaaa83ea42c116b520b4ed0394 Mon Sep 17 00:00:00 2001
From: Nhat Pham <nphamcs@gmail.com>
Date: Thu, 27 Jun 2024 13:17:37 -0700
Subject: cachestat: do not flush stats in recency check

syzbot detects that cachestat() is flushing stats, which can sleep, in its
RCU read section (see [1]).  This is done in the workingset_test_recent()
step (which checks if the folio's eviction is recent).

Move the stat flushing step to before the RCU read section of cachestat,
and skip stat flushing during the recency check.

[1]: https://lore.kernel.org/cgroups/000000000000f71227061bdf97e0@google.com/

Link: https://lkml.kernel.org/r/20240627201737.3506959-1-nphamcs@gmail.com
Fixes: b00684722262 ("mm: workingset: move the stats flush into workingset_test_recent()")
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
Reported-by: syzbot+b7f13b2d0cc156edf61a@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/cgroups/000000000000f71227061bdf97e0@google.com/
Debugged-by: Johannes Weiner <hannes@cmpxchg.org>
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Kairui Song <kasong@tencent.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: <stable@vger.kernel.org>	[6.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/swap.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index bd450023b9a4..e685e93ba354 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -354,7 +354,8 @@ static inline swp_entry_t page_swap_entry(struct page *page)
 }
 
 /* linux/mm/workingset.c */
-bool workingset_test_recent(void *shadow, bool file, bool *workingset);
+bool workingset_test_recent(void *shadow, bool file, bool *workingset,
+				bool flush);
 void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
 void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg);
 void workingset_refault(struct folio *folio, void *shadow);
-- 
cgit v1.2.3


From bd225530a4c717714722c3731442b78954c765b3 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Thu, 27 Jun 2024 16:27:05 -0600
Subject: mm/hugetlb_vmemmap: fix race with speculative PFN walkers

While investigating HVO for THPs [1], it turns out that speculative PFN
walkers like compaction can race with vmemmap modifications, e.g.,

  CPU 1 (vmemmap modifier)         CPU 2 (speculative PFN walker)
  -------------------------------  ------------------------------
  Allocates an LRU folio page1
                                   Sees page1
  Frees page1

  Allocates a hugeTLB folio page2
  (page1 being a tail of page2)

  Updates vmemmap mapping page1
                                   get_page_unless_zero(page1)

Even though page1->_refcount is zero after HVO, get_page_unless_zero() can
still try to modify this read-only field, resulting in a crash.

An independent report [2] confirmed this race.

There are two discussed approaches to fix this race:
1. Make RO vmemmap RW so that get_page_unless_zero() can fail without
   triggering a PF.
2. Use RCU to make sure get_page_unless_zero() either sees zero
   page->_refcount through the old vmemmap or non-zero page->_refcount
   through the new one.

The second approach is preferred here because:
1. It can prevent illegal modifications to struct page[] that has been
   HVO'ed;
2. It can be generalized, in a way similar to ZERO_PAGE(), to fix
   similar races in other places, e.g., arch_remove_memory() on x86
   [3], which frees vmemmap mapping offlined struct page[].

While adding synchronize_rcu(), the goal is to be surgical, rather than
optimized.  Specifically, calls to synchronize_rcu() on the error handling
paths can be coalesced, but it is not done for the sake of Simplicity:
noticeably, this fix removes ~50% more lines than it adds.

According to the hugetlb_optimize_vmemmap section in
Documentation/admin-guide/sysctl/vm.rst, enabling HVO makes allocating or
freeing hugeTLB pages "~2x slower than before".  Having synchronize_rcu()
on top makes those operations even worse, and this also affects the user
interface /proc/sys/vm/nr_overcommit_hugepages.

This is *very* hard to trigger:

1. Most hugeTLB use cases I know of are static, i.e., reserved at
   boot time, because allocating at runtime is not reliable at all.

2. On top of that, someone has to be very unlucky to get tripped
   over above, because the race window is so small -- I wasn't able to
   trigger it with a stress testing that does nothing but that (with
   THPs though).

[1] https://lore.kernel.org/20240229183436.4110845-4-yuzhao@google.com/
[2] https://lore.kernel.org/917FFC7F-0615-44DD-90EE-9F85F8EA9974@linux.dev/
[3] https://lore.kernel.org/be130a96-a27e-4240-ad78-776802f57cad@redhat.com/

Link: https://lkml.kernel.org/r/20240627222705.2974207-1-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Acked-by: Muchun Song <muchun.song@linux.dev>
Cc: David Hildenbrand <david@redhat.com>
Cc: Frank van der Linden <fvdl@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Yang Shi <yang@os.amperecomputing.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page_ref.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 490d0ad6e56d..8c236c651d1d 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -230,7 +230,13 @@ static inline int folio_ref_dec_return(struct folio *folio)
 
 static inline bool page_ref_add_unless(struct page *page, int nr, int u)
 {
-	bool ret = atomic_add_unless(&page->_refcount, nr, u);
+	bool ret = false;
+
+	rcu_read_lock();
+	/* avoid writing to the vmemmap area being remapped */
+	if (!page_is_fake_head(page) && page_ref_count(page) != u)
+		ret = atomic_add_unless(&page->_refcount, nr, u);
+	rcu_read_unlock();
 
 	if (page_ref_tracepoint_active(page_ref_mod_unless))
 		__page_ref_mod_unless(page, nr, ret);
-- 
cgit v1.2.3


From 011f583781fa46699f1d4c4e9c39ad68f05ced2d Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Mon, 24 Jun 2024 11:39:32 +0200
Subject: genirq/irq_sim: add an extended irq_sim initializer

Currently users of the interrupt simulator don't have any way of being
notified about interrupts from the simulated domain being requested or
released. This causes a problem for one of the users - the GPIO
simulator - which is unable to lock the pins as interrupts.

Define a structure containing callbacks to be executed on various
irq_sim-related events (for now: irq request and release) and provide an
extended function for creating simulated interrupt domains that takes it
and a pointer to custom user data (to be passed to said callbacks) as
arguments.

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20240624093934.17089-2-brgl@bgdev.pl
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 include/linux/irq_sim.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/irq_sim.h b/include/linux/irq_sim.h
index ab831e5ae748..89b4d8ff274b 100644
--- a/include/linux/irq_sim.h
+++ b/include/linux/irq_sim.h
@@ -16,11 +16,28 @@
  * requested like normal irqs and enqueued from process context.
  */
 
+struct irq_sim_ops {
+	int (*irq_sim_irq_requested)(struct irq_domain *domain,
+				     irq_hw_number_t hwirq, void *data);
+	void (*irq_sim_irq_released)(struct irq_domain *domain,
+				     irq_hw_number_t hwirq, void *data);
+};
+
 struct irq_domain *irq_domain_create_sim(struct fwnode_handle *fwnode,
 					 unsigned int num_irqs);
 struct irq_domain *devm_irq_domain_create_sim(struct device *dev,
 					      struct fwnode_handle *fwnode,
 					      unsigned int num_irqs);
+struct irq_domain *irq_domain_create_sim_full(struct fwnode_handle *fwnode,
+					      unsigned int num_irqs,
+					      const struct irq_sim_ops *ops,
+					      void *data);
+struct irq_domain *
+devm_irq_domain_create_sim_full(struct device *dev,
+				struct fwnode_handle *fwnode,
+				unsigned int num_irqs,
+				const struct irq_sim_ops *ops,
+				void *data);
 void irq_domain_remove_sim(struct irq_domain *domain);
 
 #endif /* _LINUX_IRQ_SIM_H */
-- 
cgit v1.2.3


From 162e06871e6dcde861ef608e0c00a8b6a2d35d43 Mon Sep 17 00:00:00 2001
From: Anuj Gupta <anuj20.g@samsung.com>
Date: Thu, 4 Jul 2024 11:45:15 +0530
Subject: block: t10-pi: Return correct ref tag when queue has no integrity
 profile

Commit c6e56cf6b2e7 ("block: move integrity information into
queue_limits") changed the ref tag calculation logic. It would break if
there is no integrity profile. This in turn causes read/write failures
for such cases.

Fixes: c6e56cf6b2e7 ("block: move integrity information into queue_limits")
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Link: https://lore.kernel.org/r/20240704061515.282343-1-joshi.k@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/t10-pi.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 1773610010eb..2c59fe3efcd4 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -39,8 +39,11 @@ struct t10_pi_tuple {
 
 static inline u32 t10_pi_ref_tag(struct request *rq)
 {
-	unsigned int shift = rq->q->limits.integrity.interval_exp;
+	unsigned int shift = ilog2(queue_logical_block_size(rq->q));
 
+	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+	    rq->q->limits.integrity.interval_exp)
+		shift = rq->q->limits.integrity.interval_exp;
 	return blk_rq_pos(rq) >> (shift - SECTOR_SHIFT) & 0xffffffff;
 }
 
@@ -61,8 +64,11 @@ static inline u64 lower_48_bits(u64 n)
 
 static inline u64 ext_pi_ref_tag(struct request *rq)
 {
-	unsigned int shift = rq->q->limits.integrity.interval_exp;
+	unsigned int shift = ilog2(queue_logical_block_size(rq->q));
 
+	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+	    rq->q->limits.integrity.interval_exp)
+		shift = rq->q->limits.integrity.interval_exp;
 	return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT));
 }
 
-- 
cgit v1.2.3


From c10bc5614ce0027fa2282ad11827629d81957a3a Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Wed, 3 Jul 2024 20:44:18 +0200
Subject: ata,scsi: Remove wrappers ata_sas_tport_{add,delete}()

The ata_sas_tport_add() and ata_sas_tport_delete() wrappers only exist in
order to export the internal libata functions which they wrap.
Remove the wrappers and instead export the libata functions directly.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240703184418.723066-12-cassel@kernel.org
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
 include/linux/libata.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 7d3bd7c9664a..586f0116d1d7 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1250,8 +1250,8 @@ extern struct ata_port *ata_sas_port_alloc(struct ata_host *,
 					   struct ata_port_info *, struct Scsi_Host *);
 extern void ata_port_probe(struct ata_port *ap);
 extern void ata_port_free(struct ata_port *ap);
-extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap);
-extern void ata_sas_tport_delete(struct ata_port *ap);
+extern int ata_tport_add(struct device *parent, struct ata_port *ap);
+extern void ata_tport_delete(struct ata_port *ap);
 int ata_sas_device_configure(struct scsi_device *sdev, struct queue_limits *lim,
 		struct ata_port *ap);
 extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap);
-- 
cgit v1.2.3


From 2199d6ff565d66ccf0ff1ea1c6466c379e7cbb58 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Wed, 3 Jul 2024 20:44:19 +0200
Subject: ata: libata: Remove unused function declaration for ata_scsi_detect()

Remove unused function declaration for ata_scsi_detect().

Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240703184418.723066-13-cassel@kernel.org
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
 include/linux/libata.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 586f0116d1d7..580971e11804 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1082,7 +1082,6 @@ extern int ata_host_activate(struct ata_host *host, int irq,
 			     const struct scsi_host_template *sht);
 extern void ata_host_detach(struct ata_host *host);
 extern void ata_host_init(struct ata_host *, struct device *, struct ata_port_operations *);
-extern int ata_scsi_detect(struct scsi_host_template *sht);
 extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd,
 			  void __user *arg);
 #ifdef CONFIG_COMPAT
-- 
cgit v1.2.3


From 23262cce529e9f437ce2fa8fe4c2fa5b2b182318 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Wed, 3 Jul 2024 20:44:20 +0200
Subject: ata: libata-core: Remove support for decreasing the number of ports

Commit f31871951b38 ("libata: separate out ata_host_alloc() and
ata_host_register()") added ata_host_alloc(), where the API allowed
a LLD to overallocate the number of ports supplied to ata_host_alloc(),
as long as the LLD decreased host->n_ports before calling
ata_host_register().

However, this functionally has never ever been used by a single LLD.

Because of the current API design, the assignment of ap->print_id is
deferred until registration time, which is bad, because that means that
the ata_port_*() print functions cannot be used by a LLD until after
registration time, which means that a LLD is forced to use a print
function that is non-port specific, even for a port specific error.

Remove the support for decreasing the number of ports, such that it will
be possible to assign ap->print_id earlier.

Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240703184418.723066-14-cassel@kernel.org
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
 include/linux/libata.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 580971e11804..b7c5d3f33368 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1069,7 +1069,7 @@ extern int sata_std_hardreset(struct ata_link *link, unsigned int *class,
 			      unsigned long deadline);
 extern void ata_std_postreset(struct ata_link *link, unsigned int *classes);
 
-extern struct ata_host *ata_host_alloc(struct device *dev, int max_ports);
+extern struct ata_host *ata_host_alloc(struct device *dev, int n_ports);
 extern struct ata_host *ata_host_alloc_pinfo(struct device *dev,
 			const struct ata_port_info * const * ppi, int n_ports);
 extern void ata_host_get(struct ata_host *host);
-- 
cgit v1.2.3


From 1dd63a6b573ffb989ca618e60b6469f885454606 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Wed, 3 Jul 2024 20:44:22 +0200
Subject: ata: libata-core: Remove local_port_no struct member

ap->local_port_no is simply ap->port_no + 1.
Since ap->local_port_no can be derived from ap->port_no, there is no need
for the ap->local_port_no struct member, so remove ap->local_port_no.

Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240703184418.723066-16-cassel@kernel.org
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
 include/linux/libata.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index b7c5d3f33368..84a7bfbac9fa 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -814,7 +814,6 @@ struct ata_port {
 	/* Flags that change dynamically, protected by ap->lock */
 	unsigned int		pflags; /* ATA_PFLAG_xxx */
 	unsigned int		print_id; /* user visible unique port ID */
-	unsigned int            local_port_no; /* host local port num */
 	unsigned int		port_no; /* 0 based port no. inside the host */
 
 #ifdef CONFIG_ATA_SFF
-- 
cgit v1.2.3


From 0d3603acffe294ffacaf75b18b46f182b68a50df Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Wed, 3 Jul 2024 20:44:25 +0200
Subject: ata,scsi: Remove wrapper ata_sas_port_alloc()

The ata_sas_port_alloc() wrapper mainly exists in order to export the
internal libata function which it wraps. The secondary reason is that
it initializes some ata_port struct members.

However, ata_sas_port_alloc() is only used in a single location,
sas_ata_init(), which already performs some ata_port struct member
initialization, so it does not make sense to spread this initialization
out over two separate locations.

Thus, remove the wrapper and instead export the libata function directly,
and move the libsas specific ata_port initialization to sas_ata_init(),
which already does some ata_port initialization.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240703184418.723066-19-cassel@kernel.org
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
 include/linux/libata.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 84a7bfbac9fa..17394098bee9 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1244,9 +1244,8 @@ extern int sata_link_debounce(struct ata_link *link,
 extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
 			     bool spm_wakeup);
 extern int ata_slave_link_init(struct ata_port *ap);
-extern struct ata_port *ata_sas_port_alloc(struct ata_host *,
-					   struct ata_port_info *, struct Scsi_Host *);
 extern void ata_port_probe(struct ata_port *ap);
+extern struct ata_port *ata_port_alloc(struct ata_host *host);
 extern void ata_port_free(struct ata_port *ap);
 extern int ata_tport_add(struct device *parent, struct ata_port *ap);
 extern void ata_tport_delete(struct ata_port *ap);
-- 
cgit v1.2.3


From a1944676767e855869b6af8e1c7e185372feaf31 Mon Sep 17 00:00:00 2001
From: Gerhard Engleder <eg@keba.com>
Date: Sun, 30 Jun 2024 21:47:39 +0200
Subject: misc: keba: Add basic KEBA CP500 system FPGA support

The KEBA CP500 system FPGA is a PCIe device, which consists of multiple
IP cores. Every IP core has its own auxiliary driver. The cp500 driver
registers an auxiliary device for each device and the corresponding
drivers are loaded by the Linux driver infrastructure.

Currently 3 variants of this device exists. Every variant has its own
PCI device ID, which is used to determine the list of available IP
cores. In this first version only the auxiliary device for the I2C
controller is registered.

Besides the auxiliary device registration some other basic functions of
the FPGA are implemented; e.g, FPGA version sysfs file, keep FPGA
configuration on reset sysfs file, error message for errors on the
internal AXI bus of the FPGA.

Signed-off-by: Gerhard Engleder <eg@keba.com>
Link: https://lore.kernel.org/r/20240630194740.7137-2-gerhard@engleder-embedded.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/misc/keba.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 include/linux/misc/keba.h

(limited to 'include')

diff --git a/include/linux/misc/keba.h b/include/linux/misc/keba.h
new file mode 100644
index 000000000000..323b31a847c5
--- /dev/null
+++ b/include/linux/misc/keba.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2024, KEBA Industrial Automation Gmbh */
+
+#ifndef _LINUX_MISC_KEBA_H
+#define _LINUX_MISC_KEBA_H
+
+#include <linux/auxiliary_bus.h>
+
+struct i2c_board_info;
+
+/**
+ * struct keba_i2c_auxdev - KEBA I2C auxiliary device
+ * @auxdev: auxiliary device object
+ * @io: address range of I2C controller IO memory
+ * @info_size: number of I2C devices to be probed
+ * @info: I2C devices to be probed
+ */
+struct keba_i2c_auxdev {
+	struct auxiliary_device auxdev;
+	struct resource io;
+	int info_size;
+	struct i2c_board_info *info;
+};
+
+#endif /* _LINUX_MISC_KEBA_H */
-- 
cgit v1.2.3


From 37797c605da33445adc112561695f70bfaa11133 Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Mon, 1 Jul 2024 13:30:34 +0200
Subject: soc: fsl: cpm1: qmc: Introduce functions to get a channel from a
 phandle list

qmc_chan_get_byphandle() and the resource managed version retrieve a
channel from a simple phandle.

Extend the API and introduce qmc_chan_get_byphandles_index() and the
resource managed version in order to retrieve a channel from a phandle
list using the provided index to identify the phandle in the list.

Also update qmc_chan_get_byphandle() and the resource managed version to
use qmc_chan_get_byphandles_index() and so avoid code duplication.

Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Link: https://patch.msgid.link/20240701113038.55144-8-herve.codina@bootlin.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/soc/fsl/qe/qmc.h | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/soc/fsl/qe/qmc.h b/include/soc/fsl/qe/qmc.h
index 2a333fc1ea81..0fa7205145ce 100644
--- a/include/soc/fsl/qe/qmc.h
+++ b/include/soc/fsl/qe/qmc.h
@@ -16,11 +16,30 @@ struct device_node;
 struct device;
 struct qmc_chan;
 
-struct qmc_chan *qmc_chan_get_byphandle(struct device_node *np, const char *phandle_name);
+struct qmc_chan *qmc_chan_get_byphandles_index(struct device_node *np,
+					       const char *phandles_name,
+					       int index);
+struct qmc_chan *devm_qmc_chan_get_byphandles_index(struct device *dev,
+						    struct device_node *np,
+						    const char *phandles_name,
+						    int index);
+
+static inline struct qmc_chan *qmc_chan_get_byphandle(struct device_node *np,
+						      const char *phandle_name)
+{
+	return qmc_chan_get_byphandles_index(np, phandle_name, 0);
+}
+
+static inline struct qmc_chan *devm_qmc_chan_get_byphandle(struct device *dev,
+							   struct device_node *np,
+							   const char *phandle_name)
+{
+	return devm_qmc_chan_get_byphandles_index(dev, np, phandle_name, 0);
+}
+
 struct qmc_chan *qmc_chan_get_bychild(struct device_node *np);
 void qmc_chan_put(struct qmc_chan *chan);
-struct qmc_chan *devm_qmc_chan_get_byphandle(struct device *dev, struct device_node *np,
-					     const char *phandle_name);
+
 struct qmc_chan *devm_qmc_chan_get_bychild(struct device *dev, struct device_node *np);
 
 enum qmc_mode {
-- 
cgit v1.2.3


From af8432b2e41abc0a20bdc01a3b144ea7b2f1ee09 Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Mon, 1 Jul 2024 13:30:35 +0200
Subject: soc: fsl: cpm1: qmc: Introduce qmc_chan_count_phandles()

No function in the QMC API is available to get the number of phandles
present in a phandle list.

Fill this lack introducing qmc_chan_count_phandles().

Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Link: https://patch.msgid.link/20240701113038.55144-9-herve.codina@bootlin.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/soc/fsl/qe/qmc.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/soc/fsl/qe/qmc.h b/include/soc/fsl/qe/qmc.h
index 0fa7205145ce..294e42ea8d4c 100644
--- a/include/soc/fsl/qe/qmc.h
+++ b/include/soc/fsl/qe/qmc.h
@@ -16,6 +16,8 @@ struct device_node;
 struct device;
 struct qmc_chan;
 
+int qmc_chan_count_phandles(struct device_node *np, const char *phandles_name);
+
 struct qmc_chan *qmc_chan_get_byphandles_index(struct device_node *np,
 					       const char *phandles_name,
 					       int index);
-- 
cgit v1.2.3


From 4acab508eb03dc1827db6005764de29299e07569 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Wed, 3 Jul 2024 10:31:41 +0200
Subject: thermal: core: constify 'type' in
 devm_thermal_of_cooling_device_register()

The 'type' string passed to thermal_of_cooling_device_register() is a
'const char *', so do the same in the devm interface.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://patch.msgid.link/20240703083141.96013-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/thermal.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index f1155c0439c4..f732dab20368 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -261,7 +261,7 @@ thermal_of_cooling_device_register(struct device_node *np, const char *, void *,
 struct thermal_cooling_device *
 devm_thermal_of_cooling_device_register(struct device *dev,
 				struct device_node *np,
-				char *type, void *devdata,
+				const char *type, void *devdata,
 				const struct thermal_cooling_device_ops *ops);
 void thermal_cooling_device_update(struct thermal_cooling_device *);
 void thermal_cooling_device_unregister(struct thermal_cooling_device *);
@@ -305,7 +305,7 @@ thermal_of_cooling_device_register(struct device_node *np,
 static inline struct thermal_cooling_device *
 devm_thermal_of_cooling_device_register(struct device *dev,
 				struct device_node *np,
-				char *type, void *devdata,
+				const char *type, void *devdata,
 				const struct thermal_cooling_device_ops *ops)
 {
 	return ERR_PTR(-ENODEV);
-- 
cgit v1.2.3


From 14678219cf4093e897ab353fd78eab7994d1be7d Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 2 Jul 2024 14:34:35 +0800
Subject: iommu: Introduce domain attachment handle

Currently, when attaching a domain to a device or its PASID, domain is
stored within the iommu group. It could be retrieved for use during the
window between attachment and detachment.

With new features introduced, there's a need to store more information
than just a domain pointer. This information essentially represents the
association between a domain and a device. For example, the SVA code
already has a custom struct iommu_sva which represents a bond between
sva domain and a PASID of a device. Looking forward, the IOMMUFD needs
a place to store the iommufd_device pointer in the core, so that the
device object ID could be quickly retrieved in the critical fault handling
path.

Introduce domain attachment handle that explicitly represents the
attachment relationship between a domain and a device or its PASID.

Co-developed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20240702063444.105814-2-baolu.lu@linux.intel.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/iommu.h | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 17b3f36ad843..afc5af0069bb 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -989,12 +989,22 @@ struct iommu_fwspec {
 /* ATS is supported */
 #define IOMMU_FWSPEC_PCI_RC_ATS			(1 << 0)
 
+/*
+ * An iommu attach handle represents a relationship between an iommu domain
+ * and a PASID or RID of a device. It is allocated and managed by the component
+ * that manages the domain and is stored in the iommu group during the time the
+ * domain is attached.
+ */
+struct iommu_attach_handle {
+	struct iommu_domain		*domain;
+};
+
 /**
  * struct iommu_sva - handle to a device-mm bond
  */
 struct iommu_sva {
+	struct iommu_attach_handle	handle;
 	struct device			*dev;
-	struct iommu_domain		*domain;
 	struct list_head		handle_item;
 	refcount_t			users;
 };
@@ -1052,7 +1062,8 @@ int iommu_device_claim_dma_owner(struct device *dev, void *owner);
 void iommu_device_release_dma_owner(struct device *dev);
 
 int iommu_attach_device_pasid(struct iommu_domain *domain,
-			      struct device *dev, ioasid_t pasid);
+			      struct device *dev, ioasid_t pasid,
+			      struct iommu_attach_handle *handle);
 void iommu_detach_device_pasid(struct iommu_domain *domain,
 			       struct device *dev, ioasid_t pasid);
 struct iommu_domain *
@@ -1388,7 +1399,8 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner)
 }
 
 static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
-					    struct device *dev, ioasid_t pasid)
+					    struct device *dev, ioasid_t pasid,
+					    struct iommu_attach_handle *handle)
 {
 	return -ENODEV;
 }
-- 
cgit v1.2.3


From 3e7f57d1ef3f5fbed58974fae38d35e430f57d35 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 2 Jul 2024 14:34:36 +0800
Subject: iommu: Remove sva handle list

The struct sva_iommu represents an association between an SVA domain and
a PASID of a device. It's stored in the iommu group's pasid array and also
tracked by a list in the per-mm data structure. Removes duplicate tracking
of sva_iommu by eliminating the list.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20240702063444.105814-3-baolu.lu@linux.intel.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/iommu.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index afc5af0069bb..87ebcc29020e 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -1005,14 +1005,12 @@ struct iommu_attach_handle {
 struct iommu_sva {
 	struct iommu_attach_handle	handle;
 	struct device			*dev;
-	struct list_head		handle_item;
 	refcount_t			users;
 };
 
 struct iommu_mm_data {
 	u32			pasid;
 	struct list_head	sva_domains;
-	struct list_head	sva_handles;
 };
 
 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
-- 
cgit v1.2.3


From 06cdcc32d65759d42c6340700796e2906045b6a5 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 2 Jul 2024 14:34:37 +0800
Subject: iommu: Add attach handle to struct iopf_group

Previously, the domain that a page fault targets is stored in an
iopf_group, which represents a minimal set of page faults. With the
introduction of attach handle, replace the domain with the handle
so that the fault handler can obtain more information as needed
when handling the faults.

iommu_report_device_fault() is currently used for SVA page faults,
which handles the page fault in an internal cycle. The domain is retrieved
with iommu_get_domain_for_dev_pasid() if the pasid in the fault message
is valid. This doesn't work in IOMMUFD case, where if the pasid table of
a device is wholly managed by user space, there is no domain attached to
the PASID of the device, and all page faults are forwarded through a
NESTING domain attaching to RID.

Add a static flag in iommu ops, which indicates if the IOMMU driver
supports user-managed PASID tables. In the iopf deliver path, if no
attach handle found for the iopf PASID, roll back to RID domain when
the IOMMU driver supports this capability.

iommu_get_domain_for_dev_pasid() is no longer used and can be removed.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20240702063444.105814-4-baolu.lu@linux.intel.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/iommu.h | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 87ebcc29020e..910aec80886e 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -127,7 +127,7 @@ struct iopf_group {
 	/* list node for iommu_fault_param::faults */
 	struct list_head pending_node;
 	struct work_struct work;
-	struct iommu_domain *domain;
+	struct iommu_attach_handle *attach_handle;
 	/* The device's fault data parameter. */
 	struct iommu_fault_param *fault_param;
 };
@@ -547,6 +547,10 @@ static inline int __iommu_copy_struct_from_user_array(
  * @default_domain: If not NULL this will always be set as the default domain.
  *                  This should be an IDENTITY/BLOCKED/PLATFORM domain.
  *                  Do not use in new drivers.
+ * @user_pasid_table: IOMMU driver supports user-managed PASID table. There is
+ *                    no user domain for each PASID and the I/O page faults are
+ *                    forwarded through the user domain attached to the device
+ *                    RID.
  */
 struct iommu_ops {
 	bool (*capable)(struct device *dev, enum iommu_cap);
@@ -590,6 +594,7 @@ struct iommu_ops {
 	struct iommu_domain *blocked_domain;
 	struct iommu_domain *release_domain;
 	struct iommu_domain *default_domain;
+	u8 user_pasid_table:1;
 };
 
 /**
@@ -1064,9 +1069,6 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
 			      struct iommu_attach_handle *handle);
 void iommu_detach_device_pasid(struct iommu_domain *domain,
 			       struct device *dev, ioasid_t pasid);
-struct iommu_domain *
-iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
-			       unsigned int type);
 ioasid_t iommu_alloc_global_pasid(struct device *dev);
 void iommu_free_global_pasid(ioasid_t pasid);
 #else /* CONFIG_IOMMU_API */
@@ -1408,13 +1410,6 @@ static inline void iommu_detach_device_pasid(struct iommu_domain *domain,
 {
 }
 
-static inline struct iommu_domain *
-iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
-			       unsigned int type)
-{
-	return NULL;
-}
-
 static inline ioasid_t iommu_alloc_global_pasid(struct device *dev)
 {
 	return IOMMU_PASID_INVALID;
-- 
cgit v1.2.3


From a27bf2743cb80d3b36b5b43e8e2e702412c41668 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 10 Jun 2024 16:55:35 +0800
Subject: iommu: Add iommu_paging_domain_alloc() interface

Commit <17de3f5fdd35> ("iommu: Retire bus ops") removes iommu ops from
bus. The iommu subsystem no longer relies on bus for operations. So the
bus parameter in iommu_domain_alloc() is no longer relevant.

Add a new interface named iommu_paging_domain_alloc(), which explicitly
indicates the allocation of a paging domain for DMA managed by a kernel
driver. The new interface takes a device pointer as its parameter, that
better aligns with the current iommu subsystem.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Link: https://lore.kernel.org/r/20240610085555.88197-2-baolu.lu@linux.intel.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/iommu.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 17b3f36ad843..58ea0935d355 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -780,6 +780,7 @@ extern bool iommu_present(const struct bus_type *bus);
 extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
 extern bool iommu_group_has_isolated_msi(struct iommu_group *group);
 extern struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus);
+struct iommu_domain *iommu_paging_domain_alloc(struct device *dev);
 extern void iommu_domain_free(struct iommu_domain *domain);
 extern int iommu_attach_device(struct iommu_domain *domain,
 			       struct device *dev);
@@ -1086,6 +1087,11 @@ static inline struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus
 	return NULL;
 }
 
+static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 static inline void iommu_domain_free(struct iommu_domain *domain)
 {
 }
-- 
cgit v1.2.3


From 3f7c320916282c26812d70cfe8830abb9e4dc696 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Tue, 2 Jul 2024 12:40:48 +0100
Subject: iommu: Resolve fwspec ops automatically

There's no real need for callers to resolve ops from a fwnode in order
to then pass both to iommu_fwspec_init() - it's simpler and more sensible
for that to resolve the ops itself. This in turn means we can centralise
the notion of checking for a present driver, and enforce that fwspecs
aren't allocated unless and until we know they will be usable.

Also use this opportunity to modernise with some "new" helpers that
arrived shortly after this code was first written; the generic
fwnode_handle_get() clears up that ugly get/put mismatch, while
of_fwnode_handle() can now abstract those open-coded dereferences.

Tested-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/0e2727adeb8cd73274425322f2f793561bdc927e.1719919669.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/acpi/acpi_bus.h |  3 +--
 include/linux/iommu.h   | 13 ++-----------
 2 files changed, 3 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 1a4dfd7a1c4a..9d815837e297 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -736,8 +736,7 @@ struct iommu_ops;
 bool acpi_dma_supported(const struct acpi_device *adev);
 enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev);
 int acpi_iommu_fwspec_init(struct device *dev, u32 id,
-			   struct fwnode_handle *fwnode,
-			   const struct iommu_ops *ops);
+			   struct fwnode_handle *fwnode);
 int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map);
 int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr,
 			   const u32 *input_id);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 17b3f36ad843..81893aad9ee4 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -1005,11 +1005,9 @@ struct iommu_mm_data {
 	struct list_head	sva_handles;
 };
 
-int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
-		      const struct iommu_ops *ops);
+int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode);
 void iommu_fwspec_free(struct device *dev);
 int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids);
-const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode);
 
 static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
 {
@@ -1315,8 +1313,7 @@ static inline void iommu_device_unlink(struct device *dev, struct device *link)
 }
 
 static inline int iommu_fwspec_init(struct device *dev,
-				    struct fwnode_handle *iommu_fwnode,
-				    const struct iommu_ops *ops)
+				    struct fwnode_handle *iommu_fwnode)
 {
 	return -ENODEV;
 }
@@ -1331,12 +1328,6 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids,
 	return -ENODEV;
 }
 
-static inline
-const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode)
-{
-	return NULL;
-}
-
 static inline int
 iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat)
 {
-- 
cgit v1.2.3


From 3e36c15fc1cce65cccc93ed16f86d8ff9d2f9992 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Tue, 2 Jul 2024 12:40:51 +0100
Subject: iommu: Remove iommu_fwspec ops

The ops in iommu_fwspec are only needed for the early configuration and
probe process, and by now are easy enough to derive on-demand in those
couple of places which need them, so remove the redundant stored copy.

Tested-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/55c1410b2cd09531eab4f8e2f18f92a0faa0ea75.1719919669.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/iommu.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 81893aad9ee4..11ae1750cb1d 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -968,7 +968,6 @@ extern struct iommu_group *generic_single_device_group(struct device *dev);
 
 /**
  * struct iommu_fwspec - per-device IOMMU instance data
- * @ops: ops for this device's IOMMU
  * @iommu_fwnode: firmware handle for this device's IOMMU
  * @flags: IOMMU_FWSPEC_* flags
  * @num_ids: number of associated device IDs
@@ -979,7 +978,6 @@ extern struct iommu_group *generic_single_device_group(struct device *dev);
  * consumers.
  */
 struct iommu_fwspec {
-	const struct iommu_ops	*ops;
 	struct fwnode_handle	*iommu_fwnode;
 	u32			flags;
 	unsigned int		num_ids;
-- 
cgit v1.2.3


From 7640f1a44eba0cc1a41b312080c236f5c668cd50 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony.lindgren@linux.intel.com>
Date: Wed, 3 Jul 2024 13:06:08 +0300
Subject: printk: Add match_devname_and_update_preferred_console()

Let's add match_devname_and_update_preferred_console() for driver
subsystems to call during init when the console is ready, and it's
character device name is known. For now, we use it only for the serial
layer to allow console=DEVNAME:0.0 style hardware based addressing for
consoles.

The earlier attempt on doing this caused a regression with the kernel
command line console order as it added calling __add_preferred_console()
again later on during init. A better approach was suggested by Petr where
we add the deferred console to the console_cmdline[] and update it later
on when the console is ready.

Suggested-by: Petr Mladek <pmladek@suse.com>
Co-developed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Tony Lindgren <tony.lindgren@linux.intel.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240703100615.118762-2-tony.lindgren@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/printk.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 65c5184470f1..7239976698e4 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -60,6 +60,10 @@ static inline const char *printk_skip_headers(const char *buffer)
 #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT
 #define CONSOLE_LOGLEVEL_QUIET	 CONFIG_CONSOLE_LOGLEVEL_QUIET
 
+int match_devname_and_update_preferred_console(const char *match,
+					       const char *name,
+					       const short idx);
+
 extern int console_printk[];
 
 #define console_loglevel (console_printk[0])
-- 
cgit v1.2.3


From 608f6976c309793ceea37292c54b057dab091944 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Wed, 26 Jun 2024 07:35:37 -0700
Subject: perf/x86/intel: Support new data source for Lunar Lake

A new PEBS data source format is introduced for the p-core of Lunar
Lake. The data source field is extended to 8 bits with new encodings.

A new layout is introduced into the union intel_x86_pebs_dse.
Introduce the lnl_latency_data() to parse the new format.
Enlarge the pebs_data_source[] accordingly to include new encodings.

Only the mem load and the mem store events can generate the data source.
Introduce INTEL_HYBRID_LDLAT_CONSTRAINT and
INTEL_HYBRID_STLAT_CONSTRAINT to mark them.

Add two new bits for the new cache-related data src, L2_MHB and MSC.
The L2_MHB is short for L2 Miss Handling Buffer, which is similar to
LFB (Line Fill Buffer), but to track the L2 Cache misses.
The MSC stands for the memory-side cache.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Link: https://lkml.kernel.org/r/20240626143545.480761-6-kan.liang@linux.intel.com
---
 include/uapi/linux/perf_event.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 3a64499b0f5d..4842c36fdf80 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1349,12 +1349,14 @@ union perf_mem_data_src {
 #define PERF_MEM_LVLNUM_L2	0x02 /* L2 */
 #define PERF_MEM_LVLNUM_L3	0x03 /* L3 */
 #define PERF_MEM_LVLNUM_L4	0x04 /* L4 */
-/* 5-0x7 available */
+#define PERF_MEM_LVLNUM_L2_MHB	0x05 /* L2 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_MSC	0x06 /* Memory-side Cache */
+/* 0x7 available */
 #define PERF_MEM_LVLNUM_UNC	0x08 /* Uncached */
 #define PERF_MEM_LVLNUM_CXL	0x09 /* CXL */
 #define PERF_MEM_LVLNUM_IO	0x0a /* I/O */
 #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
-#define PERF_MEM_LVLNUM_LFB	0x0c /* LFB */
+#define PERF_MEM_LVLNUM_LFB	0x0c /* LFB / L1 Miss Handling Buffer */
 #define PERF_MEM_LVLNUM_RAM	0x0d /* RAM */
 #define PERF_MEM_LVLNUM_PMEM	0x0e /* PMEM */
 #define PERF_MEM_LVLNUM_NA	0x0f /* N/A */
-- 
cgit v1.2.3


From 473b2cf9c4d1711146da1d8574c61e92c6672892 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Thu, 6 Jun 2024 12:56:35 +0530
Subject: PCI: endpoint: Introduce 'epc_deinit' event and notify the EPF
 drivers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As like the 'epc_init' event, that is used to signal the EPF drivers about
the EPC initialization, let's introduce 'epc_deinit' event that is used to
signal EPC deinitialization.

The EPC deinitialization applies only when any sort of fundamental reset
is supported by the endpoint controller as per the PCIe spec.

Reference: PCIe r6.0, sec 4.2.5.9.1 and 6.6.1.

Currently, some EPC drivers like pcie-qcom-ep and pcie-tegra194 support
PERST# as the fundamental reset. So the 'deinit' event will be notified to
the EPF drivers when PERST# assert happens in the above mentioned EPC
drivers.

The EPF drivers, on receiving the event through the epc_deinit() callback
should reset the EPF state machine and also cleanup any configuration that
got affected by the fundamental reset like BAR, DMA etc...

This change also warrants skipping the cleanups in unbind() if already done
in epc_deinit().

Link: https://lore.kernel.org/r/20240606-pci-deinit-v1-2-4395534520dc@linaro.org
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
Reviewed-by: Niklas Cassel <cassel@kernel.org>
Reviewed-by: Siddharth Vadapalli <s-vadapalli@ti.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
---
 include/linux/pci-epc.h | 13 +++++++++++++
 include/linux/pci-epf.h |  2 ++
 2 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 11115cd0fe5b..85bdf2adb760 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -197,6 +197,8 @@ struct pci_epc_features {
 
 #define to_pci_epc(device) container_of((device), struct pci_epc, dev)
 
+#ifdef CONFIG_PCI_ENDPOINT
+
 #define pci_epc_create(dev, ops)    \
 		__pci_epc_create((dev), (ops), THIS_MODULE)
 #define devm_pci_epc_create(dev, ops)    \
@@ -226,6 +228,7 @@ void pci_epc_linkup(struct pci_epc *epc);
 void pci_epc_linkdown(struct pci_epc *epc);
 void pci_epc_init_notify(struct pci_epc *epc);
 void pci_epc_notify_pending_init(struct pci_epc *epc, struct pci_epf *epf);
+void pci_epc_deinit_notify(struct pci_epc *epc);
 void pci_epc_bus_master_enable_notify(struct pci_epc *epc);
 void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf,
 			enum pci_epc_interface_type type);
@@ -272,4 +275,14 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc,
 				     phys_addr_t *phys_addr, size_t size);
 void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr,
 			   void __iomem *virt_addr, size_t size);
+
+#else
+static inline void pci_epc_init_notify(struct pci_epc *epc)
+{
+}
+
+static inline void pci_epc_deinit_notify(struct pci_epc *epc)
+{
+}
+#endif /* CONFIG_PCI_ENDPOINT */
 #endif /* __LINUX_PCI_EPC_H */
diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h
index dc759eb7157c..0639d4dc8986 100644
--- a/include/linux/pci-epf.h
+++ b/include/linux/pci-epf.h
@@ -71,12 +71,14 @@ struct pci_epf_ops {
 /**
  * struct pci_epc_event_ops - Callbacks for capturing the EPC events
  * @epc_init: Callback for the EPC initialization complete event
+ * @epc_deinit: Callback for the EPC deinitialization event
  * @link_up: Callback for the EPC link up event
  * @link_down: Callback for the EPC link down event
  * @bus_master_enable: Callback for the EPC Bus Master Enable event
  */
 struct pci_epc_event_ops {
 	int (*epc_init)(struct pci_epf *epf);
+	void (*epc_deinit)(struct pci_epf *epf);
 	int (*link_up)(struct pci_epf *epf);
 	int (*link_down)(struct pci_epf *epf);
 	int (*bus_master_enable)(struct pci_epf *epf);
-- 
cgit v1.2.3


From e269d79c7d35aa3808b1f3c1737d63dab504ddc8 Mon Sep 17 00:00:00 2001
From: Denis Arefev <arefev@swemel.ru>
Date: Thu, 13 Jun 2024 12:54:48 +0300
Subject: net: missing check virtio

Two missing check in virtio_net_hdr_to_skb() allowed syzbot
to crash kernels again

1. After the skb_segment function the buffer may become non-linear
(nr_frags != 0), but since the SKBTX_SHARED_FRAG flag is not set anywhere
the __skb_linearize function will not be executed, then the buffer will
remain non-linear. Then the condition (offset >= skb_headlen(skb))
becomes true, which causes WARN_ON_ONCE in skb_checksum_help.

2. The struct sk_buff and struct virtio_net_hdr members must be
mathematically related.
(gso_size) must be greater than (needed) otherwise WARN_ON_ONCE.
(remainder) must be greater than (needed) otherwise WARN_ON_ONCE.
(remainder) may be 0 if division is without remainder.

offset+2 (4191) > skb_headlen() (1116)
WARNING: CPU: 1 PID: 5084 at net/core/dev.c:3303 skb_checksum_help+0x5e2/0x740 net/core/dev.c:3303
Modules linked in:
CPU: 1 PID: 5084 Comm: syz-executor336 Not tainted 6.7.0-rc3-syzkaller-00014-gdf60cee26a2e #0
Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023
RIP: 0010:skb_checksum_help+0x5e2/0x740 net/core/dev.c:3303
Code: 89 e8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 52 01 00 00 44 89 e2 2b 53 74 4c 89 ee 48 c7 c7 40 57 e9 8b e8 af 8f dd f8 90 <0f> 0b 90 90 e9 87 fe ff ff e8 40 0f 6e f9 e9 4b fa ff ff 48 89 ef
RSP: 0018:ffffc90003a9f338 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff888025125780 RCX: ffffffff814db209
RDX: ffff888015393b80 RSI: ffffffff814db216 RDI: 0000000000000001
RBP: ffff8880251257f4 R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000001 R12: 000000000000045c
R13: 000000000000105f R14: ffff8880251257f0 R15: 000000000000105d
FS:  0000555555c24380(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000002000f000 CR3: 0000000023151000 CR4: 00000000003506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 <TASK>
 ip_do_fragment+0xa1b/0x18b0 net/ipv4/ip_output.c:777
 ip_fragment.constprop.0+0x161/0x230 net/ipv4/ip_output.c:584
 ip_finish_output_gso net/ipv4/ip_output.c:286 [inline]
 __ip_finish_output net/ipv4/ip_output.c:308 [inline]
 __ip_finish_output+0x49c/0x650 net/ipv4/ip_output.c:295
 ip_finish_output+0x31/0x310 net/ipv4/ip_output.c:323
 NF_HOOK_COND include/linux/netfilter.h:303 [inline]
 ip_output+0x13b/0x2a0 net/ipv4/ip_output.c:433
 dst_output include/net/dst.h:451 [inline]
 ip_local_out+0xaf/0x1a0 net/ipv4/ip_output.c:129
 iptunnel_xmit+0x5b4/0x9b0 net/ipv4/ip_tunnel_core.c:82
 ipip6_tunnel_xmit net/ipv6/sit.c:1034 [inline]
 sit_tunnel_xmit+0xed2/0x28f0 net/ipv6/sit.c:1076
 __netdev_start_xmit include/linux/netdevice.h:4940 [inline]
 netdev_start_xmit include/linux/netdevice.h:4954 [inline]
 xmit_one net/core/dev.c:3545 [inline]
 dev_hard_start_xmit+0x13d/0x6d0 net/core/dev.c:3561
 __dev_queue_xmit+0x7c1/0x3d60 net/core/dev.c:4346
 dev_queue_xmit include/linux/netdevice.h:3134 [inline]
 packet_xmit+0x257/0x380 net/packet/af_packet.c:276
 packet_snd net/packet/af_packet.c:3087 [inline]
 packet_sendmsg+0x24ca/0x5240 net/packet/af_packet.c:3119
 sock_sendmsg_nosec net/socket.c:730 [inline]
 __sock_sendmsg+0xd5/0x180 net/socket.c:745
 __sys_sendto+0x255/0x340 net/socket.c:2190
 __do_sys_sendto net/socket.c:2202 [inline]
 __se_sys_sendto net/socket.c:2198 [inline]
 __x64_sys_sendto+0xe0/0x1b0 net/socket.c:2198
 do_syscall_x64 arch/x86/entry/common.c:51 [inline]
 do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82
 entry_SYSCALL_64_after_hwframe+0x63/0x6b

Found by Linux Verification Center (linuxtesting.org) with Syzkaller

Fixes: 0f6925b3e8da ("virtio_net: Do not pull payload in skb->head")
Signed-off-by: Denis Arefev <arefev@swemel.ru>
Message-Id: <20240613095448.27118-1-arefev@swemel.ru>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_net.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 4dfa9b69ca8d..d1d7825318c3 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -56,6 +56,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 	unsigned int thlen = 0;
 	unsigned int p_off = 0;
 	unsigned int ip_proto;
+	u64 ret, remainder, gso_size;
 
 	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
 		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
@@ -98,6 +99,16 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 		u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset);
 		u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16));
 
+		if (hdr->gso_size) {
+			gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size);
+			ret = div64_u64_rem(skb->len, gso_size, &remainder);
+			if (!(ret && (hdr->gso_size > needed) &&
+						((remainder > needed) || (remainder == 0)))) {
+				return -EINVAL;
+			}
+			skb_shinfo(skb)->tx_flags |= SKBFL_SHARED_FRAG;
+		}
+
 		if (!pskb_may_pull(skb, needed))
 			return -EINVAL;
 
-- 
cgit v1.2.3


From a1cacb8a8e70c38ec0c78910c668abda99fcb780 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Mon, 24 Jun 2024 17:19:56 +0200
Subject: backlight: Add BACKLIGHT_POWER_ constants for power states

Duplicate FB_BLANK_ constants as BACKLIGHT_POWER__ constants in the
backlight header file. Allows backlight drivers to avoid including
the fbdev header file and removes a compile-time dependency between
the two subsystems.

The new BACKLIGHT_POWER_ constants have the same values as their
FB_BLANK_ counterparts. Hence UAPI and internal semantics do not
change. The backlight drivers can be converted one by one. Each
instance of FB_BLANK_UNBLANK becomes BACKLIGHT_POWER_ON, each of
FB_BLANK_POWERDOWN becomes BACKLIGHT_POWER_OFF, and FB_BLANK_NORMAL
becomes BACKLIGHT_POWER_REDUCED.

Backlight code or drivers do not use FB_BLANK_VSYNC_SUSPEND and
FB_BLANK_HSYNC_SUSPEND, so no new constants for these are being
added.

The semantics of FB_BLANK_NORMAL appear inconsistent. In fbdev,
NORMAL means display off with sync enabled. In backlight code,
this translates to turn the backlight off, but some drivers
interpret it as backlight on. So we keep the current code as is,
but mark BACKLIGHT_POWER_REDUCED as deprecated. Drivers should be
fixed and the constant removed. This affects ams369fg06 and a few
DRM panel drivers.

v2:
- rename BL_CORE_ power constants to BACKLIGHT_POWER_ (Sam)
- fix documentation

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Link: https://lore.kernel.org/r/20240624152033.25016-2-tzimmermann@suse.de
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/backlight.h | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 19a1c0e22629..ea9c1bc8148e 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -209,15 +209,19 @@ struct backlight_properties {
 	 * attribute: /sys/class/backlight/<backlight>/bl_power
 	 * When the power property is updated update_status() is called.
 	 *
-	 * The possible values are: (0: full on, 1 to 3: power saving
-	 * modes; 4: full off), see FB_BLANK_XXX.
+	 * The possible values are: (0: full on, 4: full off), see
+	 * BACKLIGHT_POWER constants.
 	 *
-	 * When the backlight device is enabled @power is set
-	 * to FB_BLANK_UNBLANK. When the backlight device is disabled
-	 * @power is set to FB_BLANK_POWERDOWN.
+	 * When the backlight device is enabled, @power is set to
+	 * BACKLIGHT_POWER_ON. When the backlight device is disabled,
+	 * @power is set to BACKLIGHT_POWER_OFF.
 	 */
 	int power;
 
+#define BACKLIGHT_POWER_ON		(0)
+#define BACKLIGHT_POWER_OFF		(4)
+#define BACKLIGHT_POWER_REDUCED		(1) // deprecated; don't use in new code
+
 	/**
 	 * @type: The type of backlight supported.
 	 *
@@ -346,7 +350,7 @@ static inline int backlight_enable(struct backlight_device *bd)
 	if (!bd)
 		return 0;
 
-	bd->props.power = FB_BLANK_UNBLANK;
+	bd->props.power = BACKLIGHT_POWER_ON;
 	bd->props.state &= ~BL_CORE_FBBLANK;
 
 	return backlight_update_status(bd);
@@ -361,7 +365,7 @@ static inline int backlight_disable(struct backlight_device *bd)
 	if (!bd)
 		return 0;
 
-	bd->props.power = FB_BLANK_POWERDOWN;
+	bd->props.power = BACKLIGHT_POWER_OFF;
 	bd->props.state |= BL_CORE_FBBLANK;
 
 	return backlight_update_status(bd);
@@ -380,7 +384,7 @@ static inline int backlight_disable(struct backlight_device *bd)
  */
 static inline bool backlight_is_blank(const struct backlight_device *bd)
 {
-	return bd->props.power != FB_BLANK_UNBLANK ||
+	return bd->props.power != BACKLIGHT_POWER_ON ||
 	       bd->props.state & (BL_CORE_SUSPENDED | BL_CORE_FBBLANK);
 }
 
-- 
cgit v1.2.3


From 326ae03d772de6e082d346466671c3b66de2962d Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@kernel.org>
Date: Tue, 7 May 2024 17:53:56 +0100
Subject: mfd: idt8a340_reg: Start comments with '/*'

Several comments in idt8a340_reg.h start with '/**', which denotes the
start of a Kernel doc, but are otherwise not Kernel docs.

Resolve this conflict by starting these comments with '/*' instead.

Flagged by ./scripts/kernel-doc -none

Signed-off-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20240507-clockmatrix-kernel-doc-v2-1-3138d74192dd@kernel.org
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/idt8a340_reg.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/idt8a340_reg.h b/include/linux/mfd/idt8a340_reg.h
index 0c706085c205..53a222605526 100644
--- a/include/linux/mfd/idt8a340_reg.h
+++ b/include/linux/mfd/idt8a340_reg.h
@@ -61,7 +61,7 @@
 #define HW_Q8_CTRL_SPARE  (0xa7d4)
 #define HW_Q11_CTRL_SPARE (0xa7ec)
 
-/**
+/*
  * Select FOD5 as sync_trigger for Q8 divider.
  * Transition from logic zero to one
  * sets trigger to sync Q8 divider.
@@ -70,7 +70,7 @@
  */
 #define Q9_TO_Q8_SYNC_TRIG  BIT(1)
 
-/**
+/*
  * Enable FOD5 as driver for clock and sync for Q8 divider.
  * Enable fanout buffer for FOD5.
  *
@@ -78,7 +78,7 @@
  */
 #define Q9_TO_Q8_FANOUT_AND_CLOCK_SYNC_ENABLE_MASK  (BIT(0) | BIT(2))
 
-/**
+/*
  * Select FOD6 as sync_trigger for Q11 divider.
  * Transition from logic zero to one
  * sets trigger to sync Q11 divider.
@@ -87,7 +87,7 @@
  */
 #define Q10_TO_Q11_SYNC_TRIG  BIT(1)
 
-/**
+/*
  * Enable FOD6 as driver for clock and sync for Q11 divider.
  * Enable fanout buffer for FOD6.
  *
-- 
cgit v1.2.3


From 13c151a919a876521c16810756764aa38683eb62 Mon Sep 17 00:00:00 2001
From: Andrew Davis <afd@ti.com>
Date: Thu, 13 Jun 2024 12:54:30 -0500
Subject: mfd: tps65912: Use devm helper functions to simplify probe

This simplifies probe and also allows us to remove the remove
callbacks from the core and interface drivers. Do that here.

Signed-off-by: Andrew Davis <afd@ti.com>
Link: https://lore.kernel.org/r/20240613175430.57698-1-afd@ti.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/tps65912.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mfd/tps65912.h b/include/linux/mfd/tps65912.h
index 860ec0a16c96..e5373c302722 100644
--- a/include/linux/mfd/tps65912.h
+++ b/include/linux/mfd/tps65912.h
@@ -314,6 +314,5 @@ struct tps65912 {
 extern const struct regmap_config tps65912_regmap_config;
 
 int tps65912_device_init(struct tps65912 *tps);
-void tps65912_device_exit(struct tps65912 *tps);
 
 #endif /*  __LINUX_MFD_TPS65912_H */
-- 
cgit v1.2.3


From d7636117ca976e217b6452c082e7f7c041f4019c Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 5 Jun 2024 22:14:16 +0300
Subject: mfd: lm3533: Move to new GPIO descriptor-based APIs

Legacy GPIO APIs are subject to remove. Convert the driver to new APIs.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20240605191458.2536819-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/lm3533.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/lm3533.h b/include/linux/mfd/lm3533.h
index 77092f6363ad..69059a7a2ce5 100644
--- a/include/linux/mfd/lm3533.h
+++ b/include/linux/mfd/lm3533.h
@@ -16,6 +16,7 @@
 	DEVICE_ATTR(_name, S_IRUGO | S_IWUSR , show_##_name, store_##_name)
 
 struct device;
+struct gpio_desc;
 struct regmap;
 
 struct lm3533 {
@@ -23,7 +24,7 @@ struct lm3533 {
 
 	struct regmap *regmap;
 
-	int gpio_hwen;
+	struct gpio_desc *hwen;
 	int irq;
 
 	unsigned have_als:1;
@@ -69,8 +70,6 @@ enum lm3533_boost_ovp {
 };
 
 struct lm3533_platform_data {
-	int gpio_hwen;
-
 	enum lm3533_boost_ovp boost_ovp;
 	enum lm3533_boost_freq boost_freq;
 
-- 
cgit v1.2.3


From 1d845319dc819c39e2ee568f1c6bd0cdb2fbc035 Mon Sep 17 00:00:00 2001
From: Etienne Carriere <etienne.carriere@foss.st.com>
Date: Mon, 17 Jun 2024 11:20:16 +0200
Subject: dt-bindings: mfd: Dual licensing for st,stpmic1 bindings

Change include/dt-bindings/mfd/st,stpmic1.h license model from GPLv2.0
only to dual GPLv2.0 or BSD-2-Clause. I have every legitimacy to request
this change on behalf of STMicroelectronics. This change clarifies that
this DT binding header file can be shared with software components as
bootloaders and OSes that are not published under GPLv2 terms.

In CC are all the contributors to this header file.

Cc: Pascal Paillet <p.paillet@st.com>
Cc: Lee Jones <lee@kernel.org>
Cc: Rob Herring <robh@kernel.org>
Signed-off-by: Etienne Carriere <etienne.carriere@foss.st.com>
Acked-by: Rob Herring (Arm) <robh@kernel.org>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20240617092016.2958046-1-etienne.carriere@foss.st.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/dt-bindings/mfd/st,stpmic1.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/dt-bindings/mfd/st,stpmic1.h b/include/dt-bindings/mfd/st,stpmic1.h
index 321cd08797d9..9dd15b9c743e 100644
--- a/include/dt-bindings/mfd/st,stpmic1.h
+++ b/include/dt-bindings/mfd/st,stpmic1.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */
 /*
  * Copyright (C) STMicroelectronics 2018 - All Rights Reserved
  * Author: Philippe Peurichard <philippe.peurichard@st.com>,
-- 
cgit v1.2.3


From d89e8096957e35742c9922d3f6628f24de0d6163 Mon Sep 17 00:00:00 2001
From: Johan Jonker <jbx6244@gmail.com>
Date: Thu, 27 Jun 2024 23:17:45 +0200
Subject: dt-bindings: clock: rk3188-cru-common: remove CLK_NR_CLKS

CLK_NR_CLKS should not be part of the binding.
Remove since the kernel code no longer uses it.

Signed-off-by: Johan Jonker <jbx6244@gmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/6f21c09b-e8d2-4749-aca6-572c79df775d@gmail.com
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 include/dt-bindings/clock/rk3188-cru-common.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/rk3188-cru-common.h b/include/dt-bindings/clock/rk3188-cru-common.h
index afad90680fce..01e14ab252a7 100644
--- a/include/dt-bindings/clock/rk3188-cru-common.h
+++ b/include/dt-bindings/clock/rk3188-cru-common.h
@@ -132,8 +132,6 @@
 #define HCLK_VDPU		472
 #define HCLK_HDMI		473
 
-#define CLK_NR_CLKS		(HCLK_HDMI + 1)
-
 /* soft-reset indices */
 #define SRST_MCORE		2
 #define SRST_CORE0		3
-- 
cgit v1.2.3


From caa93b7c25945d302689de07bd404655db93ae6e Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree.xilinx@gmail.com>
Date: Wed, 3 Jul 2024 13:18:49 +0100
Subject: ethtool: move firmware flashing flag to struct ethtool_netdev_state

Commit 31e0aa99dc02 ("ethtool: Veto some operations during firmware flashing process")
 added a flag module_fw_flash_in_progress to struct net_device.  As
 this is ethtool related state, move it to the recently created
 struct ethtool_netdev_state, accessed via the 'ethtool' member of
 struct net_device.

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20240703121849.652893-1-edward.cree@amd.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h   | 2 ++
 include/linux/netdevice.h | 3 ---
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index f74bb0cf8ed1..3a99238ef895 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -1107,11 +1107,13 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
  * @rss_lock:		Protects entries in @rss_ctx.  May be taken from
  *			within RTNL.
  * @wol_enabled:	Wake-on-LAN is enabled
+ * @module_fw_flash_in_progress: Module firmware flashing is in progress.
  */
 struct ethtool_netdev_state {
 	struct xarray		rss_ctx;
 	struct mutex		rss_lock;
 	unsigned		wol_enabled:1;
+	unsigned		module_fw_flash_in_progress:1;
 };
 
 struct phy_device;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3c719f0d5f5a..93558645c6d0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1989,8 +1989,6 @@ enum netdev_reg_state {
  *
  *	@threaded:	napi threaded mode is enabled
  *
- *	@module_fw_flash_in_progress:	Module firmware flashing is in progress.
- *
  *	@net_notifier_list:	List of per-net netdev notifier block
  *				that follow this device when it is moved
  *				to another network namespace.
@@ -2376,7 +2374,6 @@ struct net_device {
 	bool			proto_down;
 	bool			threaded;
 
-	unsigned		module_fw_flash_in_progress:1;
 	struct list_head	net_notifier_list;
 
 #if IS_ENABLED(CONFIG_MACSEC)
-- 
cgit v1.2.3


From a61809a33239821d70eba77bd0d6d13c29bbad0d Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Wed, 3 Jul 2024 18:33:14 +0300
Subject: tpm: Address !chip->auth in tpm_buf_append_name()

Unless tpm_chip_bootstrap() was called by the driver, !chip->auth can
cause a null derefence in tpm_buf_append_name().  Thus, address
!chip->auth in tpm_buf_append_name() and remove the fallback
implementation for !TCG_TPM2_HMAC.

Cc: stable@vger.kernel.org # v6.10+
Reported-by: Stefan Berger <stefanb@linux.ibm.com>
Closes: https://lore.kernel.org/linux-integrity/20240617193408.1234365-1-stefanb@linux.ibm.com/
Fixes: d0a25bb961e6 ("tpm: Add HMAC session name/handle append")
Tested-by: Michael Ellerman <mpe@ellerman.id.au> # ppc
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 include/linux/tpm.h | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 21a67dc9efe8..4d3071e885a0 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -490,11 +490,22 @@ static inline void tpm_buf_append_empty_auth(struct tpm_buf *buf, u32 handle)
 {
 }
 #endif
+
+static inline struct tpm2_auth *tpm2_chip_auth(struct tpm_chip *chip)
+{
 #ifdef CONFIG_TCG_TPM2_HMAC
+	return chip->auth;
+#else
+	return NULL;
+#endif
+}
 
-int tpm2_start_auth_session(struct tpm_chip *chip);
 void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
 			 u32 handle, u8 *name);
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+
+int tpm2_start_auth_session(struct tpm_chip *chip);
 void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
 				 u8 attributes, u8 *passphrase,
 				 int passphraselen);
@@ -521,14 +532,6 @@ static inline int tpm2_start_auth_session(struct tpm_chip *chip)
 static inline void tpm2_end_auth_session(struct tpm_chip *chip)
 {
 }
-static inline void tpm_buf_append_name(struct tpm_chip *chip,
-				       struct tpm_buf *buf,
-				       u32 handle, u8 *name)
-{
-	tpm_buf_append_u32(buf, handle);
-	/* count the number of handles in the upper bits of flags */
-	buf->handles++;
-}
 static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip,
 					       struct tpm_buf *buf,
 					       u8 attributes, u8 *passphrase,
-- 
cgit v1.2.3


From 7ca110f2679b7d1f3ac1afc90e6ffbf0af3edf0d Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Wed, 3 Jul 2024 18:47:46 +0300
Subject: tpm: Address !chip->auth in tpm_buf_append_hmac_session*()

Unless tpm_chip_bootstrap() was called by the driver, !chip->auth can
cause a null derefence in tpm_buf_hmac_session*().  Thus, address
!chip->auth in tpm_buf_hmac_session*() and remove the fallback
implementation for !TCG_TPM2_HMAC.

Cc: stable@vger.kernel.org # v6.9+
Reported-by: Stefan Berger <stefanb@linux.ibm.com>
Closes: https://lore.kernel.org/linux-integrity/20240617193408.1234365-1-stefanb@linux.ibm.com/
Fixes: 1085b8276bb4 ("tpm: Add the rest of the session HMAC API")
Tested-by: Michael Ellerman <mpe@ellerman.id.au> # ppc
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 include/linux/tpm.h | 68 ++++++++++++++++-------------------------------------
 1 file changed, 20 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 4d3071e885a0..e93ee8d936a9 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -502,10 +502,6 @@ static inline struct tpm2_auth *tpm2_chip_auth(struct tpm_chip *chip)
 
 void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
 			 u32 handle, u8 *name);
-
-#ifdef CONFIG_TCG_TPM2_HMAC
-
-int tpm2_start_auth_session(struct tpm_chip *chip);
 void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
 				 u8 attributes, u8 *passphrase,
 				 int passphraselen);
@@ -515,9 +511,27 @@ static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip,
 						   u8 *passphrase,
 						   int passphraselen)
 {
-	tpm_buf_append_hmac_session(chip, buf, attributes, passphrase,
-				    passphraselen);
+	struct tpm_header *head;
+	int offset;
+
+	if (tpm2_chip_auth(chip)) {
+		tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, passphraselen);
+	} else  {
+		offset = buf->handles * 4 + TPM_HEADER_SIZE;
+		head = (struct tpm_header *)buf->data;
+
+		/*
+		 * If the only sessions are optional, the command tag must change to
+		 * TPM2_ST_NO_SESSIONS.
+		 */
+		if (tpm_buf_length(buf) == offset)
+			head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS);
+	}
 }
+
+#ifdef CONFIG_TCG_TPM2_HMAC
+
+int tpm2_start_auth_session(struct tpm_chip *chip);
 void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf);
 int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf,
 				int rc);
@@ -532,48 +546,6 @@ static inline int tpm2_start_auth_session(struct tpm_chip *chip)
 static inline void tpm2_end_auth_session(struct tpm_chip *chip)
 {
 }
-static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip,
-					       struct tpm_buf *buf,
-					       u8 attributes, u8 *passphrase,
-					       int passphraselen)
-{
-	/* offset tells us where the sessions area begins */
-	int offset = buf->handles * 4 + TPM_HEADER_SIZE;
-	u32 len = 9 + passphraselen;
-
-	if (tpm_buf_length(buf) != offset) {
-		/* not the first session so update the existing length */
-		len += get_unaligned_be32(&buf->data[offset]);
-		put_unaligned_be32(len, &buf->data[offset]);
-	} else {
-		tpm_buf_append_u32(buf, len);
-	}
-	/* auth handle */
-	tpm_buf_append_u32(buf, TPM2_RS_PW);
-	/* nonce */
-	tpm_buf_append_u16(buf, 0);
-	/* attributes */
-	tpm_buf_append_u8(buf, 0);
-	/* passphrase */
-	tpm_buf_append_u16(buf, passphraselen);
-	tpm_buf_append(buf, passphrase, passphraselen);
-}
-static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip,
-						   struct tpm_buf *buf,
-						   u8 attributes,
-						   u8 *passphrase,
-						   int passphraselen)
-{
-	int offset = buf->handles * 4 + TPM_HEADER_SIZE;
-	struct tpm_header *head = (struct tpm_header *) buf->data;
-
-	/*
-	 * if the only sessions are optional, the command tag
-	 * must change to TPM2_ST_NO_SESSIONS
-	 */
-	if (tpm_buf_length(buf) == offset)
-		head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS);
-}
 static inline void tpm_buf_fill_hmac_session(struct tpm_chip *chip,
 					     struct tpm_buf *buf)
 {
-- 
cgit v1.2.3


From 87024f5837485c2f9541283747428df54c0f9183 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Mon, 24 Jun 2024 17:58:55 -0700
Subject: mm: memcg: rename soft limit reclaim-related functions

Rename exported function related to the softlimit reclaim to have memcg1_
prefix.

Link: https://lkml.kernel.org/r/20240625005906.106920-4-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7403dd5926eb..83c8327455d8 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1121,9 +1121,9 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
 
 void split_page_memcg(struct page *head, int old_order, int new_order);
 
-unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
-						gfp_t gfp_mask,
-						unsigned long *total_scanned);
+unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
+					gfp_t gfp_mask,
+					unsigned long *total_scanned);
 
 #else /* CONFIG_MEMCG */
 
@@ -1572,9 +1572,9 @@ static inline void split_page_memcg(struct page *head, int old_order, int new_or
 }
 
 static inline
-unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
-					    gfp_t gfp_mask,
-					    unsigned long *total_scanned)
+unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
+					gfp_t gfp_mask,
+					unsigned long *total_scanned)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 66d60c428b23c5b171218985b6880958fb7edbe3 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Mon, 24 Jun 2024 17:58:58 -0700
Subject: mm: memcg: move legacy memcg event code into memcontrol-v1.c

Cgroup v1's memory controller contains a pretty complicated event
notifications mechanism which is not used on cgroup v2.  Let's move the
corresponding code into memcontrol-v1.c.

Please, note, that mem_cgroup_event_ratelimit() remains in memcontrol.c,
otherwise it would require exporting too many details on memcg stats
outside of memcontrol.c.

Link: https://lkml.kernel.org/r/20240625005906.106920-7-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 83c8327455d8..588179d29849 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -69,18 +69,6 @@ struct mem_cgroup_id {
 	refcount_t ref;
 };
 
-/*
- * Per memcg event counter is incremented at every pagein/pageout. With THP,
- * it will be incremented by the number of pages. This counter is used
- * to trigger some periodic events. This is straightforward and better
- * than using jiffies etc. to handle periodic memcg event.
- */
-enum mem_cgroup_events_target {
-	MEM_CGROUP_TARGET_THRESH,
-	MEM_CGROUP_TARGET_SOFTLIMIT,
-	MEM_CGROUP_NTARGETS,
-};
-
 struct memcg_vmstats_percpu;
 struct memcg_vmstats;
 struct lruvec_stats_percpu;
-- 
cgit v1.2.3


From 6f1173d6845974aeb654a7f070c0c9f21283e1b3 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Mon, 24 Jun 2024 17:59:04 -0700
Subject: mm: memcg: group cgroup v1 memcg related declarations

Group all cgroup v1-related declarations at the end of memcontrol.h and
mm/memcontrol-v1.h with an intention to put them all together under a
config option later on.  It should make things easier to follow and
maintain too.

Link: https://lkml.kernel.org/r/20240625005906.106920-13-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 144 ++++++++++++++++++++++++---------------------
 1 file changed, 76 insertions(+), 68 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 588179d29849..a70d64ed04f5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -950,39 +950,13 @@ static inline void mem_cgroup_exit_user_fault(void)
 	current->in_user_fault = 0;
 }
 
-static inline bool task_in_memcg_oom(struct task_struct *p)
-{
-	return p->memcg_in_oom;
-}
-
-bool mem_cgroup_oom_synchronize(bool wait);
 struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
 					    struct mem_cgroup *oom_domain);
 void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
 
-void folio_memcg_lock(struct folio *folio);
-void folio_memcg_unlock(struct folio *folio);
-
 void __mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx,
 		       int val);
 
-/* try to stablize folio_memcg() for all the pages in a memcg */
-static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
-{
-	rcu_read_lock();
-
-	if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account))
-		return true;
-
-	rcu_read_unlock();
-	return false;
-}
-
-static inline void mem_cgroup_unlock_pages(void)
-{
-	rcu_read_unlock();
-}
-
 /* idx can be of type enum memcg_stat_item or node_stat_item */
 static inline void mod_memcg_state(struct mem_cgroup *memcg,
 				   enum memcg_stat_item idx, int val)
@@ -1109,10 +1083,6 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
 
 void split_page_memcg(struct page *head, int old_order, int new_order);
 
-unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
-					gfp_t gfp_mask,
-					unsigned long *total_scanned);
-
 #else /* CONFIG_MEMCG */
 
 #define MEM_CGROUP_ID_SHIFT	0
@@ -1423,26 +1393,6 @@ mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
 {
 }
 
-static inline void folio_memcg_lock(struct folio *folio)
-{
-}
-
-static inline void folio_memcg_unlock(struct folio *folio)
-{
-}
-
-static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
-{
-	/* to match folio_memcg_rcu() */
-	rcu_read_lock();
-	return true;
-}
-
-static inline void mem_cgroup_unlock_pages(void)
-{
-	rcu_read_unlock();
-}
-
 static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask)
 {
 }
@@ -1455,16 +1405,6 @@ static inline void mem_cgroup_exit_user_fault(void)
 {
 }
 
-static inline bool task_in_memcg_oom(struct task_struct *p)
-{
-	return false;
-}
-
-static inline bool mem_cgroup_oom_synchronize(bool wait)
-{
-	return false;
-}
-
 static inline struct mem_cgroup *mem_cgroup_get_oom_group(
 	struct task_struct *victim, struct mem_cgroup *oom_domain)
 {
@@ -1558,14 +1498,6 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
 static inline void split_page_memcg(struct page *head, int old_order, int new_order)
 {
 }
-
-static inline
-unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
-					gfp_t gfp_mask,
-					unsigned long *total_scanned)
-{
-	return 0;
-}
 #endif /* CONFIG_MEMCG */
 
 /*
@@ -1916,4 +1848,80 @@ static inline bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg)
 }
 #endif
 
+
+/* Cgroup v1-related declarations */
+
+#ifdef CONFIG_MEMCG
+unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
+					gfp_t gfp_mask,
+					unsigned long *total_scanned);
+
+bool mem_cgroup_oom_synchronize(bool wait);
+
+static inline bool task_in_memcg_oom(struct task_struct *p)
+{
+	return p->memcg_in_oom;
+}
+
+void folio_memcg_lock(struct folio *folio);
+void folio_memcg_unlock(struct folio *folio);
+
+/* try to stablize folio_memcg() for all the pages in a memcg */
+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
+{
+	rcu_read_lock();
+
+	if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account))
+		return true;
+
+	rcu_read_unlock();
+	return false;
+}
+
+static inline void mem_cgroup_unlock_pages(void)
+{
+	rcu_read_unlock();
+}
+
+#else /* CONFIG_MEMCG */
+static inline
+unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
+					gfp_t gfp_mask,
+					unsigned long *total_scanned)
+{
+	return 0;
+}
+
+static inline void folio_memcg_lock(struct folio *folio)
+{
+}
+
+static inline void folio_memcg_unlock(struct folio *folio)
+{
+}
+
+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
+{
+	/* to match folio_memcg_rcu() */
+	rcu_read_lock();
+	return true;
+}
+
+static inline void mem_cgroup_unlock_pages(void)
+{
+	rcu_read_unlock();
+}
+
+static inline bool task_in_memcg_oom(struct task_struct *p)
+{
+	return false;
+}
+
+static inline bool mem_cgroup_oom_synchronize(bool wait)
+{
+	return false;
+}
+
+#endif /* CONFIG_MEMCG */
+
 #endif /* _LINUX_MEMCONTROL_H */
-- 
cgit v1.2.3


From e93d4166b40a84df83c4d5cb4c709d022808ef9b Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Mon, 24 Jun 2024 17:59:05 -0700
Subject: mm: memcg: put cgroup v1-specific code under a config option

Put legacy cgroup v1 memory controller code under a new CONFIG_MEMCG_V1
config option.  The option is turned off by default.  Nobody except those
who are still using cgroup v1 should turn it on.

If the option is not set, memory controller can still be mounted under
cgroup v1, but none of memcg-specific control files are present.

Please note, that not all cgroup v1's memory controller code is guarded
yet (but most of it), it's a subject for some follow-up work.

Thanks to Michal Hocko for providing a better Kconfig option description.

[roman.gushchin@linux.dev: better config option description provided by Michal]
  Link: https://lkml.kernel.org/r/ZnxXNtvqllc9CDoo@google.com
Link: https://lkml.kernel.org/r/20240625005906.106920-14-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index a70d64ed04f5..796cfa842346 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1851,7 +1851,7 @@ static inline bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg)
 
 /* Cgroup v1-related declarations */
 
-#ifdef CONFIG_MEMCG
+#ifdef CONFIG_MEMCG_V1
 unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
 					gfp_t gfp_mask,
 					unsigned long *total_scanned);
@@ -1883,7 +1883,7 @@ static inline void mem_cgroup_unlock_pages(void)
 	rcu_read_unlock();
 }
 
-#else /* CONFIG_MEMCG */
+#else /* CONFIG_MEMCG_V1 */
 static inline
 unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
 					gfp_t gfp_mask,
@@ -1922,6 +1922,6 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
 	return false;
 }
 
-#endif /* CONFIG_MEMCG */
+#endif /* CONFIG_MEMCG_V1 */
 
 #endif /* _LINUX_MEMCONTROL_H */
-- 
cgit v1.2.3


From 410abb20acaea9679fc1b2276d47e70fba331451 Mon Sep 17 00:00:00 2001
From: Dan Schatzberg <schatzberg.dan@gmail.com>
Date: Wed, 3 Jan 2024 08:48:36 -0800
Subject: mm: add defines for min/max swappiness

Patch series "Add swappiness argument to memory.reclaim", v6.

This patch proposes augmenting the memory.reclaim interface with a
swappiness=<val> argument that overrides the swappiness value for that
instance of proactive reclaim.

Userspace proactive reclaimers use the memory.reclaim interface to trigger
reclaim.  The memory.reclaim interface does not allow for any way to
effect the balance of file vs anon during proactive reclaim.  The only
approach is to adjust the vm.swappiness setting.  However, there are a few
reasons we look to control the balance of file vs anon during proactive
reclaim, separately from reactive reclaim:

* Swapout should be limited to manage SSD write endurance.  In near-OOM
  situations we are fine with lots of swap-out to avoid OOMs.  As these
  are typically rare events, they have relatively little impact on write
  endurance.  However, proactive reclaim runs continuously and so its
  impact on SSD write endurance is more significant.  Therefore it is
  desireable to control swap-out for proactive reclaim separately from
  reactive reclaim

* Some userspace OOM killers like systemd-oomd[1] support OOM killing on
  swap exhaustion.  This makes sense if the swap exhaustion is triggered
  due to reactive reclaim but less so if it is triggered due to proactive
  reclaim (e.g.  one could see OOMs when free memory is ample but anon is
  just particularly cold).  Therefore, it's desireable to have proactive
  reclaim reduce or stop swap-out before the threshold at which OOM
  killing occurs.

In the case of Meta's Senpai proactive reclaimer, we adjust vm.swappiness
before writes to memory.reclaim[2].  This has been in production for
nearly two years and has addressed our needs to control proactive vs
reactive reclaim behavior but is still not ideal for a number of reasons:

* vm.swappiness is a global setting, adjusting it can race/interfere
  with other system administration that wishes to control vm.swappiness.
  In our case, we need to disable Senpai before adjusting vm.swappiness.

* vm.swappiness is stateful - so a crash or restart of Senpai can leave
  a misconfigured setting.  This requires some additional management to
  record the "desired" setting and ensure Senpai always adjusts to it.

With this patch, we avoid these downsides of adjusting vm.swappiness
globally.

Previously, this exact interface addition was proposed by Yosry[3].  In
response, Roman proposed instead an interface to specify precise
file/anon/slab reclaim amounts[4].  More recently Huan also proposed this
as well[5] and others similarly questioned if this was the proper
interface.

Previous proposals sought to use this to allow proactive reclaimers to
effectively perform a custom reclaim algorithm by issuing proactive
reclaim with different settings to control file vs anon reclaim (e.g.  to
only reclaim anon from some applications).  Responses argued that
adjusting swappiness is a poor interface for custom reclaim.

In contrast, I argue in favor of a swappiness setting not as a way to
implement custom reclaim algorithms but rather to bias the balance of anon
vs file due to differences of proactive vs reactive reclaim.  In this
context, swappiness is the existing interface for controlling this balance
and this patch simply allows for it to be configured differently for
proactive vs reactive reclaim.

Specifying explicit amounts of anon vs file pages to reclaim feels
inappropriate for this prupose.  Proactive reclaimers are un-aware of the
relative age of file vs anon for a cgroup which makes it difficult to
manage proactive reclaim of different memory pools.  A proactive reclaimer
would need some amount of anon reclaim attempts separate from the amount
of file reclaim attempts which seems brittle given that it's difficult to
observe the impact.

[1]https://www.freedesktop.org/software/systemd/man/latest/systemd-oomd.service.html
[2]https://github.com/facebookincubator/oomd/blob/main/src/oomd/plugins/Senpai.cpp#L585-L598
[3]https://lore.kernel.org/linux-mm/CAJD7tkbDpyoODveCsnaqBBMZEkDvshXJmNdbk51yKSNgD7aGdg@mail.gmail.com/
[4]https://lore.kernel.org/linux-mm/YoPHtHXzpK51F%2F1Z@carbon/
[5]https://lore.kernel.org/lkml/20231108065818.19932-1-link@vivo.com/


This patch (of 2):

We use the constants 0 and 200 in a few places in the mm code when
referring to the min and max swappiness.  This patch adds MIN_SWAPPINESS
and MAX_SWAPPINESS #defines to improve clarity.  There are no functional
changes.

Link: https://lkml.kernel.org/r/20240103164841.2800183-1-schatzberg.dan@gmail.com
Link: https://lkml.kernel.org/r/20240103164841.2800183-2-schatzberg.dan@gmail.com
Signed-off-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Chris Li <chrisl@kernel.org>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Tejun Heo <tj@kernel.org>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Yue Zhao <findns94@gmail.com>
Cc: Zefan Li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/swap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index d33ce740b695..e97db0fa7659 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -404,6 +404,8 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 
 #define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
 #define MEMCG_RECLAIM_PROACTIVE (1 << 2)
+#define MIN_SWAPPINESS 0
+#define MAX_SWAPPINESS 200
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 						  unsigned long nr_pages,
 						  gfp_t gfp_mask,
-- 
cgit v1.2.3


From 68cd9050d871e4db5433420b5ceb32f5512d18bc Mon Sep 17 00:00:00 2001
From: Dan Schatzberg <schatzberg.dan@gmail.com>
Date: Wed, 3 Jan 2024 08:48:37 -0800
Subject: mm: add swappiness= arg to memory.reclaim

Allow proactive reclaimers to submit an additional swappiness=<val>
argument to memory.reclaim.  This overrides the global or per-memcg
swappiness setting for that reclaim attempt.

For example:

echo "2M swappiness=0" > /sys/fs/cgroup/memory.reclaim

will perform reclaim on the rootcg with a swappiness setting of 0 (no
swap) regardless of the vm.swappiness sysctl setting.

Userspace proactive reclaimers use the memory.reclaim interface to trigger
reclaim.  The memory.reclaim interface does not allow for any way to
effect the balance of file vs anon during proactive reclaim.  The only
approach is to adjust the vm.swappiness setting.  However, there are a few
reasons we look to control the balance of file vs anon during proactive
reclaim, separately from reactive reclaim:

* Swapout should be limited to manage SSD write endurance.  In near-OOM
  situations we are fine with lots of swap-out to avoid OOMs.  As these
  are typically rare events, they have relatively little impact on write
  endurance.  However, proactive reclaim runs continuously and so its
  impact on SSD write endurance is more significant.  Therefore it is
  desireable to control swap-out for proactive reclaim separately from
  reactive reclaim

* Some userspace OOM killers like systemd-oomd[1] support OOM killing on
  swap exhaustion.  This makes sense if the swap exhaustion is triggered
  due to reactive reclaim but less so if it is triggered due to proactive
  reclaim (e.g.  one could see OOMs when free memory is ample but anon is
  just particularly cold).  Therefore, it's desireable to have proactive
  reclaim reduce or stop swap-out before the threshold at which OOM
  killing occurs.

In the case of Meta's Senpai proactive reclaimer, we adjust vm.swappiness
before writes to memory.reclaim[2].  This has been in production for
nearly two years and has addressed our needs to control proactive vs
reactive reclaim behavior but is still not ideal for a number of reasons:

* vm.swappiness is a global setting, adjusting it can race/interfere
  with other system administration that wishes to control vm.swappiness.
  In our case, we need to disable Senpai before adjusting vm.swappiness.

* vm.swappiness is stateful - so a crash or restart of Senpai can leave
  a misconfigured setting.  This requires some additional management to
  record the "desired" setting and ensure Senpai always adjusts to it.

With this patch, we avoid these downsides of adjusting vm.swappiness
globally.

[1]https://www.freedesktop.org/software/systemd/man/latest/systemd-oomd.service.html
[2]https://github.com/facebookincubator/oomd/blob/main/src/oomd/plugins/Senpai.cpp#L585-L598

Link: https://lkml.kernel.org/r/20240103164841.2800183-3-schatzberg.dan@gmail.com
Signed-off-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Suggested-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Chris Li <chrisl@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Yue Zhao <findns94@gmail.com>
Cc: Zefan Li <lizefan.x@bytedance.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/swap.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index e97db0fa7659..3df75d62a835 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -409,7 +409,8 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 						  unsigned long nr_pages,
 						  gfp_t gfp_mask,
-						  unsigned int reclaim_options);
+						  unsigned int reclaim_options,
+						  int *swappiness);
 extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
 						gfp_t gfp_mask, bool noswap,
 						pg_data_t *pgdat,
-- 
cgit v1.2.3


From 773e9ae77fe777ac09739d8c32b32fff147f2d66 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Fri, 28 Jun 2024 21:03:10 +0000
Subject: mm: memcg: factor out legacy socket memory accounting code

Move out the legacy cgroup v1 socket memory accounting code into
mm/memcontrol-v1.c.

This commit introduces three new functions: memcg1_tcpmem_active(),
memcg1_charge_skmem() and memcg1_uncharge_skmem(), which contain all
cgroup v1-specific code and become trivial if CONFIG_MEMCG_V1 isn't set.

Note, that !!memcg->tcpmem_pressure check in
mem_cgroup_under_socket_pressure() can't be easily moved into
memcontrol-v1.h without including memcontrol-v1.h from memcontrol.h which
isn't a good idea, so it's better to just #ifdef it.

Link: https://lkml.kernel.org/r/20240628210317.272856-3-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 796cfa842346..44ab6394c9ed 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1650,8 +1650,10 @@ void mem_cgroup_sk_alloc(struct sock *sk);
 void mem_cgroup_sk_free(struct sock *sk);
 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
+#ifdef CONFIG_MEMCG_V1
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
 		return !!memcg->tcpmem_pressure;
+#endif /* CONFIG_MEMCG_V1 */
 	do {
 		if (time_before(jiffies, READ_ONCE(memcg->socket_pressure)))
 			return true;
-- 
cgit v1.2.3


From 94b7e5bf09b08aa4cce4625d0a4b307399b77e2c Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Fri, 28 Jun 2024 21:03:14 +0000
Subject: mm: memcg: put memcg1-specific struct mem_cgroup's members under
 CONFIG_MEMCG_V1

Put memcg1-specific members of struct mem_cgroup under the CONFIG_MEMCG_V1
config option.  Also group them close to the end of struct mem_cgroup just
before the dynamic per-node part.

Link: https://lkml.kernel.org/r/20240628210317.272856-7-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 103 +++++++++++++++++++++++----------------------
 1 file changed, 53 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 44ab6394c9ed..107b0c5d6eab 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -188,10 +188,6 @@ struct mem_cgroup {
 		struct page_counter memsw;	/* v1 only */
 	};
 
-	/* Legacy consumer-oriented counters */
-	struct page_counter kmem;		/* v1 only */
-	struct page_counter tcpmem;		/* v1 only */
-
 	/* Range enforcement for interrupt charges */
 	struct work_struct high_work;
 
@@ -205,8 +201,6 @@ struct mem_cgroup {
 	bool zswap_writeback;
 #endif
 
-	unsigned long soft_limit;
-
 	/* vmpressure notifications */
 	struct vmpressure vmpressure;
 
@@ -215,13 +209,7 @@ struct mem_cgroup {
 	 */
 	bool oom_group;
 
-	/* protected by memcg_oom_lock */
-	bool		oom_lock;
-	int		under_oom;
-
-	int	swappiness;
-	/* OOM-Killer disable */
-	int		oom_kill_disable;
+	int swappiness;
 
 	/* memory.events and memory.events.local */
 	struct cgroup_file events_file;
@@ -230,27 +218,6 @@ struct mem_cgroup {
 	/* handle for "memory.swap.events" */
 	struct cgroup_file swap_events_file;
 
-	/* protect arrays of thresholds */
-	struct mutex thresholds_lock;
-
-	/* thresholds for memory usage. RCU-protected */
-	struct mem_cgroup_thresholds thresholds;
-
-	/* thresholds for mem+swap usage. RCU-protected */
-	struct mem_cgroup_thresholds memsw_thresholds;
-
-	/* For oom notifier event fd */
-	struct list_head oom_notify;
-
-	/*
-	 * Should we move charges of a task when a task is moved into this
-	 * mem_cgroup ? And what type of charges should we move ?
-	 */
-	unsigned long move_charge_at_immigrate;
-	/* taken only while moving_account > 0 */
-	spinlock_t		move_lock;
-	unsigned long		move_lock_flags;
-
 	CACHELINE_PADDING(_pad1_);
 
 	/* memory.stat */
@@ -267,10 +234,6 @@ struct mem_cgroup {
 	 */
 	unsigned long		socket_pressure;
 
-	/* Legacy tcp memory accounting */
-	bool			tcpmem_active;
-	int			tcpmem_pressure;
-
 #ifdef CONFIG_MEMCG_KMEM
 	int kmemcg_id;
 	/*
@@ -284,14 +247,6 @@ struct mem_cgroup {
 	struct list_head objcg_list;
 #endif
 
-	CACHELINE_PADDING(_pad2_);
-
-	/*
-	 * set > 0 if pages under this cgroup are moving to other cgroup.
-	 */
-	atomic_t		moving_account;
-	struct task_struct	*move_lock_task;
-
 	struct memcg_vmstats_percpu __percpu *vmstats_percpu;
 
 #ifdef CONFIG_CGROUP_WRITEBACK
@@ -300,10 +255,6 @@ struct mem_cgroup {
 	struct memcg_cgwb_frn cgwb_frn[MEMCG_CGWB_FRN_CNT];
 #endif
 
-	/* List of events which userspace want to receive */
-	struct list_head event_list;
-	spinlock_t event_list_lock;
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	struct deferred_split deferred_split_queue;
 #endif
@@ -313,6 +264,58 @@ struct mem_cgroup {
 	struct lru_gen_mm_list mm_list;
 #endif
 
+#ifdef CONFIG_MEMCG_V1
+	/* Legacy consumer-oriented counters */
+	struct page_counter kmem;		/* v1 only */
+	struct page_counter tcpmem;		/* v1 only */
+
+	unsigned long soft_limit;
+
+	/* protected by memcg_oom_lock */
+	bool oom_lock;
+	int under_oom;
+
+	/* OOM-Killer disable */
+	int oom_kill_disable;
+
+	/* protect arrays of thresholds */
+	struct mutex thresholds_lock;
+
+	/* thresholds for memory usage. RCU-protected */
+	struct mem_cgroup_thresholds thresholds;
+
+	/* thresholds for mem+swap usage. RCU-protected */
+	struct mem_cgroup_thresholds memsw_thresholds;
+
+	/* For oom notifier event fd */
+	struct list_head oom_notify;
+
+	/*
+	 * Should we move charges of a task when a task is moved into this
+	 * mem_cgroup ? And what type of charges should we move ?
+	 */
+	unsigned long move_charge_at_immigrate;
+	/* taken only while moving_account > 0 */
+	spinlock_t move_lock;
+	unsigned long move_lock_flags;
+
+	/* Legacy tcp memory accounting */
+	bool tcpmem_active;
+	int tcpmem_pressure;
+
+	CACHELINE_PADDING(_pad2_);
+
+	/*
+	 * set > 0 if pages under this cgroup are moving to other cgroup.
+	 */
+	atomic_t moving_account;
+	struct task_struct *move_lock_task;
+
+	/* List of events which userspace want to receive */
+	struct list_head event_list;
+	spinlock_t event_list_lock;
+#endif /* CONFIG_MEMCG_V1 */
+
 	struct mem_cgroup_per_node *nodeinfo[];
 };
 
-- 
cgit v1.2.3


From 98c9daf5ae6be008f78c07b744bcff7bcc6e98da Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Fri, 28 Jun 2024 21:03:15 +0000
Subject: mm: memcg: guard memcg1-specific members of struct
 mem_cgroup_per_node

Put memcg1-specific members of struct mem_cgroup_per_node under the
CONFIG_MEMCG_V1 config option.

Link: https://lkml.kernel.org/r/20240628210317.272856-8-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 107b0c5d6eab..c7ef628ee882 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -92,6 +92,7 @@ struct mem_cgroup_per_node {
 	struct lruvec_stats			*lruvec_stats;
 	struct shrinker_info __rcu	*shrinker_info;
 
+#ifdef CONFIG_MEMCG_V1
 	/*
 	 * Memcg-v1 only stuff in middle as buffer between read mostly fields
 	 * and update often fields to avoid false sharing. Once v1 stuff is
@@ -102,6 +103,7 @@ struct mem_cgroup_per_node {
 	unsigned long		usage_in_excess;/* Set to the value by which */
 						/* the soft limit is exceeded*/
 	bool			on_tree;
+#endif
 
 	/* Fields which get updated often at the end. */
 	struct lruvec		lruvec;
-- 
cgit v1.2.3


From 1c3a0b3d0b11fb98c40360f849563185218c2dd4 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Fri, 28 Jun 2024 21:03:16 +0000
Subject: mm: memcg: put struct task_struct::memcg_in_oom under CONFIG_MEMCG_V1

The memcg_in_oom field of the struct task_struct is not used by the cgroup
v2's memory controller, so it can be happily compiled out if
CONFIG_MEMCG_V1 is not set.

Link: https://lkml.kernel.org/r/20240628210317.272856-9-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/sched.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 61591ac6eab6..2a16023e8620 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1447,9 +1447,11 @@ struct task_struct {
 	unsigned int			kcov_softirq;
 #endif
 
-#ifdef CONFIG_MEMCG
+#ifdef CONFIG_MEMCG_V1
 	struct mem_cgroup		*memcg_in_oom;
+#endif
 
+#ifdef CONFIG_MEMCG
 	/* Number of pages to reclaim on returning to userland: */
 	unsigned int			memcg_nr_pages_over_high;
 
-- 
cgit v1.2.3


From 1419ff984aad4c08d121793f71a520b432ead88a Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Fri, 28 Jun 2024 21:03:17 +0000
Subject: mm: memcg: put struct task_struct::in_user_fault under
 CONFIG_MEMCG_V1

The struct task_struct's in_user_fault member is not used by the cgroup
v2's memory controller, so it can be put under the CONFIG_MEMCG_V1 config
option.  To do so, mem_cgroup_enter_user_fault() and
mem_cgroup_exit_user_fault() are moved under the CONFIG_MEMCG_V1 option as
well.

Link: https://lkml.kernel.org/r/20240628210317.272856-10-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 40 ++++++++++++++++++++--------------------
 include/linux/sched.h      |  2 +-
 2 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c7ef628ee882..d0c9365ff039 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -943,18 +943,6 @@ void mem_cgroup_print_oom_context(struct mem_cgroup *memcg,
 
 void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg);
 
-static inline void mem_cgroup_enter_user_fault(void)
-{
-	WARN_ON(current->in_user_fault);
-	current->in_user_fault = 1;
-}
-
-static inline void mem_cgroup_exit_user_fault(void)
-{
-	WARN_ON(!current->in_user_fault);
-	current->in_user_fault = 0;
-}
-
 struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
 					    struct mem_cgroup *oom_domain);
 void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
@@ -1402,14 +1390,6 @@ static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask)
 {
 }
 
-static inline void mem_cgroup_enter_user_fault(void)
-{
-}
-
-static inline void mem_cgroup_exit_user_fault(void)
-{
-}
-
 static inline struct mem_cgroup *mem_cgroup_get_oom_group(
 	struct task_struct *victim, struct mem_cgroup *oom_domain)
 {
@@ -1890,6 +1870,18 @@ static inline void mem_cgroup_unlock_pages(void)
 	rcu_read_unlock();
 }
 
+static inline void mem_cgroup_enter_user_fault(void)
+{
+	WARN_ON(current->in_user_fault);
+	current->in_user_fault = 1;
+}
+
+static inline void mem_cgroup_exit_user_fault(void)
+{
+	WARN_ON(!current->in_user_fault);
+	current->in_user_fault = 0;
+}
+
 #else /* CONFIG_MEMCG_V1 */
 static inline
 unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
@@ -1929,6 +1921,14 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
 	return false;
 }
 
+static inline void mem_cgroup_enter_user_fault(void)
+{
+}
+
+static inline void mem_cgroup_exit_user_fault(void)
+{
+}
+
 #endif /* CONFIG_MEMCG_V1 */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2a16023e8620..a7770c566c4d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -934,7 +934,7 @@ struct task_struct {
 #ifndef TIF_RESTORE_SIGMASK
 	unsigned			restore_sigmask:1;
 #endif
-#ifdef CONFIG_MEMCG
+#ifdef CONFIG_MEMCG_V1
 	unsigned			in_user_fault:1;
 #endif
 #ifdef CONFIG_LRU_GEN
-- 
cgit v1.2.3


From 50183ac2cfb54e027dd36fb22ea1bd1e91e3a08b Mon Sep 17 00:00:00 2001
From: Ram Prakash Gupta <quic_rampraka@quicinc.com>
Date: Thu, 27 Jun 2024 14:07:55 +0530
Subject: scsi: ufs: core: Suspend clk scaling on no request

Currently UFS clk scaling is getting suspended only when the clks are
scaled down. When high load is generated, a huge amount of latency is added
due to scaling up the clk and completing the request post that.

Suspending the scaling in its existing state when high load is generated
improves the random performance KPI by 28%. So suspending the scaling when
there are no requests. And the clk would be put in low scaled state when
the actual request load is low.

Make this change optional by having the check enabled using vops since for
some devices suspending without bringing the clk in low scaled state might
have impact on power consumption of the SoC.

Signed-off-by: Ram Prakash Gupta <quic_rampraka@quicinc.com>
Link: https://lore.kernel.org/r/20240627083756.25340-2-quic_rampraka@quicinc.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 9e0581115b34..049520bcc6c0 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -459,6 +459,7 @@ struct ufs_clk_scaling {
 	bool is_initialized;
 	bool is_busy_started;
 	bool is_suspended;
+	bool suspend_on_no_request;
 };
 
 #define UFS_EVENT_HIST_LENGTH 8
-- 
cgit v1.2.3


From 81e7706345f06e1e97a092f59697b7e20a0ee868 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Thu, 4 Jul 2024 14:28:14 +0900
Subject: dm: handle REQ_OP_ZONE_RESET_ALL

This commit implements processing of the REQ_OP_ZONE_RESET_ALL operation
for zoned mapped devices. Given that this operation always has a BIO
sector of 0 and a 0 size, processing through the regular BIO
__split_and_process_bio() function does not work because this function
would always select the first target. Instead, handling of this
operation is implemented using the function __send_zone_reset_all().

Similarly to the __send_empty_flush() function, the new
__send_zone_reset_all() function manually goes through all targets of a
mapped device table doing the following:
1) If the target can natively support REQ_OP_ZONE_RESET_ALL,
   __send_duplicate_bios() is used to forward the reset all operation to
   the target. This case is handled with the
   __send_zone_reset_all_native() function.
2) For other targets, the function __send_zone_reset_all_emulated() is
   executed to emulate the execution of REQ_OP_ZONE_RESET_ALL using
   regular REQ_OP_ZONE_RESET operations.

Targets that can natively support REQ_OP_ZONE_RESET_ALL are identified
using the new target field zone_reset_all_supported. This boolean is set
to true in for targets that have reliable zone limits, that is, targets
that map all sequential write required zones of their zoned device(s).
Setting this field is handled in dm_set_zones_restrictions() and
device_get_zone_resource_limits().

For targets with unreliable zone limits, REQ_OP_ZONE_RESET_ALL must be
emulated (case 2 above). This is implemented with
__send_zone_reset_all_emulated() and is similar to the block layer
function blkdev_zone_reset_all_emulated(): first a report zones is done
for the zones of the target to identify zones that need reset, that is,
any sequential write required zone that is not already empty. This is
done using a bitmap and the function dm_zone_get_reset_bitmap() which
sets to 1 the bit corresponding to a zone that needs reset. Next, this
zone bitmap is inspected and a clone BIO modified to use the
REQ_OP_ZONE_RESET operation issued for any zone with its bit set in the
zone bitmap.

This implementation is more efficient than what the block layer does
with blkdev_zone_reset_all_emulated(), which is always used for DM zoned
devices currently: as we can natively use REQ_OP_ZONE_RESET_ALL on
targets mapping all sequential write required zones, resetting all zones
of a zoned mapped device can be much faster compared to always emulating
this operation using regular per-zone reset. In the worst case, this
implementation is as-efficient as the block layer emulation. This
reduction in the time it takes to reset all zones of a zoned mapped
device depends directly on the mapped device targets mapping (reliable
zone limits or not).

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240704052816.623865-4-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/device-mapper.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 82b2195efaca..15d28164bbbd 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -357,6 +357,13 @@ struct dm_target {
 	 */
 	bool discards_supported:1;
 
+	/*
+	 * Automatically set by dm-core if this target supports
+	 * REQ_OP_ZONE_RESET_ALL. Otherwise, this operation will be emulated
+	 * using REQ_OP_ZONE_RESET. Target drivers must not set this manually.
+	 */
+	bool zone_reset_all_supported:1;
+
 	/*
 	 * Set if this target requires that discards be split on
 	 * 'max_discard_sectors' boundaries.
-- 
cgit v1.2.3


From f2a7bea23710fceb99dac6da4ef82c3cc8932f7f Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Thu, 4 Jul 2024 14:28:15 +0900
Subject: block: Remove REQ_OP_ZONE_RESET_ALL emulation

Now that device mapper can handle resetting all zones of a mapped zoned
device using REQ_OP_ZONE_RESET_ALL, all zoned block device drivers
support this operation. With this, the request queue feature
BLK_FEAT_ZONE_RESETALL is not necessary and the emulation code in
blk-zone.c can be removed.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240704052816.623865-5-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4d0d4b83bc74..dc250d8070d2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -318,9 +318,6 @@ typedef unsigned int __bitwise blk_features_t;
 /* is a zoned device */
 #define BLK_FEAT_ZONED			((__force blk_features_t)(1u << 10))
 
-/* supports Zone Reset All */
-#define BLK_FEAT_ZONE_RESETALL		((__force blk_features_t)(1u << 11))
-
 /* supports PCI(e) p2p requests */
 #define BLK_FEAT_PCI_P2PDMA		((__force blk_features_t)(1u << 12))
 
@@ -618,8 +615,6 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
 	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)	(!((q)->limits.features & BLK_FEAT_ROTATIONAL))
 #define blk_queue_io_stat(q)	((q)->limits.features & BLK_FEAT_IO_STAT)
-#define blk_queue_zone_resetall(q)	\
-	((q)->limits.features & BLK_FEAT_ZONE_RESETALL)
 #define blk_queue_dax(q)	((q)->limits.features & BLK_FEAT_DAX)
 #define blk_queue_pci_p2pdma(q)	((q)->limits.features & BLK_FEAT_PCI_P2PDMA)
 #ifdef CONFIG_BLK_RQ_ALLOC_TIME
-- 
cgit v1.2.3


From bf86bcdb40123ee99669ee91b67e023669433a1a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 1 Jul 2024 18:51:20 +0200
Subject: blk-lib: check for kill signal in ioctl BLKZEROOUT

Zeroout can access a significant capacity and take longer than the user
expected.  A user may change their mind about wanting to run that
command and attempt to kill the process and do something else with their
device. But since the task is uninterruptable, they have to wait for it
to finish, which could be many hours.

Add a new BLKDEV_ZERO_KILLABLE flag for blkdev_issue_zeroout that checks
for a fatal signal at each iteration so the user doesn't have to wait for
their regretted operation to complete naturally.

Heavily based on an earlier patch from Keith Busch.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240701165219.1571322-11-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index dc250d8070d2..02e04df27282 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1095,6 +1095,7 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
 
 #define BLKDEV_ZERO_NOUNMAP	(1 << 0)  /* do not free blocks */
 #define BLKDEV_ZERO_NOFALLBACK	(1 << 1)  /* don't write explicit zeroes */
+#define BLKDEV_ZERO_KILLABLE	(1 << 2)  /* interruptible by fatal signals */
 
 extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
-- 
cgit v1.2.3


From f37bee9508885ce5edd4875c744e1af28cfac76c Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Mon, 1 Jul 2024 21:28:32 +0300
Subject: net: pcs: xpcs: Move native device ID macro to linux/pcs/pcs-xpcs.h

One of the next commits will alter the DW XPCS driver to support setting a
custom device ID for the particular MDIO-device detected on the platform.
The generic DW XPCS ID can be used as a custom ID as well in case if the
DW XPCS-device was erroneously synthesized with no or some undefined ID.
In addition to that having all supported DW XPCS device IDs defined in a
single place will improve the code maintainability and readability.

Note while at it rename the macros to being shorter and looking alike to
the already defined NXP XPCS ID macro.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pcs/pcs-xpcs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index da3a6c30f6d2..8dfe90295f12 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -12,6 +12,8 @@
 
 #define NXP_SJA1105_XPCS_ID		0x00000010
 #define NXP_SJA1110_XPCS_ID		0x00000020
+#define DW_XPCS_ID			0x7996ced0
+#define DW_XPCS_ID_MASK			0xffffffff
 
 /* AN mode */
 #define DW_AN_C73			1
-- 
cgit v1.2.3


From 71b200b388ef2ff1b547114f17dc1fd088bfc2c6 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Mon, 1 Jul 2024 21:28:34 +0300
Subject: net: pcs: xpcs: Convert xpcs_id to dw_xpcs_desc

A structure with the PCS/PMA MMD IDs data is being introduced in one of
the next commits. In order to prevent the names ambiguity let's convert
the xpcs_id structure name to dw_xpcs_desc. The later version is more
suitable since the structure content is indeed the device descriptor
containing the data and callbacks required for the driver to correctly set
the device up.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pcs/pcs-xpcs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 8dfe90295f12..e706bd16b986 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -28,11 +28,11 @@
 /* dev_flag */
 #define DW_DEV_TXGBE			BIT(0)
 
-struct xpcs_id;
+struct dw_xpcs_desc;
 
 struct dw_xpcs {
+	const struct dw_xpcs_desc *desc;
 	struct mdio_device *mdiodev;
-	const struct xpcs_id *id;
 	struct phylink_pcs pcs;
 	phy_interface_t interface;
 	int dev_flag;
-- 
cgit v1.2.3


From bcac735cf653ef8dc6d7c08ed311e0a77f2665f0 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Mon, 1 Jul 2024 21:28:36 +0300
Subject: net: pcs: xpcs: Introduce DW XPCS info structure

The being introduced structure will preserve the PCS and PMA IDs retrieved
from the respective DW XPCS MMDs or potentially pre-defined by the client
drivers. (The later change will be introduced later in the framework of
the commit adding the memory-mapped DW XPCS devices support.)

The structure fields are filled in in the xpcs_get_id() function, which
used to be responsible for the PCS Device ID getting only. Besides of the
PCS ID the method now fetches the PMA/PMD IDs too from MMD 1, which used
to be done in xpcs_dev_flag(). The retrieved PMA ID will be from now
utilized for the PMA-specific tweaks like it was introduced for the
Wangxun TxGBE PCS in the commit f629acc6f210 ("net: pcs: xpcs: support to
switch mode for Wangxun NICs").

Note 1. The xpcs_get_id() error-handling semantics has been changed. From
now the error number will be returned from the function. There is no point
in the next IOs or saving 0xffs and then looping over the actual device
IDs if device couldn't be reached. -ENODEV will be returned if the very
first IO operation failed thus indicating that no device could be found.

Note 2. The PCS and PMA IDs macros have been converted to enum'es. The
enum'es will be populated later in another commit with the virtual IDs
identifying the DW XPCS devices which have some platform-specifics, but
have been synthesized with the default PCS/PMA ID.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pcs/pcs-xpcs.h | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index e706bd16b986..1dc60f5e653f 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -9,11 +9,7 @@
 
 #include <linux/phy.h>
 #include <linux/phylink.h>
-
-#define NXP_SJA1105_XPCS_ID		0x00000010
-#define NXP_SJA1110_XPCS_ID		0x00000020
-#define DW_XPCS_ID			0x7996ced0
-#define DW_XPCS_ID_MASK			0xffffffff
+#include <linux/types.h>
 
 /* AN mode */
 #define DW_AN_C73			1
@@ -22,20 +18,30 @@
 #define DW_AN_C37_1000BASEX		4
 #define DW_10GBASER			5
 
-/* device vendor OUI */
-#define DW_OUI_WX			0x0018fc80
+struct dw_xpcs_desc;
 
-/* dev_flag */
-#define DW_DEV_TXGBE			BIT(0)
+enum dw_xpcs_pcs_id {
+	NXP_SJA1105_XPCS_ID = 0x00000010,
+	NXP_SJA1110_XPCS_ID = 0x00000020,
+	DW_XPCS_ID = 0x7996ced0,
+	DW_XPCS_ID_MASK = 0xffffffff,
+};
 
-struct dw_xpcs_desc;
+enum dw_xpcs_pma_id {
+	WX_TXGBE_XPCS_PMA_10G_ID = 0x0018fc80,
+};
+
+struct dw_xpcs_info {
+	u32 pcs;
+	u32 pma;
+};
 
 struct dw_xpcs {
+	struct dw_xpcs_info info;
 	const struct dw_xpcs_desc *desc;
 	struct mdio_device *mdiodev;
 	struct phylink_pcs pcs;
 	phy_interface_t interface;
-	int dev_flag;
 };
 
 int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface);
-- 
cgit v1.2.3


From f6bb3e9d98c2e8d70587d5ddaf9426ef30d7865c Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Mon, 1 Jul 2024 21:28:38 +0300
Subject: net: pcs: xpcs: Add Synopsys DW xPCS platform device driver

Synopsys DesignWare XPCS IP-core can be synthesized with the device CSRs
being accessible over the MCI or APB3 interface instead of the MDIO bus
(see the CSR_INTERFACE HDL parameter). Thus all the PCS registers can be
just memory mapped and be a subject of the standard MMIO operations of
course taking into account the peculiarities of the Clause C45 CSRs
mapping. From that perspective the DW XPCS devices would look as just
normal platform devices for the kernel.

On the other hand in order to have the DW XPCS devices handled by the
pcs-xpcs.c driver they need to be registered in the framework of the
MDIO-subsystem. So the suggested change is about providing a DW XPCS
platform device driver registering a virtual MDIO-bus with a single
MDIO-device representing the DW XPCS device.

DW XPCS platform device is supposed to be described by the respective
compatible string "snps,dw-xpcs" (or with the PMA-specific compatible
string), CSRs memory space and optional peripheral bus and reference clock
sources. Depending on the INDIRECT_ACCESS IP-core synthesize parameter the
memory-mapped reg-space can be represented as either directly or
indirectly mapped Clause 45 space. In the former case the particular
address is determined based on the MMD device and the registers offset (5
+ 16 bits all together) within the device reg-space. In the later case
there is only 8 lower address bits are utilized for the registers mapping
(255 CSRs). The upper bits are supposed to be written into the respective
viewport CSR in order to select the respective MMD sub-page.

Note, only the peripheral bus clock source is requested in the platform
device probe procedure. The core and pad clocks handling has been
implemented in the framework of the xpcs_create() method intentionally
since the clocks-related setups are supposed to be performed later, during
the DW XPCS main configuration procedures. (For instance they will be
required for the DW Gen5 10G PMA configuration.)

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pcs/pcs-xpcs.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 1dc60f5e653f..813be644647f 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -7,6 +7,8 @@
 #ifndef __LINUX_PCS_XPCS_H
 #define __LINUX_PCS_XPCS_H
 
+#include <linux/clk.h>
+#include <linux/mdio.h>
 #include <linux/phy.h>
 #include <linux/phylink.h>
 #include <linux/types.h>
@@ -21,6 +23,7 @@
 struct dw_xpcs_desc;
 
 enum dw_xpcs_pcs_id {
+	DW_XPCS_ID_NATIVE = 0,
 	NXP_SJA1105_XPCS_ID = 0x00000010,
 	NXP_SJA1110_XPCS_ID = 0x00000020,
 	DW_XPCS_ID = 0x7996ced0,
@@ -28,6 +31,14 @@ enum dw_xpcs_pcs_id {
 };
 
 enum dw_xpcs_pma_id {
+	DW_XPCS_PMA_ID_NATIVE = 0,
+	DW_XPCS_PMA_GEN1_3G_ID,
+	DW_XPCS_PMA_GEN2_3G_ID,
+	DW_XPCS_PMA_GEN2_6G_ID,
+	DW_XPCS_PMA_GEN4_3G_ID,
+	DW_XPCS_PMA_GEN4_6G_ID,
+	DW_XPCS_PMA_GEN5_10G_ID,
+	DW_XPCS_PMA_GEN5_12G_ID,
 	WX_TXGBE_XPCS_PMA_10G_ID = 0x0018fc80,
 };
 
@@ -36,10 +47,17 @@ struct dw_xpcs_info {
 	u32 pma;
 };
 
+enum dw_xpcs_clock {
+	DW_XPCS_CORE_CLK,
+	DW_XPCS_PAD_CLK,
+	DW_XPCS_NUM_CLKS,
+};
+
 struct dw_xpcs {
 	struct dw_xpcs_info info;
 	const struct dw_xpcs_desc *desc;
 	struct mdio_device *mdiodev;
+	struct clk_bulk_data clks[DW_XPCS_NUM_CLKS];
 	struct phylink_pcs pcs;
 	phy_interface_t interface;
 };
-- 
cgit v1.2.3


From 9cad7275463ab9e52aeae8d2ecb169afee6876f3 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Mon, 1 Jul 2024 21:28:39 +0300
Subject: net: pcs: xpcs: Add fwnode-based descriptor creation method

It's now possible to have the DW XPCS device defined as a standard
platform device for instance in the platform DT-file. Although that
functionality is useless unless there is a way to have the device found by
the client drivers (STMMAC/DW *MAC, NXP SJA1105 Eth Switch, etc). Provide
such ability by means of the xpcs_create_fwnode() method. It needs to be
called with the device DW XPCS fwnode instance passed. That node will be
then used to find the MDIO-device instance in order to create the DW XPCS
descriptor.

Note the method semantics and name is similar to what has been recently
introduced in the Lynx PCS driver.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pcs/pcs-xpcs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 813be644647f..b4a4eb6c8866 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -8,6 +8,7 @@
 #define __LINUX_PCS_XPCS_H
 
 #include <linux/clk.h>
+#include <linux/fwnode.h>
 #include <linux/mdio.h>
 #include <linux/phy.h>
 #include <linux/phylink.h>
@@ -72,6 +73,8 @@ int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,
 		    int enable);
 struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr,
 				    phy_interface_t interface);
+struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode,
+				   phy_interface_t interface);
 void xpcs_destroy(struct dw_xpcs *xpcs);
 
 #endif /* __LINUX_PCS_XPCS_H */
-- 
cgit v1.2.3


From 351066bad6ade5ad0f5f90fde2dc759759959969 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Mon, 1 Jul 2024 21:28:40 +0300
Subject: net: stmmac: Create DW XPCS device with particular address

Currently the only STMMAC platform driver using the DW XPCS code is the
Intel mGBE device driver. (It can be determined by finding all the drivers
having the stmmac_mdio_bus_data::has_xpcs flag set.) At the same time the
low-level platform driver masks out the DW XPCS MDIO-address from being
auto-detected as PHY by the MDIO subsystem core. Seeing the PCS MDIO ID is
known the procedure of the DW XPCS device creation can be simplified by
dropping the loop over all the MDIO IDs. From now the DW XPCS device
descriptor will be created for the MDIO-bus address pre-defined by the
platform drivers via the stmmac_mdio_bus_data::pcs_mask field.

Note besides this shall speed up a bit the Intel mGBE probing.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/stmmac.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 9c54f82901a1..84e13bd5df28 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -82,7 +82,7 @@ struct stmmac_priv;
 
 struct stmmac_mdio_bus_data {
 	unsigned int phy_mask;
-	unsigned int has_xpcs;
+	unsigned int pcs_mask;
 	unsigned int default_an_inband;
 	int *irqs;
 	int probed_phy_irq;
-- 
cgit v1.2.3


From 87b6426ab92efd91a17aed1f5b1d94f36938e28f Mon Sep 17 00:00:00 2001
From: Pascal Paillet <p.paillet@foss.st.com>
Date: Fri, 28 Jun 2024 10:58:11 +0200
Subject: regulator: Add STM32MP25 regulator bindings

These bindings will be used for the SCMI voltage domain.

Signed-off-by: Pascal Paillet <p.paillet@foss.st.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Alexandre Torgue <alexandre.torgue@foss.st.com>
---
 .../dt-bindings/regulator/st,stm32mp25-regulator.h | 48 ++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 include/dt-bindings/regulator/st,stm32mp25-regulator.h

(limited to 'include')

diff --git a/include/dt-bindings/regulator/st,stm32mp25-regulator.h b/include/dt-bindings/regulator/st,stm32mp25-regulator.h
new file mode 100644
index 000000000000..3c3d30911dd0
--- /dev/null
+++ b/include/dt-bindings/regulator/st,stm32mp25-regulator.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (C) 2024, STMicroelectronics - All Rights Reserved
+ */
+
+#ifndef __DT_BINDINGS_REGULATOR_ST_STM32MP25_REGULATOR_H
+#define __DT_BINDINGS_REGULATOR_ST_STM32MP25_REGULATOR_H
+
+/* SCMI voltage domains identifiers */
+
+/* SOC Internal regulators */
+#define VOLTD_SCMI_VDDIO1		0
+#define VOLTD_SCMI_VDDIO2		1
+#define VOLTD_SCMI_VDDIO3		2
+#define VOLTD_SCMI_VDDIO4		3
+#define VOLTD_SCMI_VDDIO		4
+#define VOLTD_SCMI_UCPD			5
+#define VOLTD_SCMI_USB33		6
+#define VOLTD_SCMI_ADC			7
+#define VOLTD_SCMI_GPU			8
+#define VOLTD_SCMI_VREFBUF		9
+
+/* STPMIC2 regulators */
+#define VOLTD_SCMI_STPMIC2_BUCK1	10
+#define VOLTD_SCMI_STPMIC2_BUCK2	11
+#define VOLTD_SCMI_STPMIC2_BUCK3	12
+#define VOLTD_SCMI_STPMIC2_BUCK4	13
+#define VOLTD_SCMI_STPMIC2_BUCK5	14
+#define VOLTD_SCMI_STPMIC2_BUCK6	15
+#define VOLTD_SCMI_STPMIC2_BUCK7	16
+#define VOLTD_SCMI_STPMIC2_LDO1		17
+#define VOLTD_SCMI_STPMIC2_LDO2		18
+#define VOLTD_SCMI_STPMIC2_LDO3		19
+#define VOLTD_SCMI_STPMIC2_LDO4		20
+#define VOLTD_SCMI_STPMIC2_LDO5		21
+#define VOLTD_SCMI_STPMIC2_LDO6		22
+#define VOLTD_SCMI_STPMIC2_LDO7		23
+#define VOLTD_SCMI_STPMIC2_LDO8		24
+#define VOLTD_SCMI_STPMIC2_REFDDR	25
+
+/* External regulators */
+#define VOLTD_SCMI_REGU0		26
+#define VOLTD_SCMI_REGU1		27
+#define VOLTD_SCMI_REGU2		28
+#define VOLTD_SCMI_REGU3		29
+#define VOLTD_SCMI_REGU4		30
+
+#endif /*__DT_BINDINGS_REGULATOR_ST_STM32MP25_REGULATOR_H */
-- 
cgit v1.2.3


From 093b0f366567aa3fed85c316f832607069202b23 Mon Sep 17 00:00:00 2001
From: Adrian Moreno <amorenoz@redhat.com>
Date: Thu, 4 Jul 2024 10:56:52 +0200
Subject: net: psample: add user cookie

Add a user cookie to the sample metadata so that sample emitters can
provide more contextual information to samples.

If present, send the user cookie in a new attribute:
PSAMPLE_ATTR_USER_COOKIE.

Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
Link: https://patch.msgid.link/20240704085710.353845-2-amorenoz@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/psample.h        | 2 ++
 include/uapi/linux/psample.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/psample.h b/include/net/psample.h
index 0509d2d6be67..2ac71260a546 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -25,6 +25,8 @@ struct psample_metadata {
 	   out_tc_occ_valid:1,
 	   latency_valid:1,
 	   unused:5;
+	const u8 *user_cookie;
+	u32 user_cookie_len;
 };
 
 struct psample_group *psample_group_get(struct net *net, u32 group_num);
diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h
index e585db5bf2d2..e80637e1d97b 100644
--- a/include/uapi/linux/psample.h
+++ b/include/uapi/linux/psample.h
@@ -19,6 +19,7 @@ enum {
 	PSAMPLE_ATTR_LATENCY,		/* u64, nanoseconds */
 	PSAMPLE_ATTR_TIMESTAMP,		/* u64, nanoseconds */
 	PSAMPLE_ATTR_PROTO,		/* u16 */
+	PSAMPLE_ATTR_USER_COOKIE,	/* binary, user provided data */
 
 	__PSAMPLE_ATTR_MAX
 };
-- 
cgit v1.2.3


From 7b1b2b60c63f070e0dfbe072ccaae13168b38d01 Mon Sep 17 00:00:00 2001
From: Adrian Moreno <amorenoz@redhat.com>
Date: Thu, 4 Jul 2024 10:56:55 +0200
Subject: net: psample: allow using rate as probability

Although not explicitly documented in the psample module itself, the
definition of PSAMPLE_ATTR_SAMPLE_RATE seems inherited from act_sample.

Quoting tc-sample(8):
"RATE of 100 will lead to an average of one sampled packet out of every
100 observed."

With this semantics, the rates that we can express with an unsigned
32-bits number are very unevenly distributed and concentrated towards
"sampling few packets".
For example, we can express a probability of 2.32E-8% but we
cannot express anything between 100% and 50%.

For sampling applications that are capable of sampling a decent
amount of packets, this sampling rate semantics is not very useful.

Add a new flag to the uAPI that indicates that the sampling rate is
expressed in scaled probability, this is:
- 0 is 0% probability, no packets get sampled.
- U32_MAX is 100% probability, all packets get sampled.

Reviewed-by: Aaron Conole <aconole@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
Link: https://patch.msgid.link/20240704085710.353845-5-amorenoz@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/psample.h        |  3 ++-
 include/uapi/linux/psample.h | 10 +++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/psample.h b/include/net/psample.h
index 2ac71260a546..c52e9ebd88dd 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -24,7 +24,8 @@ struct psample_metadata {
 	u8 out_tc_valid:1,
 	   out_tc_occ_valid:1,
 	   latency_valid:1,
-	   unused:5;
+	   rate_as_probability:1,
+	   unused:4;
 	const u8 *user_cookie;
 	u32 user_cookie_len;
 };
diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h
index e80637e1d97b..b765f0e81f20 100644
--- a/include/uapi/linux/psample.h
+++ b/include/uapi/linux/psample.h
@@ -8,7 +8,11 @@ enum {
 	PSAMPLE_ATTR_ORIGSIZE,
 	PSAMPLE_ATTR_SAMPLE_GROUP,
 	PSAMPLE_ATTR_GROUP_SEQ,
-	PSAMPLE_ATTR_SAMPLE_RATE,
+	PSAMPLE_ATTR_SAMPLE_RATE,	/* u32, ratio between observed and
+					 * sampled packets or scaled probability
+					 * if PSAMPLE_ATTR_SAMPLE_PROBABILITY
+					 * is set.
+					 */
 	PSAMPLE_ATTR_DATA,
 	PSAMPLE_ATTR_GROUP_REFCOUNT,
 	PSAMPLE_ATTR_TUNNEL,
@@ -20,6 +24,10 @@ enum {
 	PSAMPLE_ATTR_TIMESTAMP,		/* u64, nanoseconds */
 	PSAMPLE_ATTR_PROTO,		/* u16 */
 	PSAMPLE_ATTR_USER_COOKIE,	/* binary, user provided data */
+	PSAMPLE_ATTR_SAMPLE_PROBABILITY,/* no argument, interpret rate in
+					 * PSAMPLE_ATTR_SAMPLE_RATE as a
+					 * probability scaled 0 - U32_MAX.
+					 */
 
 	__PSAMPLE_ATTR_MAX
 };
-- 
cgit v1.2.3


From aae0b82b46cb5004bdf82a000c004d69a0885c33 Mon Sep 17 00:00:00 2001
From: Adrian Moreno <amorenoz@redhat.com>
Date: Thu, 4 Jul 2024 10:56:56 +0200
Subject: net: openvswitch: add psample action

Add support for a new action: psample.

This action accepts a u32 group id and a variable-length cookie and uses
the psample multicast group to make the packet available for
observability.

The maximum length of the user-defined cookie is set to 16, same as
tc_cookie, to discourage using cookies that will not be offloadable.

Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
Reviewed-by: Aaron Conole <aconole@redhat.com>
Reviewed-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
Link: https://patch.msgid.link/20240704085710.353845-6-amorenoz@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/openvswitch.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index efc82c318fa2..3dd653748725 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -914,6 +914,31 @@ struct check_pkt_len_arg {
 };
 #endif
 
+#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
+/**
+ * enum ovs_psample_attr - Attributes for %OVS_ACTION_ATTR_PSAMPLE
+ * action.
+ *
+ * @OVS_PSAMPLE_ATTR_GROUP: 32-bit number to identify the source of the
+ * sample.
+ * @OVS_PSAMPLE_ATTR_COOKIE: An optional variable-length binary cookie that
+ * contains user-defined metadata. The maximum length is
+ * OVS_PSAMPLE_COOKIE_MAX_SIZE bytes.
+ *
+ * Sends the packet to the psample multicast group with the specified group and
+ * cookie. It is possible to combine this action with the
+ * %OVS_ACTION_ATTR_TRUNC action to limit the size of the sample.
+ */
+enum ovs_psample_attr {
+	OVS_PSAMPLE_ATTR_GROUP = 1,	/* u32 number. */
+	OVS_PSAMPLE_ATTR_COOKIE,	/* Optional, user specified cookie. */
+
+	/* private: */
+	__OVS_PSAMPLE_ATTR_MAX
+};
+
+#define OVS_PSAMPLE_ATTR_MAX (__OVS_PSAMPLE_ATTR_MAX - 1)
+
 /**
  * enum ovs_action_attr - Action types.
  *
@@ -966,6 +991,8 @@ struct check_pkt_len_arg {
  * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS
  * argument.
  * @OVS_ACTION_ATTR_DROP: Explicit drop action.
+ * @OVS_ACTION_ATTR_PSAMPLE: Send a sample of the packet to external observers
+ * via psample.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -1004,6 +1031,7 @@ enum ovs_action_attr {
 	OVS_ACTION_ATTR_ADD_MPLS,     /* struct ovs_action_add_mpls. */
 	OVS_ACTION_ATTR_DEC_TTL,      /* Nested OVS_DEC_TTL_ATTR_*. */
 	OVS_ACTION_ATTR_DROP,         /* u32 error code. */
+	OVS_ACTION_ATTR_PSAMPLE,      /* Nested OVS_PSAMPLE_ATTR_*. */
 
 	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
 				       * from userspace. */
-- 
cgit v1.2.3


From 71763d8a8203c28178d7be7f18af73d4dddb36ba Mon Sep 17 00:00:00 2001
From: Adrian Moreno <amorenoz@redhat.com>
Date: Thu, 4 Jul 2024 10:56:57 +0200
Subject: net: openvswitch: store sampling probability in cb.

When a packet sample is observed, the sampling rate that was used is
important to estimate the real frequency of such event.

Store the probability of the parent sample action in the skb's cb area
and use it in psample action to pass it down to psample module.

Reviewed-by: Aaron Conole <aconole@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Reviewed-by: Ilya Maximets <i.maximets@ovn.org>
Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
Link: https://patch.msgid.link/20240704085710.353845-7-amorenoz@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/openvswitch.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 3dd653748725..3a701bd1f31b 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -649,7 +649,8 @@ enum ovs_flow_attr {
  * Actions are passed as nested attributes.
  *
  * Executes the specified actions with the given probability on a per-packet
- * basis.
+ * basis. Nested actions will be able to access the probability value of the
+ * parent @OVS_ACTION_ATTR_SAMPLE.
  */
 enum ovs_sample_attr {
 	OVS_SAMPLE_ATTR_UNSPEC,
-- 
cgit v1.2.3


From e46296002113b4556baffd5dc68c4f9e22dae13a Mon Sep 17 00:00:00 2001
From: "Kory Maincent (Dent Project)" <kory.maincent@bootlin.com>
Date: Thu, 4 Jul 2024 10:11:56 +0200
Subject: net: ethtool: pse-pd: Expand C33 PSE status with class, power and
 extended state

This update expands the status information provided by ethtool for PSE c33.
It includes details such as the detected class, current power delivered,
and extended state information.

Reviewed-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://patch.msgid.link/20240704-feature_poe_power_cap-v6-1-320003204264@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h              |  15 +++
 include/linux/pse-pd/pse.h           |   8 ++
 include/uapi/linux/ethtool.h         | 191 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/ethtool_netlink.h |   4 +
 4 files changed, 218 insertions(+)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 3a99238ef895..3e70f5d9e0bb 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -1273,4 +1273,19 @@ struct ethtool_forced_speed_map {
 
 void
 ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size);
+
+/* C33 PSE extended state and substate. */
+struct ethtool_c33_pse_ext_state_info {
+	enum ethtool_c33_pse_ext_state c33_pse_ext_state;
+	union {
+		enum ethtool_c33_pse_ext_substate_error_condition error_condition;
+		enum ethtool_c33_pse_ext_substate_mr_pse_enable mr_pse_enable;
+		enum ethtool_c33_pse_ext_substate_option_detect_ted option_detect_ted;
+		enum ethtool_c33_pse_ext_substate_option_vport_lim option_vport_lim;
+		enum ethtool_c33_pse_ext_substate_ovld_detected ovld_detected;
+		enum ethtool_c33_pse_ext_substate_power_not_available power_not_available;
+		enum ethtool_c33_pse_ext_substate_short_detected short_detected;
+		u32 __c33_pse_ext_substate;
+	};
+};
 #endif /* _LINUX_ETHTOOL_H */
diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h
index 6eec24ffa866..38b9308e5e7a 100644
--- a/include/linux/pse-pd/pse.h
+++ b/include/linux/pse-pd/pse.h
@@ -36,12 +36,20 @@ struct pse_control_config {
  *	functions. IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState
  * @c33_pw_status: power detection status of the PSE.
  *	IEEE 802.3-2022 30.9.1.1.5 aPSEPowerDetectionStatus:
+ * @c33_pw_class: detected class of a powered PD
+ *	IEEE 802.3-2022 30.9.1.1.8 aPSEPowerClassification
+ * @c33_actual_pw: power currently delivered by the PSE in mW
+ *	IEEE 802.3-2022 30.9.1.1.23 aPSEActualPower
+ * @c33_ext_state_info: extended state information of the PSE
  */
 struct pse_control_status {
 	enum ethtool_podl_pse_admin_state podl_admin_state;
 	enum ethtool_podl_pse_pw_d_status podl_pw_status;
 	enum ethtool_c33_pse_admin_state c33_admin_state;
 	enum ethtool_c33_pse_pw_d_status c33_pw_status;
+	u32 c33_pw_class;
+	u32 c33_actual_pw;
+	struct ethtool_c33_pse_ext_state_info c33_ext_state_info;
 };
 
 /**
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index e011384c915c..230110b97029 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -752,6 +752,197 @@ enum ethtool_module_power_mode {
 	ETHTOOL_MODULE_POWER_MODE_HIGH,
 };
 
+/**
+ * enum ethtool_c33_pse_ext_state - groups of PSE extended states
+ *      functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION: Group of error_condition states
+ * @ETHTOOL_C33_PSE_EXT_STATE_MR_MPS_VALID: Group of mr_mps_valid states
+ * @ETHTOOL_C33_PSE_EXT_STATE_MR_PSE_ENABLE: Group of mr_pse_enable states
+ * @ETHTOOL_C33_PSE_EXT_STATE_OPTION_DETECT_TED: Group of option_detect_ted
+ *	states
+ * @ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM: Group of option_vport_lim states
+ * @ETHTOOL_C33_PSE_EXT_STATE_OVLD_DETECTED: Group of ovld_detected states
+ * @ETHTOOL_C33_PSE_EXT_STATE_PD_DLL_POWER_TYPE: Group of pd_dll_power_type
+ *	states
+ * @ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE: Group of power_not_available
+ *	states
+ * @ETHTOOL_C33_PSE_EXT_STATE_SHORT_DETECTED: Group of short_detected states
+ */
+enum ethtool_c33_pse_ext_state {
+	ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION = 1,
+	ETHTOOL_C33_PSE_EXT_STATE_MR_MPS_VALID,
+	ETHTOOL_C33_PSE_EXT_STATE_MR_PSE_ENABLE,
+	ETHTOOL_C33_PSE_EXT_STATE_OPTION_DETECT_TED,
+	ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM,
+	ETHTOOL_C33_PSE_EXT_STATE_OVLD_DETECTED,
+	ETHTOOL_C33_PSE_EXT_STATE_PD_DLL_POWER_TYPE,
+	ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE,
+	ETHTOOL_C33_PSE_EXT_STATE_SHORT_DETECTED,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_mr_mps_valid - mr_mps_valid states
+ *      functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_DETECTED_UNDERLOAD: Underload
+ *	state
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_CONNECTION_OPEN: Port is not
+ *	connected
+ *
+ * The PSE monitors either the DC or AC Maintain Power Signature
+ * (MPS, see 33.2.9.1). This variable indicates the presence or absence of
+ * a valid MPS.
+ */
+enum ethtool_c33_pse_ext_substate_mr_mps_valid {
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_DETECTED_UNDERLOAD = 1,
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_CONNECTION_OPEN,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_error_condition - error_condition states
+ *      functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_NON_EXISTING_PORT: Non-existing
+ *	port number
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNDEFINED_PORT: Undefined port
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_INTERNAL_HW_FAULT: Internal
+ *	hardware fault
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_COMM_ERROR_AFTER_FORCE_ON:
+ *	Communication error after force on
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS: Unknown
+ *	port status
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_TURN_OFF: Host
+ *	crash turn off
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_FORCE_SHUTDOWN:
+ *	Host crash force shutdown
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_CONFIG_CHANGE: Configuration
+ *	change
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_DETECTED_OVER_TEMP: Over
+ *	temperature detected
+ *
+ * error_condition is a variable indicating the status of
+ * implementation-specific fault conditions or optionally other system faults
+ * that prevent the PSE from meeting the specifications in Table 33–11 and that
+ * require the PSE not to source power. These error conditions are different
+ * from those monitored by the state diagrams in Figure 33–10.
+ */
+enum ethtool_c33_pse_ext_substate_error_condition {
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_NON_EXISTING_PORT = 1,
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNDEFINED_PORT,
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_INTERNAL_HW_FAULT,
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_COMM_ERROR_AFTER_FORCE_ON,
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS,
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_TURN_OFF,
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_FORCE_SHUTDOWN,
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_CONFIG_CHANGE,
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_DETECTED_OVER_TEMP,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_mr_pse_enable - mr_pse_enable states
+ *      functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_PSE_ENABLE_DISABLE_PIN_ACTIVE: Disable
+ *	pin active
+ *
+ * mr_pse_enable is control variable that selects PSE operation and test
+ * functions.
+ */
+enum ethtool_c33_pse_ext_substate_mr_pse_enable {
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_PSE_ENABLE_DISABLE_PIN_ACTIVE = 1,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_option_detect_ted - option_detect_ted
+ *	states functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_DET_IN_PROCESS: Detection
+ *	in process
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_CONNECTION_CHECK_ERROR:
+ *	Connection check error
+ *
+ * option_detect_ted is a variable indicating if detection can be performed
+ * by the PSE during the ted_timer interval.
+ */
+enum ethtool_c33_pse_ext_substate_option_detect_ted {
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_DET_IN_PROCESS = 1,
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_CONNECTION_CHECK_ERROR,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_option_vport_lim - option_vport_lim states
+ *      functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_HIGH_VOLTAGE: Main supply
+ *	voltage is high
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_LOW_VOLTAGE: Main supply
+ *	voltage is low
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_VOLTAGE_INJECTION: Voltage
+ *	injection into the port
+ *
+ * option_vport_lim is an optional variable indicates if VPSE is out of the
+ * operating range during normal operating state.
+ */
+enum ethtool_c33_pse_ext_substate_option_vport_lim {
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_HIGH_VOLTAGE = 1,
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_LOW_VOLTAGE,
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_VOLTAGE_INJECTION,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_ovld_detected - ovld_detected states
+ *      functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OVLD_DETECTED_OVERLOAD: Overload state
+ *
+ * ovld_detected is a variable indicating if the PSE output current has been
+ * in an overload condition (see 33.2.7.6) for at least TCUT of a one-second
+ * sliding time.
+ */
+enum ethtool_c33_pse_ext_substate_ovld_detected {
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_OVLD_DETECTED_OVERLOAD = 1,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_power_not_available - power_not_available
+ *	states functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_BUDGET_EXCEEDED: Power
+ *	budget exceeded for the controller
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PORT_PW_LIMIT_EXCEEDS_CONTROLLER_BUDGET:
+ *	Configured port power limit exceeded controller power budget
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PD_REQUEST_EXCEEDS_PORT_LIMIT:
+ *	Power request from PD exceeds port limit
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_HW_PW_LIMIT: Power
+ *	denied due to Hardware power limit
+ *
+ * power_not_available is a variable that is asserted in an
+ * implementation-dependent manner when the PSE is no longer capable of
+ * sourcing sufficient power to support the attached PD. Sufficient power
+ * is defined by classification; see 33.2.6.
+ */
+enum ethtool_c33_pse_ext_substate_power_not_available {
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_BUDGET_EXCEEDED =  1,
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PORT_PW_LIMIT_EXCEEDS_CONTROLLER_BUDGET,
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PD_REQUEST_EXCEEDS_PORT_LIMIT,
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_HW_PW_LIMIT,
+};
+
+/**
+ * enum ethtool_c33_pse_ext_substate_short_detected - short_detected states
+ *      functions. IEEE 802.3-2022 33.2.4.4 Variables
+ *
+ * @ETHTOOL_C33_PSE_EXT_SUBSTATE_SHORT_DETECTED_SHORT_CONDITION: Short
+ *	condition was detected
+ *
+ * short_detected is a variable indicating if the PSE output current has been
+ * in a short circuit condition for TLIM within a sliding window (see 33.2.7.7).
+ */
+enum ethtool_c33_pse_ext_substate_short_detected {
+	ETHTOOL_C33_PSE_EXT_SUBSTATE_SHORT_DETECTED_SHORT_CONDITION = 1,
+};
+
 /**
  * enum ethtool_pse_types - Types of PSE controller.
  * @ETHTOOL_PSE_UNKNOWN: Type of PSE controller is unknown
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 840dabdc9d88..b8895da001bc 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -939,6 +939,10 @@ enum {
 	ETHTOOL_A_C33_PSE_ADMIN_STATE,		/* u32 */
 	ETHTOOL_A_C33_PSE_ADMIN_CONTROL,	/* u32 */
 	ETHTOOL_A_C33_PSE_PW_D_STATUS,		/* u32 */
+	ETHTOOL_A_C33_PSE_PW_CLASS,		/* u32 */
+	ETHTOOL_A_C33_PSE_ACTUAL_PW,		/* u32 */
+	ETHTOOL_A_C33_PSE_EXT_STATE,		/* u32 */
+	ETHTOOL_A_C33_PSE_EXT_SUBSTATE,		/* u32 */
 
 	/* add new constants above here */
 	__ETHTOOL_A_PSE_CNT,
-- 
cgit v1.2.3


From 4a83abcef5f4f36be4e04ba18edd64226f6f4a19 Mon Sep 17 00:00:00 2001
From: "Kory Maincent (Dent Project)" <kory.maincent@bootlin.com>
Date: Thu, 4 Jul 2024 10:11:59 +0200
Subject: net: pse-pd: Add new power limit get and set c33 features

This patch add a way to get and set the power limit of a PSE PI.
For that it uses regulator API callbacks wrapper like get_voltage() and
get/set_current_limit() as power is simply V * I.
We used mW unit as defined by the IEEE 802.3-2022 standards.

set_current_limit() uses the voltage return by get_voltage() and the
desired power limit to calculate the current limit. get_voltage() callback
is then mandatory to set the power limit.

get_current_limit() callback is by default looking at a driver callback
and fallback to extracting the current limit from _pse_ethtool_get_status()
if the driver does not set its callback. We prefer let the user the choice
because ethtool_get_status return much more information than the current
limit.

expand pse status with c33_pw_limit_ranges to return the ranges available
to configure the power limit.

Reviewed-by: Sai Krishna <saikrishnag@marvell.com>
Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://patch.msgid.link/20240704-feature_poe_power_cap-v6-4-320003204264@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/pse-pd/pse.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'include')

diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h
index 38b9308e5e7a..591a53e082e6 100644
--- a/include/linux/pse-pd/pse.h
+++ b/include/linux/pse-pd/pse.h
@@ -9,6 +9,9 @@
 #include <linux/list.h>
 #include <uapi/linux/ethtool.h>
 
+/* Maximum current in uA according to IEEE 802.3-2022 Table 145-1 */
+#define MAX_PI_CURRENT 1920000
+
 struct phy_device;
 struct pse_controller_dev;
 
@@ -41,6 +44,12 @@ struct pse_control_config {
  * @c33_actual_pw: power currently delivered by the PSE in mW
  *	IEEE 802.3-2022 30.9.1.1.23 aPSEActualPower
  * @c33_ext_state_info: extended state information of the PSE
+ * @c33_avail_pw_limit: available power limit of the PSE in mW
+ *	IEEE 802.3-2022 145.2.5.4 pse_avail_pwr
+ * @c33_pw_limit_ranges: supported power limit configuration range. The driver
+ *	is in charge of the memory allocation.
+ * @c33_pw_limit_nb_ranges: number of supported power limit configuration
+ *	ranges
  */
 struct pse_control_status {
 	enum ethtool_podl_pse_admin_state podl_admin_state;
@@ -50,6 +59,9 @@ struct pse_control_status {
 	u32 c33_pw_class;
 	u32 c33_actual_pw;
 	struct ethtool_c33_pse_ext_state_info c33_ext_state_info;
+	u32 c33_avail_pw_limit;
+	struct ethtool_c33_pse_pw_limit_range *c33_pw_limit_ranges;
+	u32 c33_pw_limit_nb_ranges;
 };
 
 /**
@@ -61,6 +73,14 @@ struct pse_control_status {
  *		   May also return negative errno.
  * @pi_enable: Configure the PSE PI as enabled.
  * @pi_disable: Configure the PSE PI as disabled.
+ * @pi_get_voltage: Return voltage similarly to get_voltage regulator
+ *		    callback.
+ * @pi_get_current_limit: Get the configured current limit similarly to
+ *			  get_current_limit regulator callback.
+ * @pi_set_current_limit: Configure the current limit similarly to
+ *			  set_current_limit regulator callback.
+ *			  Should not return an error in case of MAX_PI_CURRENT
+ *			  current value set.
  */
 struct pse_controller_ops {
 	int (*ethtool_get_status)(struct pse_controller_dev *pcdev,
@@ -70,6 +90,11 @@ struct pse_controller_ops {
 	int (*pi_is_enabled)(struct pse_controller_dev *pcdev, int id);
 	int (*pi_enable)(struct pse_controller_dev *pcdev, int id);
 	int (*pi_disable)(struct pse_controller_dev *pcdev, int id);
+	int (*pi_get_voltage)(struct pse_controller_dev *pcdev, int id);
+	int (*pi_get_current_limit)(struct pse_controller_dev *pcdev,
+				    int id);
+	int (*pi_set_current_limit)(struct pse_controller_dev *pcdev,
+				    int id, int max_uA);
 };
 
 struct module;
@@ -156,6 +181,11 @@ int pse_ethtool_get_status(struct pse_control *psec,
 int pse_ethtool_set_config(struct pse_control *psec,
 			   struct netlink_ext_ack *extack,
 			   const struct pse_control_config *config);
+int pse_ethtool_set_pw_limit(struct pse_control *psec,
+			     struct netlink_ext_ack *extack,
+			     const unsigned int pw_limit);
+int pse_ethtool_get_pw_limit(struct pse_control *psec,
+			     struct netlink_ext_ack *extack);
 
 bool pse_has_podl(struct pse_control *psec);
 bool pse_has_c33(struct pse_control *psec);
@@ -185,6 +215,19 @@ static inline int pse_ethtool_set_config(struct pse_control *psec,
 	return -EOPNOTSUPP;
 }
 
+static inline int pse_ethtool_set_pw_limit(struct pse_control *psec,
+					   struct netlink_ext_ack *extack,
+					   const unsigned int pw_limit)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int pse_ethtool_get_pw_limit(struct pse_control *psec,
+					   struct netlink_ext_ack *extack)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline bool pse_has_podl(struct pse_control *psec)
 {
 	return false;
-- 
cgit v1.2.3


From 30d7b6727724ce3729f2cb5b8be985d2d1931d2b Mon Sep 17 00:00:00 2001
From: "Kory Maincent (Dent Project)" <kory.maincent@bootlin.com>
Date: Thu, 4 Jul 2024 10:12:00 +0200
Subject: net: ethtool: Add new power limit get and set features

This patch expands the status information provided by ethtool for PSE c33
with available power limit and available power limit ranges. It also adds
a call to pse_ethtool_set_pw_limit() to configure the PSE control power
limit.

Reviewed-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://patch.msgid.link/20240704-feature_poe_power_cap-v6-5-320003204264@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h              | 5 +++++
 include/uapi/linux/ethtool_netlink.h | 8 ++++++++
 2 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 3e70f5d9e0bb..e213b5508da6 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -1288,4 +1288,9 @@ struct ethtool_c33_pse_ext_state_info {
 		u32 __c33_pse_ext_substate;
 	};
 };
+
+struct ethtool_c33_pse_pw_limit_range {
+	u32 min;
+	u32 max;
+};
 #endif /* _LINUX_ETHTOOL_H */
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index b8895da001bc..6d5bdcc67631 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -930,6 +930,12 @@ enum {
 };
 
 /* Power Sourcing Equipment */
+enum {
+	ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC,
+	ETHTOOL_A_C33_PSE_PW_LIMIT_MIN,	/* u32 */
+	ETHTOOL_A_C33_PSE_PW_LIMIT_MAX,	/* u32 */
+};
+
 enum {
 	ETHTOOL_A_PSE_UNSPEC,
 	ETHTOOL_A_PSE_HEADER,			/* nest - _A_HEADER_* */
@@ -943,6 +949,8 @@ enum {
 	ETHTOOL_A_C33_PSE_ACTUAL_PW,		/* u32 */
 	ETHTOOL_A_C33_PSE_EXT_STATE,		/* u32 */
 	ETHTOOL_A_C33_PSE_EXT_SUBSTATE,		/* u32 */
+	ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT,	/* u32 */
+	ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES,	/* nest - _C33_PSE_PW_LIMIT_* */
 
 	/* add new constants above here */
 	__ETHTOOL_A_PSE_CNT,
-- 
cgit v1.2.3


From fefbbdfb59d3a20fd98734363e6dd9fa7cc65c70 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 5 Jul 2024 18:03:42 +0200
Subject: ALSA: seq: Add tempo base unit for MIDI2 Set Tempo messages

MIDI2 Set Tempo message defines the tempo in 10ns unit for finer
accuracy, while MIDI1 was defined in 1us unit.  For adapting this
different unit, introduce "tempo_base" field to snd_seq_queue_tempo
struct so that user-space can pass the proper tempo base unit.

The accepted value is limited, it must be either 0, 10 or 1000.

The protocol version is bumped to 1.0.4 along with this.

The access with the older protocol version ignores the tempo-base
value in ioctls and always treats as 1000.

Reviewed-by: Jaroslav Kysela <perex@perex.cz>
Link: https://patch.msgid.link/20240705160344.6481-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/uapi/sound/asequencer.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/sound/asequencer.h b/include/uapi/sound/asequencer.h
index c85fdd8895d8..39b37edcf813 100644
--- a/include/uapi/sound/asequencer.h
+++ b/include/uapi/sound/asequencer.h
@@ -10,7 +10,7 @@
 #include <sound/asound.h>
 
 /** version of the sequencer */
-#define SNDRV_SEQ_VERSION SNDRV_PROTOCOL_VERSION(1, 0, 3)
+#define SNDRV_SEQ_VERSION SNDRV_PROTOCOL_VERSION(1, 0, 4)
 
 /**
  * definition of sequencer event types
@@ -523,11 +523,12 @@ struct snd_seq_queue_status {
 /* queue tempo */
 struct snd_seq_queue_tempo {
 	int queue;			/* sequencer queue */
-	unsigned int tempo;		/* current tempo, us/tick */
+	unsigned int tempo;		/* current tempo, us/tick (or different time-base below) */
 	int ppq;			/* time resolution, ticks/quarter */
 	unsigned int skew_value;	/* queue skew */
 	unsigned int skew_base;		/* queue skew base */
-	char reserved[24];		/* for the future */
+	unsigned short tempo_base;	/* tempo base in nsec unit; either 10 or 1000 */
+	char reserved[22];		/* for the future */
 };
 
 
-- 
cgit v1.2.3


From 4bf1ea3fc914faef37d8c793b7144d4765ff4a00 Mon Sep 17 00:00:00 2001
From: Devin Bayer <dev@doubly.so>
Date: Fri, 28 Jun 2024 10:46:03 +0200
Subject: platform/x86: asus-wmi: support the disable camera LED on F10 of
 Zenbook 2023
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a sysfs entry for the LED on F10 above the crossed out camera icon
on 2023 Zenbooks.

Signed-off-by: Devin Bayer <dev@doubly.so>
Link: https://lore.kernel.org/r/20240628084603.217106-1-dev@doubly.so
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 include/linux/platform_data/x86/asus-wmi.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
index 3eb5cd6773ad..0aeeae1c1943 100644
--- a/include/linux/platform_data/x86/asus-wmi.h
+++ b/include/linux/platform_data/x86/asus-wmi.h
@@ -51,6 +51,10 @@
 #define ASUS_WMI_DEVID_LED6		0x00020016
 #define ASUS_WMI_DEVID_MICMUTE_LED		0x00040017
 
+/* Disable Camera LED */
+#define ASUS_WMI_DEVID_CAMERA_LED_NEG	0x00060078 /* 0 = on (unused) */
+#define ASUS_WMI_DEVID_CAMERA_LED	0x00060079 /* 1 = on */
+
 /* Backlight and Brightness */
 #define ASUS_WMI_DEVID_ALS_ENABLE	0x00050001 /* Ambient Light Sensor */
 #define ASUS_WMI_DEVID_BACKLIGHT	0x00050011
-- 
cgit v1.2.3


From d1f1570f3d6db5d35642092a671812e62bfba79d Mon Sep 17 00:00:00 2001
From: Varadarajan Narayanan <quic_varada@quicinc.com>
Date: Tue, 30 Apr 2024 12:12:10 +0530
Subject: dt-bindings: interconnect: Add Qualcomm IPQ9574 support

Add interconnect-cells to clock provider so that it can be
used as icc provider.

Add master/slave ids for Qualcomm IPQ9574 Network-On-Chip
interfaces. This will be used by the gcc-ipq9574 driver
that will for providing interconnect services using the
icc-clk framework.

Acked-by: Georgi Djakov <djakov@kernel.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Varadarajan Narayanan <quic_varada@quicinc.com>
Link: https://lore.kernel.org/r/20240430064214.2030013-3-quic_varada@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/dt-bindings/interconnect/qcom,ipq9574.h | 59 +++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 include/dt-bindings/interconnect/qcom,ipq9574.h

(limited to 'include')

diff --git a/include/dt-bindings/interconnect/qcom,ipq9574.h b/include/dt-bindings/interconnect/qcom,ipq9574.h
new file mode 100644
index 000000000000..42019335c7dd
--- /dev/null
+++ b/include/dt-bindings/interconnect/qcom,ipq9574.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+#ifndef INTERCONNECT_QCOM_IPQ9574_H
+#define INTERCONNECT_QCOM_IPQ9574_H
+
+#define MASTER_ANOC_PCIE0		0
+#define SLAVE_ANOC_PCIE0		1
+#define MASTER_SNOC_PCIE0		2
+#define SLAVE_SNOC_PCIE0		3
+#define MASTER_ANOC_PCIE1		4
+#define SLAVE_ANOC_PCIE1		5
+#define MASTER_SNOC_PCIE1		6
+#define SLAVE_SNOC_PCIE1		7
+#define MASTER_ANOC_PCIE2		8
+#define SLAVE_ANOC_PCIE2		9
+#define MASTER_SNOC_PCIE2		10
+#define SLAVE_SNOC_PCIE2		11
+#define MASTER_ANOC_PCIE3		12
+#define SLAVE_ANOC_PCIE3		13
+#define MASTER_SNOC_PCIE3		14
+#define SLAVE_SNOC_PCIE3		15
+#define MASTER_USB			16
+#define SLAVE_USB			17
+#define MASTER_USB_AXI			18
+#define SLAVE_USB_AXI			19
+#define MASTER_NSSNOC_NSSCC		20
+#define SLAVE_NSSNOC_NSSCC		21
+#define MASTER_NSSNOC_SNOC_0		22
+#define SLAVE_NSSNOC_SNOC_0		23
+#define MASTER_NSSNOC_SNOC_1		24
+#define SLAVE_NSSNOC_SNOC_1		25
+#define MASTER_NSSNOC_PCNOC_1		26
+#define SLAVE_NSSNOC_PCNOC_1		27
+#define MASTER_NSSNOC_QOSGEN_REF	28
+#define SLAVE_NSSNOC_QOSGEN_REF		29
+#define MASTER_NSSNOC_TIMEOUT_REF	30
+#define SLAVE_NSSNOC_TIMEOUT_REF	31
+#define MASTER_NSSNOC_XO_DCD		32
+#define SLAVE_NSSNOC_XO_DCD		33
+#define MASTER_NSSNOC_ATB		34
+#define SLAVE_NSSNOC_ATB		35
+#define MASTER_MEM_NOC_NSSNOC		36
+#define SLAVE_MEM_NOC_NSSNOC		37
+#define MASTER_NSSNOC_MEMNOC		38
+#define SLAVE_NSSNOC_MEMNOC		39
+#define MASTER_NSSNOC_MEM_NOC_1		40
+#define SLAVE_NSSNOC_MEM_NOC_1		41
+
+#define MASTER_NSSNOC_PPE		0
+#define SLAVE_NSSNOC_PPE		1
+#define MASTER_NSSNOC_PPE_CFG		2
+#define SLAVE_NSSNOC_PPE_CFG		3
+#define MASTER_NSSNOC_NSS_CSR		4
+#define SLAVE_NSSNOC_NSS_CSR		5
+#define MASTER_NSSNOC_IMEM_QSB		6
+#define SLAVE_NSSNOC_IMEM_QSB		7
+#define MASTER_NSSNOC_IMEM_AHB		8
+#define SLAVE_NSSNOC_IMEM_AHB		9
+
+#endif /* INTERCONNECT_QCOM_IPQ9574_H */
-- 
cgit v1.2.3


From 28bdacbcb36d093e23734acccecd139f5fc05f67 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Wed, 26 Jun 2024 16:53:23 +0800
Subject: mm: move memory_failure_queue() into copy_mc_[user]_highpage()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "mm: migrate: support poison recover from migrate folio", v5.

The folio migration is widely used in kernel, memory compaction, memory
hotplug, soft offline page, numa balance, memory demote/promotion, etc,
but once access a poisoned source folio when migrating, the kernel will
panic.

There is a mechanism in the kernel to recover from uncorrectable memory
errors, ARCH_HAS_COPY_MC(eg, Machine Check Safe Memory Copy on x86), which
is already used in NVDIMM or core-mm paths(eg, CoW, khugepaged, coredump,
ksm copy), see copy_mc_to_{user,kernel}, copy_mc_{user_}highpage callers.

This series of patches provide the recovery mechanism from folio copy for
the widely used folio migration.  Please note, because folio migration is
no guarantee of success, so we could chose to make folio migration
tolerant of memory failures, adding folio_mc_copy() which is a #MC
versions of folio_copy(), once accessing a poisoned source folio, we could
return error and make the folio migration fail, and this could avoid the
similar panic shown below.

  CPU: 1 PID: 88343 Comm: test_softofflin Kdump: loaded Not tainted 6.6.0
  pc : copy_page+0x10/0xc0
  lr : copy_highpage+0x38/0x50
  ...
  Call trace:
   copy_page+0x10/0xc0
   folio_copy+0x78/0x90
   migrate_folio_extra+0x54/0xa0
   move_to_new_folio+0xd8/0x1f0
   migrate_folio_move+0xb8/0x300
   migrate_pages_batch+0x528/0x788
   migrate_pages_sync+0x8c/0x258
   migrate_pages+0x440/0x528
   soft_offline_in_use_page+0x2ec/0x3c0
   soft_offline_page+0x238/0x310
   soft_offline_page_store+0x6c/0xc0
   dev_attr_store+0x20/0x40
   sysfs_kf_write+0x4c/0x68
   kernfs_fop_write_iter+0x130/0x1c8
   new_sync_write+0xa4/0x138
   vfs_write+0x238/0x2d8
   ksys_write+0x74/0x110


This patch (of 5):

There is a memory_failure_queue() call after copy_mc_[user]_highpage(),
see callers, eg, CoW/KSM page copy, it is used to mark the source page as
h/w poisoned and unmap it from other tasks, and the upcomming poison
recover from migrate folio will do the similar thing, so let's move the
memory_failure_queue() into the copy_mc_[user]_highpage() instead of
adding it into each user, this should also enhance the handling of
poisoned page in khugepaged.

Link: https://lkml.kernel.org/r/20240626085328.608006-1-wangkefeng.wang@huawei.com
Link: https://lkml.kernel.org/r/20240626085328.608006-2-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Jane Chu <jane.chu@oracle.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jiaqi Yan <jiaqiyan@google.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/highmem.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index fa6891e06316..930a591b9b61 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -352,6 +352,9 @@ static inline int copy_mc_user_highpage(struct page *to, struct page *from,
 	kunmap_local(vto);
 	kunmap_local(vfrom);
 
+	if (ret)
+		memory_failure_queue(page_to_pfn(from), 0);
+
 	return ret;
 }
 
@@ -368,6 +371,9 @@ static inline int copy_mc_highpage(struct page *to, struct page *from)
 	kunmap_local(vto);
 	kunmap_local(vfrom);
 
+	if (ret)
+		memory_failure_queue(page_to_pfn(from), 0);
+
 	return ret;
 }
 #else
-- 
cgit v1.2.3


From 02f4ee5a144cef6b26421cb42cca64bb4138d459 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Wed, 26 Jun 2024 16:53:24 +0800
Subject: mm: add folio_mc_copy()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a #MC variant of folio_copy() which uses copy_mc_highpage() to support
#MC handled during folio copy, it will be used in folio migration soon.

Link: https://lkml.kernel.org/r/20240626085328.608006-3-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Jane Chu <jane.chu@oracle.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jiaqi Yan <jiaqiyan@google.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index e2140ea6ae98..a2e3ebead410 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1291,6 +1291,7 @@ void put_pages_list(struct list_head *pages);
 
 void split_page(struct page *page, unsigned int order);
 void folio_copy(struct folio *dst, struct folio *src);
+int folio_mc_copy(struct folio *dst, struct folio *src);
 
 unsigned long nr_free_buffer_pages(void);
 
-- 
cgit v1.2.3


From 3f59493713534b61abcbe2e57582cf7e31a42d51 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Wed, 26 Jun 2024 16:53:28 +0800
Subject: mm: migrate: remove folio_migrate_copy()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The folio_migrate_copy() is just a wrapper of folio_copy() and
folio_migrate_flags(), it is simple and only aio use it for now, unfold it
and remove folio_migrate_copy().

Link: https://lkml.kernel.org/r/20240626085328.608006-7-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Jane Chu <jane.chu@oracle.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jiaqi Yan <jiaqiyan@google.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/migrate.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index af2579ae93f2..644be30b69c8 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -76,7 +76,6 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl)
 		__releases(ptl);
 void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
-void folio_migrate_copy(struct folio *newfolio, struct folio *folio);
 int folio_migrate_mapping(struct address_space *mapping,
 		struct folio *newfolio, struct folio *folio, int extra_count);
 
-- 
cgit v1.2.3


From 25f76c3db2f08428b5acd082a52787164001eb6e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 6 Jul 2024 09:52:17 +0200
Subject: block: add a bvec_phys helper

Get callers out of poking into bvec internals a bit more.  Not a huge win
right now, but with the proposed new DMA mapping API we might end up with
a lot more of this otherwise.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20240706075228.2350978-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bvec.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index bd1e361b351c..f41c7f0ef91e 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -280,4 +280,18 @@ static inline void *bvec_virt(struct bio_vec *bvec)
 	return page_address(bvec->bv_page) + bvec->bv_offset;
 }
 
+/**
+ * bvec_phys - return the physical address for a bvec
+ * @bvec: bvec to return the physical address for
+ */
+static inline phys_addr_t bvec_phys(const struct bio_vec *bvec)
+{
+	/*
+	 * Note this open codes page_to_phys because page_to_phys is defined in
+	 * <asm/io.h>, which we don't want to pull in here.  If it ever moves to
+	 * a sensible place we should start using it.
+	 */
+	return PFN_PHYS(page_to_pfn(bvec->bv_page)) + bvec->bv_offset;
+}
+
 #endif /* __LINUX_BVEC_H */
-- 
cgit v1.2.3


From cd6193877c603f4b0c3c7e5607ffa3d52815403f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 1 Jul 2024 09:35:33 +0200
Subject: x86/efistub: Enable SMBIOS protocol handling for x86

The smbios.c source file is not currently included in the x86 build, and
before we can do so, it needs some tweaks to build correctly in
combination with the EFI mixed mode support.

Reviewed-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/efi.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 418e555459da..2a539816a436 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -74,10 +74,10 @@ typedef void *efi_handle_t;
  */
 typedef guid_t efi_guid_t __aligned(__alignof__(u32));
 
-#define EFI_GUID(a, b, c, d...) (efi_guid_t){ {					\
+#define EFI_GUID(a, b, c, d...) ((efi_guid_t){ {				\
 	(a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff,	\
 	(b) & 0xff, ((b) >> 8) & 0xff,						\
-	(c) & 0xff, ((c) >> 8) & 0xff, d } }
+	(c) & 0xff, ((c) >> 8) & 0xff, d } })
 
 /*
  * Generic EFI table header
-- 
cgit v1.2.3


From 71e49eccdca6328eecc335ed8f5557bd0ed70fc6 Mon Sep 17 00:00:00 2001
From: Aditya Garg <gargaditya08@live.com>
Date: Sun, 30 Jun 2024 19:24:54 +0000
Subject: x86/efistub: Call Apple set_os protocol on dual GPU Intel Macs

0c18184de990 ("platform/x86: apple-gmux: support MMIO gmux on T2 Macs")
brought support for T2 Macs in apple-gmux. But in order to use dual GPU,
the integrated GPU has to be enabled. On such dual GPU EFI Macs, the EFI
stub needs to report that it is booting macOS in order to prevent the
firmware from disabling the iGPU.

This patch is also applicable for some non T2 Intel Macs.

Based on this patch for GRUB by Andreas Heider <andreas@heider.io>:
https://lists.gnu.org/archive/html/grub-devel/2013-12/msg00442.html

Credits also goto Kerem Karabay <kekrby@gmail.com> for helping porting
the patch to the Linux kernel.

Cc: Orlando Chamberlain <orlandoch.dev@gmail.com>
Signed-off-by: Aditya Garg <gargaditya08@live.com>
[ardb: limit scope using list of DMI matches provided by Lukas and Orlando]
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Tested-by: Aditya Garg <gargaditya08@live.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/efi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 2a539816a436..3a6c04a9f9aa 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -385,6 +385,7 @@ void efi_native_runtime_setup(void);
 #define EFI_MEMORY_ATTRIBUTES_TABLE_GUID	EFI_GUID(0xdcfa911d, 0x26eb, 0x469f,  0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20)
 #define EFI_CONSOLE_OUT_DEVICE_GUID		EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4,  0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
 #define APPLE_PROPERTIES_PROTOCOL_GUID		EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb,  0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0)
+#define APPLE_SET_OS_PROTOCOL_GUID		EFI_GUID(0xc5c5da95, 0x7d5c, 0x45e6,  0xb2, 0xf1, 0x3f, 0xd5, 0x2b, 0xb1, 0x00, 0x77)
 #define EFI_TCG2_PROTOCOL_GUID			EFI_GUID(0x607f766c, 0x7455, 0x42be,  0x93, 0x0b, 0xe4, 0xd7, 0x6d, 0xb2, 0x72, 0x0f)
 #define EFI_TCG2_FINAL_EVENTS_TABLE_GUID	EFI_GUID(0x1e2ed096, 0x30e2, 0x4254,  0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25)
 #define EFI_LOAD_FILE_PROTOCOL_GUID		EFI_GUID(0x56ec3091, 0x954c, 0x11d2,  0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b)
-- 
cgit v1.2.3


From f21711bbdbf0d95a389bfaad54ce444b46830d58 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sat, 6 Jul 2024 13:13:42 +0200
Subject: regmap-irq: handle const struct regmap_irq_sub_irq_map
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The struct instances supplied by the drivers are never modified.
Handle them as const in the regmap core allowing the drivers to put them
into .rodata.

Also add a new entry to const_structs.checkpatch to make sure future
instances of this struct already enter the tree as const.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://patch.msgid.link/20240706-regmap-const-structs-v1-2-d08c776da787@weissschuh.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regmap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index a6bc2980a98b..2da1cfc52233 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -1607,7 +1607,7 @@ struct regmap_irq_chip {
 
 	unsigned int main_status;
 	unsigned int num_main_status_bits;
-	struct regmap_irq_sub_irq_map *sub_reg_offsets;
+	const struct regmap_irq_sub_irq_map *sub_reg_offsets;
 	int num_main_regs;
 
 	unsigned int status_base;
-- 
cgit v1.2.3


From f45b94ffc5f1204b35b5c695ed265b1385951616 Mon Sep 17 00:00:00 2001
From: Varadarajan Narayanan <quic_varada@quicinc.com>
Date: Tue, 30 Apr 2024 12:12:09 +0530
Subject: interconnect: icc-clk: Specify master/slave ids

Presently, icc-clk driver autogenerates the master and slave ids.
However, devices with multiple nodes on the interconnect could
have other constraints and may not match with the auto generated
node ids.

Hence, modify the driver to use the master/slave ids provided by
the caller instead of auto generating.

Also, update clk-cbf-8996 accordingly.

Acked-by: Georgi Djakov <djakov@kernel.org>
Signed-off-by: Varadarajan Narayanan <quic_varada@quicinc.com>
Link: https://lore.kernel.org/r/20240430064214.2030013-2-quic_varada@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/interconnect-clk.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/interconnect-clk.h b/include/linux/interconnect-clk.h
index 0cd80112bea5..170898faaacb 100644
--- a/include/linux/interconnect-clk.h
+++ b/include/linux/interconnect-clk.h
@@ -11,6 +11,8 @@ struct device;
 struct icc_clk_data {
 	struct clk *clk;
 	const char *name;
+	unsigned int master_id;
+	unsigned int slave_id;
 };
 
 struct icc_provider *icc_clk_register(struct device *dev,
-- 
cgit v1.2.3


From d3153113619216e87038a20bebf82582f9be10e7 Mon Sep 17 00:00:00 2001
From: Varadarajan Narayanan <quic_varada@quicinc.com>
Date: Tue, 30 Apr 2024 12:12:11 +0530
Subject: interconnect: icc-clk: Add devm_icc_clk_register

Wrap icc_clk_register to create devm_icc_clk_register to be
able to release the resources properly.

Acked-by: Georgi Djakov <djakov@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Varadarajan Narayanan <quic_varada@quicinc.com>
Link: https://lore.kernel.org/r/20240430064214.2030013-4-quic_varada@quicinc.com
Signed-off-by: Bjorn Andersson <andersson@kernel.org>
---
 include/linux/interconnect-clk.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/interconnect-clk.h b/include/linux/interconnect-clk.h
index 170898faaacb..9bcee3e9c56c 100644
--- a/include/linux/interconnect-clk.h
+++ b/include/linux/interconnect-clk.h
@@ -19,6 +19,8 @@ struct icc_provider *icc_clk_register(struct device *dev,
 				      unsigned int first_id,
 				      unsigned int num_clocks,
 				      const struct icc_clk_data *data);
+int devm_icc_clk_register(struct device *dev, unsigned int first_id,
+			  unsigned int num_clocks, const struct icc_clk_data *data);
 void icc_clk_unregister(struct icc_provider *provider);
 
 #endif
-- 
cgit v1.2.3


From 7e86845a0346efc95fddaa97ce5cd6a8bda8c71c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 4 Jun 2024 15:45:24 -0400
Subject: rpcrdma: Implement generic device removal

Commit e87a911fed07 ("nvme-rdma: use ib_client API to detect device
removal") explains the benefits of handling device removal outside
of the CM event handler.

Sketch in an IB device removal notification mechanism that can be
used by both the client and server side RPC-over-RDMA transport
implementations.

Suggested-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/rdma_rn.h | 27 +++++++++++++++++++++++++++
 include/trace/events/rpcrdma.h | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+)
 create mode 100644 include/linux/sunrpc/rdma_rn.h

(limited to 'include')

diff --git a/include/linux/sunrpc/rdma_rn.h b/include/linux/sunrpc/rdma_rn.h
new file mode 100644
index 000000000000..7d032ca057af
--- /dev/null
+++ b/include/linux/sunrpc/rdma_rn.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * * Copyright (c) 2024, Oracle and/or its affiliates.
+ */
+
+#ifndef _LINUX_SUNRPC_RDMA_RN_H
+#define _LINUX_SUNRPC_RDMA_RN_H
+
+#include <rdma/ib_verbs.h>
+
+/**
+ * rpcrdma_notification - request removal notification
+ */
+struct rpcrdma_notification {
+	void			(*rn_done)(struct rpcrdma_notification *rn);
+	u32			rn_index;
+};
+
+int rpcrdma_rn_register(struct ib_device *device,
+			struct rpcrdma_notification *rn,
+			void (*done)(struct rpcrdma_notification *rn));
+void rpcrdma_rn_unregister(struct ib_device *device,
+			   struct rpcrdma_notification *rn);
+int rpcrdma_ib_client_register(void);
+void rpcrdma_ib_client_unregister(void);
+
+#endif /* _LINUX_SUNRPC_RDMA_RN_H */
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 14392652273a..ecdaf088219d 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -2220,6 +2220,40 @@ TRACE_EVENT(svcrdma_sq_post_err,
 	)
 );
 
+DECLARE_EVENT_CLASS(rpcrdma_client_device_class,
+	TP_PROTO(
+		const struct ib_device *device
+	),
+
+	TP_ARGS(device),
+
+	TP_STRUCT__entry(
+		__string(name, device->name)
+	),
+
+	TP_fast_assign(
+		__assign_str(name);
+	),
+
+	TP_printk("device=%s",
+		__get_str(name)
+	)
+);
+
+#define DEFINE_CLIENT_DEVICE_EVENT(name)				\
+	DEFINE_EVENT(rpcrdma_client_device_class, name,			\
+		TP_PROTO(						\
+			const struct ib_device *device			\
+		),							\
+		TP_ARGS(device)						\
+	)
+
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_completion);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_add_one);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_wait_on);
+DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one_done);
+
 #endif /* _TRACE_RPCRDMA_H */
 
 #include <trace/define_trace.h>
-- 
cgit v1.2.3


From 3f4eb9ff923413cdb4c7e171c06d3564f6286712 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 4 Jun 2024 15:45:25 -0400
Subject: xprtrdma: Handle device removal outside of the CM event handler

Wait for all disconnects to complete to ensure the transport has
divested all of its hardware resources before the underlying RDMA
device can be removed.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/trace/events/rpcrdma.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index ecdaf088219d..ba2d6a0e41cc 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -669,6 +669,29 @@ TRACE_EVENT(xprtrdma_inline_thresh,
 DEFINE_CONN_EVENT(connect);
 DEFINE_CONN_EVENT(disconnect);
 
+TRACE_EVENT(xprtrdma_device_removal,
+	TP_PROTO(
+		const struct rdma_cm_id *id
+	),
+
+	TP_ARGS(id),
+
+	TP_STRUCT__entry(
+		__string(name, id->device->name)
+		__array(unsigned char, addr, sizeof(struct sockaddr_in6))
+	),
+
+	TP_fast_assign(
+		__assign_str(name);
+		memcpy(__entry->addr, &id->route.addr.dst_addr,
+		       sizeof(struct sockaddr_in6));
+	),
+
+	TP_printk("device %s to be removed, disconnecting %pISpc\n",
+		__get_str(name), __entry->addr
+	)
+);
+
 DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc);
 
 TRACE_EVENT(xprtrdma_op_connect,
-- 
cgit v1.2.3


From 820620516993c151c88e9b59edc79ed12d1c31cd Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 16 Jun 2024 21:21:19 -0400
Subject: NFSv4: Clean up open delegation return structure

Instead of having the fields open coded in the struct nfs_openres,
add a separate structure for them so that we can reuse that code
for the WANT_DELEGATION case.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Lance Shelton <lance.shelton@hammerspace.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_xdr.h | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d09b9773b20c..682559e19d9d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -449,6 +449,22 @@ struct stateowner_id {
 	__u32	uniquifier;
 };
 
+struct nfs4_open_delegation {
+	__u32 open_delegation_type;
+	union {
+		struct {
+			fmode_t			type;
+			__u32			do_recall;
+			nfs4_stateid		stateid;
+			unsigned long		pagemod_limit;
+		};
+		struct {
+			__u32			why_no_delegation;
+			__u32			will_notify;
+		};
+	};
+};
+
 /*
  * Arguments to the open call.
  */
@@ -490,13 +506,10 @@ struct nfs_openres {
 	struct nfs_fattr *      f_attr;
 	struct nfs_seqid *	seqid;
 	const struct nfs_server *server;
-	fmode_t			delegation_type;
-	nfs4_stateid		delegation;
-	unsigned long		pagemod_limit;
-	__u32			do_recall;
 	__u32			attrset[NFS4_BITMAP_SIZE];
 	struct nfs4_string	*owner;
 	struct nfs4_string	*group_owner;
+	struct nfs4_open_delegation	delegation;
 	__u32			access_request;
 	__u32			access_supported;
 	__u32			access_result;
-- 
cgit v1.2.3


From 6a68aed602d7b770552c7f890d0c30c53725c7b8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 16 Jun 2024 21:21:21 -0400
Subject: NFSv4: Add new attribute delegation definitions

Add the attribute delegation XDR definitions from the spec.

Signed-off-by: Tom Haynes <loghyr@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Lance Shelton <lance.shelton@hammerspace.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs4.h      | 9 +++++++++
 include/uapi/linux/nfs4.h | 2 ++
 2 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 0d896ce296ce..c074e0ac390f 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -367,6 +367,8 @@ enum open_delegation_type4 {
 	NFS4_OPEN_DELEGATE_READ = 1,
 	NFS4_OPEN_DELEGATE_WRITE = 2,
 	NFS4_OPEN_DELEGATE_NONE_EXT = 3, /* 4.1 */
+	NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG = 4,
+	NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG = 5,
 };
 
 enum why_no_delegation4 { /* new to v4.1 */
@@ -507,6 +509,11 @@ enum {
 	FATTR4_XATTR_SUPPORT		= 82,
 };
 
+enum {
+	FATTR4_TIME_DELEG_ACCESS	= 84,
+	FATTR4_TIME_DELEG_MODIFY	= 85,
+};
+
 /*
  * The following internal definitions enable processing the above
  * attribute bits within 32-bit word boundaries.
@@ -586,6 +593,8 @@ enum {
 #define FATTR4_WORD2_SECURITY_LABEL     BIT(FATTR4_SEC_LABEL - 64)
 #define FATTR4_WORD2_MODE_UMASK		BIT(FATTR4_MODE_UMASK - 64)
 #define FATTR4_WORD2_XATTR_SUPPORT	BIT(FATTR4_XATTR_SUPPORT - 64)
+#define FATTR4_WORD2_TIME_DELEG_ACCESS	BIT(FATTR4_TIME_DELEG_ACCESS - 64)
+#define FATTR4_WORD2_TIME_DELEG_MODIFY	BIT(FATTR4_TIME_DELEG_MODIFY - 64)
 
 /* MDS threshold bitmap bits */
 #define THRESHOLD_RD                    (1UL << 0)
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index 1d2043708bf1..afd7e32906c3 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -69,6 +69,8 @@
 #define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL	0x10000
 #define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED		0x20000
 
+#define NFS4_SHARE_WANT_DELEG_TIMESTAMPS		0x100000
+
 #define NFS4_CDFC4_FORE	0x1
 #define NFS4_CDFC4_BACK 0x2
 #define NFS4_CDFC4_BOTH 0x3
-- 
cgit v1.2.3


From 90f9ae74422d7e7447cb0ea21e1227142b58c52a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 16 Jun 2024 21:21:22 -0400
Subject: NFSv4: Plumb in XDR support for the new delegation-only setattr op

We want to send the updated atime and mtime as part of the delegreturn
compound. Add a special structure to hold those variables.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Lance Shelton <lance.shelton@hammerspace.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_xdr.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 682559e19d9d..f40be64ce942 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -622,6 +622,13 @@ struct nfs_release_lockowner_res {
 	struct nfs4_sequence_res	seq_res;
 };
 
+struct nfs4_delegattr {
+	struct timespec64	atime;
+	struct timespec64	mtime;
+	bool			atime_set;
+	bool			mtime_set;
+};
+
 struct nfs4_delegreturnargs {
 	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *fhandle;
@@ -629,6 +636,7 @@ struct nfs4_delegreturnargs {
 	const u32 *bitmask;
 	u32 bitmask_store[NFS_BITMASK_SZ];
 	struct nfs4_layoutreturn_args *lr_args;
+	struct nfs4_delegattr *sattr_args;
 };
 
 struct nfs4_delegreturnres {
@@ -637,6 +645,8 @@ struct nfs4_delegreturnres {
 	struct nfs_server *server;
 	struct nfs4_layoutreturn_res *lr_res;
 	int lr_ret;
+	bool sattr_res;
+	int sattr_ret;
 };
 
 /*
-- 
cgit v1.2.3


From 4201916f2ab13577d45876f4bc784be55e4a83da Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 16 Jun 2024 21:21:24 -0400
Subject: NFSv4: Add a flags argument to the 'have_delegation' callback

This argument will be used to allow the caller to specify whether or not
they need to know that this is an attribute delegation.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Lance Shelton <lance.shelton@hammerspace.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index f40be64ce942..51611583af51 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1830,7 +1830,7 @@ struct nfs_rpc_ops {
 				int open_flags,
 				struct iattr *iattr,
 				int *);
-	int (*have_delegation)(struct inode *, fmode_t);
+	int (*have_delegation)(struct inode *, fmode_t, int);
 	struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *);
 	struct nfs_client *(*init_client) (struct nfs_client *,
 				const struct nfs_client_initdata *);
-- 
cgit v1.2.3


From 86e1c54d152e2799671e149d6e4d1c1a4a2be857 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 16 Jun 2024 21:21:26 -0400
Subject: NFSv4: Add recovery of attribute delegations

After a reboot of the NFSv4.2 server, the recovery code needs to specify
whether the delegation to be recovered is an attribute delegation or
not.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Lance Shelton <lance.shelton@hammerspace.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 51611583af51..d8cfa956d24c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -484,7 +484,7 @@ struct nfs_openargs {
 			nfs4_verifier   verifier; /* EXCLUSIVE */
 		};
 		nfs4_stateid	delegation;		/* CLAIM_DELEGATE_CUR */
-		fmode_t		delegation_type;	/* CLAIM_PREVIOUS */
+		__u32		delegation_type;	/* CLAIM_PREVIOUS */
 	} u;
 	const struct qstr *	name;
 	const struct nfs_server *server;	 /* Needed for ID mapping */
-- 
cgit v1.2.3


From dcb3c20f741993efbd95be78aab93fac29516323 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 16 Jun 2024 21:21:27 -0400
Subject: NFSv4: Add a capability for delegated attributes

Cache whether or not the server may have support for delegated
attributes in a capability flag.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Lance Shelton <lance.shelton@hammerspace.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_fs_sb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 92de074e63b9..5a76a87cd924 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -278,6 +278,7 @@ struct nfs_server {
 #define NFS_CAP_LGOPEN		(1U << 5)
 #define NFS_CAP_CASE_INSENSITIVE	(1U << 6)
 #define NFS_CAP_CASE_PRESERVING	(1U << 7)
+#define NFS_CAP_DELEGTIME	(1U << 13)
 #define NFS_CAP_POSIX_LOCK	(1U << 14)
 #define NFS_CAP_UIDGID_NOMAP	(1U << 15)
 #define NFS_CAP_STATEID_NFSV41	(1U << 16)
-- 
cgit v1.2.3


From 707f13b3d081ea811c40014e7b61f2c487ee9a4f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 16 Jun 2024 21:21:32 -0400
Subject: NFSv4: Add support for the FATTR4_OPEN_ARGUMENTS attribute

Query the server for the OPEN arguments that it supports so that
we can figure out which extensions we can use.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Lance Shelton <lance.shelton@hammerspace.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs4.h    | 2 ++
 include/linux/nfs_xdr.h | 9 +++++++++
 2 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index c074e0ac390f..f9df88091c6d 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -512,6 +512,7 @@ enum {
 enum {
 	FATTR4_TIME_DELEG_ACCESS	= 84,
 	FATTR4_TIME_DELEG_MODIFY	= 85,
+	FATTR4_OPEN_ARGUMENTS		= 86,
 };
 
 /*
@@ -595,6 +596,7 @@ enum {
 #define FATTR4_WORD2_XATTR_SUPPORT	BIT(FATTR4_XATTR_SUPPORT - 64)
 #define FATTR4_WORD2_TIME_DELEG_ACCESS	BIT(FATTR4_TIME_DELEG_ACCESS - 64)
 #define FATTR4_WORD2_TIME_DELEG_MODIFY	BIT(FATTR4_TIME_DELEG_MODIFY - 64)
+#define FATTR4_WORD2_OPEN_ARGUMENTS	BIT(FATTR4_OPEN_ARGUMENTS - 64)
 
 /* MDS threshold bitmap bits */
 #define THRESHOLD_RD                    (1UL << 0)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d8cfa956d24c..af510a7ec46a 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1213,6 +1213,14 @@ struct nfs4_statfs_res {
 	struct nfs_fsstat	       *fsstat;
 };
 
+struct nfs4_open_caps {
+	u32				oa_share_access[1];
+	u32				oa_share_deny[1];
+	u32				oa_share_access_want[1];
+	u32				oa_open_claim[1];
+	u32				oa_createmode[1];
+};
+
 struct nfs4_server_caps_arg {
 	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh		       *fhandle;
@@ -1229,6 +1237,7 @@ struct nfs4_server_caps_res {
 	u32				fh_expire_type;
 	u32				case_insensitive;
 	u32				case_preserving;
+	struct nfs4_open_caps		open_caps;
 };
 
 #define NFS4_PATHNAME_MAXCOMPONENTS 512
-- 
cgit v1.2.3


From d2a00cceb93a4df2afaceb836297628d0aeec7ad Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 16 Jun 2024 21:21:33 -0400
Subject: NFSv4: Detect support for OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION

If the server supports the NFSv4.2 protocol extension to optimise away
returning a stateid when it returns a delegation, then we cache that
information in another capability flag.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Lance Shelton <lance.shelton@hammerspace.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_fs_sb.h | 1 +
 include/uapi/linux/nfs4.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 5a76a87cd924..fe5b1a8bd723 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -278,6 +278,7 @@ struct nfs_server {
 #define NFS_CAP_LGOPEN		(1U << 5)
 #define NFS_CAP_CASE_INSENSITIVE	(1U << 6)
 #define NFS_CAP_CASE_PRESERVING	(1U << 7)
+#define NFS_CAP_OPEN_XOR	(1U << 12)
 #define NFS_CAP_DELEGTIME	(1U << 13)
 #define NFS_CAP_POSIX_LOCK	(1U << 14)
 #define NFS_CAP_UIDGID_NOMAP	(1U << 15)
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index afd7e32906c3..caf4db2fcbb9 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -46,6 +46,7 @@
 #define NFS4_OPEN_RESULT_CONFIRM		0x0002
 #define NFS4_OPEN_RESULT_LOCKTYPE_POSIX		0x0004
 #define NFS4_OPEN_RESULT_PRESERVE_UNLINKED	0x0008
+#define NFS4_OPEN_RESULT_NO_OPEN_STATEID	0x0010
 #define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK	0x0020
 
 #define NFS4_SHARE_ACCESS_MASK	0x000F
@@ -70,6 +71,7 @@
 #define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED		0x20000
 
 #define NFS4_SHARE_WANT_DELEG_TIMESTAMPS		0x100000
+#define NFS4_SHARE_WANT_OPEN_XOR_DELEGATION		0x200000
 
 #define NFS4_CDFC4_FORE	0x1
 #define NFS4_CDFC4_BACK 0x2
-- 
cgit v1.2.3


From adb4b42d19aea91826621a8d0bac94cf2c08f8bc Mon Sep 17 00:00:00 2001
From: Lance Shelton <lance.shelton@primarydata.com>
Date: Sun, 16 Jun 2024 21:21:36 -0400
Subject: Return the delegation when deleting sillyrenamed files

Add a callback to return the delegation in order to allow generic NFS
code to return the delegation when appropriate.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Lance Shelton <lance.shelton@hammerspace.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_xdr.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index af510a7ec46a..01efacae4634 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1840,6 +1840,7 @@ struct nfs_rpc_ops {
 				struct iattr *iattr,
 				int *);
 	int (*have_delegation)(struct inode *, fmode_t, int);
+	int (*return_delegation)(struct inode *);
 	struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *);
 	struct nfs_client *(*init_client) (struct nfs_client *,
 				const struct nfs_client_initdata *);
-- 
cgit v1.2.3


From 924cf3c91fe29c7ebd1b9d56e10f513c1bd7d4bd Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Thu, 13 Jun 2024 01:00:46 -0400
Subject: NFSv4.1: constify the stateid argument in nfs41_test_stateid()

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 01efacae4634..45623af3e7b8 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1438,7 +1438,7 @@ struct nfs41_secinfo_no_name_args {
 
 struct nfs41_test_stateid_args {
 	struct nfs4_sequence_args	seq_args;
-	nfs4_stateid			*stateid;
+	nfs4_stateid			stateid;
 };
 
 struct nfs41_test_stateid_res {
-- 
cgit v1.2.3


From 5468fc8298a97ca504aca8ac0c7b9e6fd7c1b588 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Thu, 13 Jun 2024 01:00:55 -0400
Subject: NFSv4/pNFS: Do layout state recovery upon reboot

Some pNFS implementations, such as flexible files, want the client to
send the layout stats and layout errors that may have incurred while the
metadata server was booting. To do so, the client sends a layoutreturn
with an all-zero stateid while the server is in grace during reboot
recovery.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_fs_sb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index fe5b1a8bd723..ba9df1848b35 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -278,6 +278,7 @@ struct nfs_server {
 #define NFS_CAP_LGOPEN		(1U << 5)
 #define NFS_CAP_CASE_INSENSITIVE	(1U << 6)
 #define NFS_CAP_CASE_PRESERVING	(1U << 7)
+#define NFS_CAP_REBOOT_LAYOUTRETURN	(1U << 8)
 #define NFS_CAP_OPEN_XOR	(1U << 12)
 #define NFS_CAP_DELEGTIME	(1U << 13)
 #define NFS_CAP_POSIX_LOCK	(1U << 14)
-- 
cgit v1.2.3


From 2f1f31042ef07719a0d5cb4784b8a32d20c13110 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 1 Jul 2024 12:50:48 +0200
Subject: nfs: Block on write congestion

Commit 6df25e58532b ("nfs: remove reliance on bdi congestion")
introduced NFS-private solution for limiting number of writes
outstanding against a particular server. Unlike previous bdi congestion
this algorithm actually works and limits number of outstanding writeback
pages to nfs_congestion_kb which scales with amount of client's memory
and is capped at 256 MB. As a result some workloads such as random
buffered writes over NFS got slower (from ~170 MB/s to ~126 MB/s). The
fio command to reproduce is:

fio --direct=0 --ioengine=sync --thread --invalidate=1 --group_reporting=1
  --runtime=300 --fallocate=posix --ramp_time=10 --new_group --rw=randwrite
  --size=64256m --numjobs=4 --bs=4k --fsync_on_close=1 --end_fsync=1

This happens because the client sends ~256 MB worth of dirty pages to
the server and any further background writeback request is ignored until
the number of writeback pages gets below the threshold of 192 MB. By the
time this happens and clients decides to trigger another round of
writeback, the server often has no pages to write and the disk is idle.

To fix this problem and make the client react faster to eased congestion
of the server by blocking waiting for congestion to resolve instead of
aborting writeback. This improves the random 4k buffered write
throughput to 184 MB/s.

Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_fs_sb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index ba9df1848b35..1df86ab98c77 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -140,6 +140,7 @@ struct nfs_server {
 	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */
 	struct nlm_host		*nlm_host;	/* NLM client handle */
 	struct nfs_iostats __percpu *io_stats;	/* I/O statistics */
+	wait_queue_head_t	write_congestion_wait;	/* wait until write congestion eases */
 	atomic_long_t		writeback;	/* number of writeback pages */
 	unsigned int		write_congested;/* flag set when writeback gets too high */
 	unsigned int		flags;		/* various flags */
-- 
cgit v1.2.3


From 7e8e78a0ba00c88f0ded86de64bdddc82e06b196 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 1 Jul 2024 07:26:48 +0200
Subject: nfs: remove dead code for the old swap over NFS implementation

Remove the code testing folio_test_swapcache either explicitly or
implicitly in pagemap.h headers, as is now handled using the direct I/O
path and not the buffered I/O path that these helpers are located in.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_page.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 1c315f854ea8..7bc31df457ea 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -208,8 +208,8 @@ static inline struct inode *nfs_page_to_inode(const struct nfs_page *req)
 	struct folio *folio = nfs_page_to_folio(req);
 
 	if (folio == NULL)
-		return page_file_mapping(req->wb_page)->host;
-	return folio_file_mapping(folio)->host;
+		return req->wb_page->mapping->host;
+	return folio->mapping->host;
 }
 
 /**
-- 
cgit v1.2.3


From 9eb7c484db1ae993648fc9b9d48a295f4d99afb8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 1 Jul 2024 07:26:50 +0200
Subject: nfs: simplify nfs_folio_find_and_lock_request

nfs_folio_find_and_lock_request and the nfs_page_group_lock_head helper
called by it spend quite some effort to deal with head vs subrequests.
But given that only the head request can be stashed in the folio private
data, non of that is required.

Fold the locking logic from nfs_page_group_lock_head into
nfs_folio_find_and_lock_request and simplify the result based on the
invariant that we always find the head request in the folio private data.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_page.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 7bc31df457ea..e799d93626f1 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -155,7 +155,6 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
 extern	void nfs_unlock_and_release_request(struct nfs_page *);
-extern	struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req);
 extern	int nfs_page_group_lock_subrequests(struct nfs_page *head);
 extern void nfs_join_page_group(struct nfs_page *head,
 				struct nfs_commit_info *cinfo,
-- 
cgit v1.2.3


From 25edbcac6e32eab345e470d56ca9974a577b878b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 1 Jul 2024 07:26:52 +0200
Subject: nfs: fold nfs_page_group_lock_subrequests into
 nfs_lock_and_join_requests

Fold nfs_page_group_lock_subrequests into nfs_lock_and_join_requests to
prepare for future changes to this code, and move the helpers to write.c
as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_page.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index e799d93626f1..63eed97a18ad 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -155,7 +155,6 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
 extern	void nfs_unlock_and_release_request(struct nfs_page *);
-extern	int nfs_page_group_lock_subrequests(struct nfs_page *head);
 extern void nfs_join_page_group(struct nfs_page *head,
 				struct nfs_commit_info *cinfo,
 				struct inode *inode);
-- 
cgit v1.2.3


From f1b7c7552cbcf89e56b15ff481f3d19b53046291 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 1 Jul 2024 07:26:53 +0200
Subject: nfs: move nfs_wait_on_request to write.c

nfs_wait_on_request is now only used in write.c.  Move it there
and mark it static.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/nfs_page.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 63eed97a18ad..169b4ae30ff4 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -152,7 +152,6 @@ extern	void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
 extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
 				struct nfs_page *prev,
 				struct nfs_page *req);
-extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
 extern	void nfs_unlock_and_release_request(struct nfs_page *);
 extern void nfs_join_page_group(struct nfs_page *head,
-- 
cgit v1.2.3


From 8e0c8d23952f338180d19718195d4f9fd10a1809 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Thu, 13 Jun 2024 14:34:30 -0400
Subject: sunrpc: fix up the special handling of sv_nrpools == 1

Only pooled services take a reference to the svc_pool_map. The sunrpc
code has always used the sv_nrpools value to detect whether the service
is pooled.

The problem there is that nfsd is a pooled service, but when it's
running in "global" pool_mode, it doesn't take a reference to the pool
map because it has a sv_nrpools value of 1. This means that we have
two separate codepaths for starting the server, depending on whether
it's pooled or not.

Fix this by adding a new flag to the svc_serv, that indicates whether
the serv is pooled. With this we can have the nfsd service
unconditionally take a reference, regardless of pool_mode.

Note that this is a behavior change for
/sys/module/sunrpc/parameters/pool_mode. Usually this file does not
allow you to change the pool-mode while there are nfsd threads running,
but if the pool-mode is "global" it's allowed. My assumption is that
this is a bug, since it probably should never have worked this way.

This patch changes the behavior such that you get back EBUSY even
when nfsd is running in global mode. I think this is more reasonable
behavior, and given that most people set this today using the module
parameter, it's doubtful anyone will notice.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 23617da0e565..d0433e1642b3 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -85,6 +85,7 @@ struct svc_serv {
 	char *			sv_name;	/* service name */
 
 	unsigned int		sv_nrpools;	/* number of thread pools */
+	bool			sv_is_pooled;	/* is this a pooled service? */
 	struct svc_pool *	sv_pools;	/* array of thread pools */
 	int			(*sv_threadfn)(void *data);
 
-- 
cgit v1.2.3


From 5f71f3c325534da55dfda487a67208e2e0f0cd1b Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Thu, 13 Jun 2024 14:34:33 -0400
Subject: sunrpc: refactor pool_mode setting code

Allow the pool_mode setting code to be called from internal callers
so we can call it from a new netlink op. Add a new svc_pool_map_get
function to return the current setting. Change the existing module
parameter handling to use the new interfaces under the hood.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index d0433e1642b3..a7d0406b9ef5 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -399,6 +399,8 @@ struct svc_procedure {
 /*
  * Function prototypes.
  */
+int sunrpc_set_pool_mode(const char *val);
+int sunrpc_get_pool_mode(char *val, size_t size);
 int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
 void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
 int svc_bind(struct svc_serv *serv, struct net *net);
-- 
cgit v1.2.3


From 00506072d70829196849e835ee4ef0b350b61fb9 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Fri, 14 Jun 2024 08:59:10 -0400
Subject: nfsd: new netlink ops to get/set server pool_mode

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/uapi/linux/nfsd_netlink.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/nfsd_netlink.h b/include/uapi/linux/nfsd_netlink.h
index 24c86dbc7ed5..887cbd12b695 100644
--- a/include/uapi/linux/nfsd_netlink.h
+++ b/include/uapi/linux/nfsd_netlink.h
@@ -70,6 +70,14 @@ enum {
 	NFSD_A_SERVER_SOCK_MAX = (__NFSD_A_SERVER_SOCK_MAX - 1)
 };
 
+enum {
+	NFSD_A_POOL_MODE_MODE = 1,
+	NFSD_A_POOL_MODE_NPOOLS,
+
+	__NFSD_A_POOL_MODE_MAX,
+	NFSD_A_POOL_MODE_MAX = (__NFSD_A_POOL_MODE_MAX - 1)
+};
+
 enum {
 	NFSD_CMD_RPC_STATUS_GET = 1,
 	NFSD_CMD_THREADS_SET,
@@ -78,6 +86,8 @@ enum {
 	NFSD_CMD_VERSION_GET,
 	NFSD_CMD_LISTENER_SET,
 	NFSD_CMD_LISTENER_GET,
+	NFSD_CMD_POOL_MODE_SET,
+	NFSD_CMD_POOL_MODE_GET,
 
 	__NFSD_CMD_MAX,
 	NFSD_CMD_MAX = (__NFSD_CMD_MAX - 1)
-- 
cgit v1.2.3


From 7aa291962f4c3b7afb9a12fa60b406b95e5eacb4 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 27 Jun 2024 13:04:22 +0200
Subject: dt-bindings: clock: airoha: Add reset support to EN7581 clock binding

Introduce reset capability to EN7581 device-tree clock binding
documentation. Add reset register mapping between misc scu and pb scu
ones in order to follow the memory order. This change is not
introducing any backward compatibility issue since the EN7581 dts is not
upstream yet.

Fixes: 0a382be005cf ("dt-bindings: clock: airoha: add EN7581 binding")
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Rob Herring (Arm) <robh@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/28fef3e83062d5d71e7b4be4b47583f851a15bf8.1719485847.git.lorenzo@kernel.org
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/reset/airoha,en7581-reset.h | 66 +++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 include/dt-bindings/reset/airoha,en7581-reset.h

(limited to 'include')

diff --git a/include/dt-bindings/reset/airoha,en7581-reset.h b/include/dt-bindings/reset/airoha,en7581-reset.h
new file mode 100644
index 000000000000..6544a1790b83
--- /dev/null
+++ b/include/dt-bindings/reset/airoha,en7581-reset.h
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024 AIROHA Inc
+ * Author: Lorenzo Bianconi <lorenzo@kernel.org>
+ */
+
+#ifndef __DT_BINDINGS_RESET_CONTROLLER_AIROHA_EN7581_H_
+#define __DT_BINDINGS_RESET_CONTROLLER_AIROHA_EN7581_H_
+
+/* RST_CTRL2 */
+#define EN7581_XPON_PHY_RST		 0
+#define EN7581_CPU_TIMER2_RST		 1
+#define EN7581_HSUART_RST		 2
+#define EN7581_UART4_RST		 3
+#define EN7581_UART5_RST		 4
+#define EN7581_I2C2_RST			 5
+#define EN7581_XSI_MAC_RST		 6
+#define EN7581_XSI_PHY_RST		 7
+#define EN7581_NPU_RST			 8
+#define EN7581_I2S_RST			 9
+#define EN7581_TRNG_RST			10
+#define EN7581_TRNG_MSTART_RST		11
+#define EN7581_DUAL_HSI0_RST		12
+#define EN7581_DUAL_HSI1_RST		13
+#define EN7581_HSI_RST			14
+#define EN7581_DUAL_HSI0_MAC_RST	15
+#define EN7581_DUAL_HSI1_MAC_RST	16
+#define EN7581_HSI_MAC_RST		17
+#define EN7581_WDMA_RST			18
+#define EN7581_WOE0_RST			19
+#define EN7581_WOE1_RST			20
+#define EN7581_HSDMA_RST		21
+#define EN7581_TDMA_RST			22
+#define EN7581_EMMC_RST			23
+#define EN7581_SOE_RST			24
+#define EN7581_PCIE2_RST		25
+#define EN7581_XFP_MAC_RST		26
+#define EN7581_USB_HOST_P1_RST		27
+#define EN7581_USB_HOST_P1_U3_PHY_RST	28
+/* RST_CTRL1 */
+#define EN7581_PCM1_ZSI_ISI_RST		29
+#define EN7581_FE_PDMA_RST		30
+#define EN7581_FE_QDMA_RST		31
+#define EN7581_PCM_SPIWP_RST		32
+#define EN7581_CRYPTO_RST		33
+#define EN7581_TIMER_RST		34
+#define EN7581_PCM1_RST			35
+#define EN7581_UART_RST			36
+#define EN7581_GPIO_RST			37
+#define EN7581_GDMA_RST			38
+#define EN7581_I2C_MASTER_RST		39
+#define EN7581_PCM2_ZSI_ISI_RST		40
+#define EN7581_SFC_RST			41
+#define EN7581_UART2_RST		42
+#define EN7581_GDMP_RST			43
+#define EN7581_FE_RST			44
+#define EN7581_USB_HOST_P0_RST		45
+#define EN7581_GSW_RST			46
+#define EN7581_SFC2_PCM_RST		47
+#define EN7581_PCIE0_RST		48
+#define EN7581_PCIE1_RST		49
+#define EN7581_CPU_TIMER_RST		50
+#define EN7581_PCIE_HB_RST		51
+#define EN7581_XPON_MAC_RST		52
+
+#endif /* __DT_BINDINGS_RESET_CONTROLLER_AIROHA_EN7581_H_ */
-- 
cgit v1.2.3


From 1cb6f0bae50441f4b4b32a28315853b279c7404e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 8 Jul 2024 15:31:29 +0200
Subject: bpf: Fix too early release of tcx_entry

Pedro Pinto and later independently also Hyunwoo Kim and Wongi Lee reported
an issue that the tcx_entry can be released too early leading to a use
after free (UAF) when an active old-style ingress or clsact qdisc with a
shared tc block is later replaced by another ingress or clsact instance.

Essentially, the sequence to trigger the UAF (one example) can be as follows:

  1. A network namespace is created
  2. An ingress qdisc is created. This allocates a tcx_entry, and
     &tcx_entry->miniq is stored in the qdisc's miniqp->p_miniq. At the
     same time, a tcf block with index 1 is created.
  3. chain0 is attached to the tcf block. chain0 must be connected to
     the block linked to the ingress qdisc to later reach the function
     tcf_chain0_head_change_cb_del() which triggers the UAF.
  4. Create and graft a clsact qdisc. This causes the ingress qdisc
     created in step 1 to be removed, thus freeing the previously linked
     tcx_entry:

     rtnetlink_rcv_msg()
       => tc_modify_qdisc()
         => qdisc_create()
           => clsact_init() [a]
         => qdisc_graft()
           => qdisc_destroy()
             => __qdisc_destroy()
               => ingress_destroy() [b]
                 => tcx_entry_free()
                   => kfree_rcu() // tcx_entry freed

  5. Finally, the network namespace is closed. This registers the
     cleanup_net worker, and during the process of releasing the
     remaining clsact qdisc, it accesses the tcx_entry that was
     already freed in step 4, causing the UAF to occur:

     cleanup_net()
       => ops_exit_list()
         => default_device_exit_batch()
           => unregister_netdevice_many()
             => unregister_netdevice_many_notify()
               => dev_shutdown()
                 => qdisc_put()
                   => clsact_destroy() [c]
                     => tcf_block_put_ext()
                       => tcf_chain0_head_change_cb_del()
                         => tcf_chain_head_change_item()
                           => clsact_chain_head_change()
                             => mini_qdisc_pair_swap() // UAF

There are also other variants, the gist is to add an ingress (or clsact)
qdisc with a specific shared block, then to replace that qdisc, waiting
for the tcx_entry kfree_rcu() to be executed and subsequently accessing
the current active qdisc's miniq one way or another.

The correct fix is to turn the miniq_active boolean into a counter. What
can be observed, at step 2 above, the counter transitions from 0->1, at
step [a] from 1->2 (in order for the miniq object to remain active during
the replacement), then in [b] from 2->1 and finally [c] 1->0 with the
eventual release. The reference counter in general ranges from [0,2] and
it does not need to be atomic since all access to the counter is protected
by the rtnl mutex. With this in place, there is no longer a UAF happening
and the tcx_entry is freed at the correct time.

Fixes: e420bed02507 ("bpf: Add fd-based tcx multi-prog infra with link support")
Reported-by: Pedro Pinto <xten@osec.io>
Co-developed-by: Pedro Pinto <xten@osec.io>
Signed-off-by: Pedro Pinto <xten@osec.io>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Hyunwoo Kim <v4bel@theori.io>
Cc: Wongi Lee <qwerty@theori.io>
Cc: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20240708133130.11609-1-daniel@iogearbox.net
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/net/tcx.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/tcx.h b/include/net/tcx.h
index 72a3e75e539f..5ce0ce9e0c02 100644
--- a/include/net/tcx.h
+++ b/include/net/tcx.h
@@ -13,7 +13,7 @@ struct mini_Qdisc;
 struct tcx_entry {
 	struct mini_Qdisc __rcu *miniq;
 	struct bpf_mprog_bundle bundle;
-	bool miniq_active;
+	u32 miniq_active;
 	struct rcu_head rcu;
 };
 
@@ -125,11 +125,16 @@ static inline void tcx_skeys_dec(bool ingress)
 	tcx_dec();
 }
 
-static inline void tcx_miniq_set_active(struct bpf_mprog_entry *entry,
-					const bool active)
+static inline void tcx_miniq_inc(struct bpf_mprog_entry *entry)
 {
 	ASSERT_RTNL();
-	tcx_entry(entry)->miniq_active = active;
+	tcx_entry(entry)->miniq_active++;
+}
+
+static inline void tcx_miniq_dec(struct bpf_mprog_entry *entry)
+{
+	ASSERT_RTNL();
+	tcx_entry(entry)->miniq_active--;
 }
 
 static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry)
-- 
cgit v1.2.3


From 1f9a8286bc0c3df7d789ea625d9d9db3d7779f2d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 28 May 2024 14:58:03 +0000
Subject: uaccess: always export _copy_[from|to]_user with CONFIG_RUST

Rust code needs to be able to access _copy_from_user and _copy_to_user
so that it can skip the check_copy_size check in cases where the length
is known at compile-time, mirroring the logic for when C code will skip
check_copy_size. To do this, we ensure that exported versions of these
methods are available when CONFIG_RUST is enabled.

Alice has verified that this patch passes the CONFIG_TEST_USER_COPY test
on x86 using the Android cuttlefish emulator.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Alice Ryhl <aliceryhl@google.com>
Reviewed-by: Boqun Feng <boqun.feng@gmail.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Alice Ryhl <aliceryhl@google.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Link: https://lore.kernel.org/r/20240528-alice-mm-v7-2-78222c31b8f4@google.com
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 include/linux/uaccess.h | 46 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 3064314f4832..d8e4105a2f21 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -5,6 +5,7 @@
 #include <linux/fault-inject-usercopy.h>
 #include <linux/instrumented.h>
 #include <linux/minmax.h>
+#include <linux/nospec.h>
 #include <linux/sched.h>
 #include <linux/thread_info.h>
 
@@ -138,13 +139,26 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
 	return raw_copy_to_user(to, from, n);
 }
 
-#ifdef INLINE_COPY_FROM_USER
+/*
+ * Architectures that #define INLINE_COPY_TO_USER use this function
+ * directly in the normal copy_to/from_user(), the other ones go
+ * through an extern _copy_to/from_user(), which expands the same code
+ * here.
+ *
+ * Rust code always uses the extern definition.
+ */
 static inline __must_check unsigned long
-_copy_from_user(void *to, const void __user *from, unsigned long n)
+_inline_copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	unsigned long res = n;
 	might_fault();
 	if (!should_fail_usercopy() && likely(access_ok(from, n))) {
+		/*
+		 * Ensure that bad access_ok() speculation will not
+		 * lead to nasty side effects *after* the copy is
+		 * finished:
+		 */
+		barrier_nospec();
 		instrument_copy_from_user_before(to, from, n);
 		res = raw_copy_from_user(to, from, n);
 		instrument_copy_from_user_after(to, from, n, res);
@@ -153,14 +167,11 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
 		memset(to + (n - res), 0, res);
 	return res;
 }
-#else
 extern __must_check unsigned long
 _copy_from_user(void *, const void __user *, unsigned long);
-#endif
 
-#ifdef INLINE_COPY_TO_USER
 static inline __must_check unsigned long
-_copy_to_user(void __user *to, const void *from, unsigned long n)
+_inline_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	might_fault();
 	if (should_fail_usercopy())
@@ -171,25 +182,32 @@ _copy_to_user(void __user *to, const void *from, unsigned long n)
 	}
 	return n;
 }
-#else
 extern __must_check unsigned long
 _copy_to_user(void __user *, const void *, unsigned long);
-#endif
 
 static __always_inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	if (check_copy_size(to, n, false))
-		n = _copy_from_user(to, from, n);
-	return n;
+	if (!check_copy_size(to, n, false))
+		return n;
+#ifdef INLINE_COPY_FROM_USER
+	return _inline_copy_from_user(to, from, n);
+#else
+	return _copy_from_user(to, from, n);
+#endif
 }
 
 static __always_inline unsigned long __must_check
 copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	if (check_copy_size(from, n, true))
-		n = _copy_to_user(to, from, n);
-	return n;
+	if (!check_copy_size(from, n, true))
+		return n;
+
+#ifdef INLINE_COPY_TO_USER
+	return _inline_copy_to_user(to, from, n);
+#else
+	return _copy_to_user(to, from, n);
+#endif
 }
 
 #ifndef copy_mc_to_kernel
-- 
cgit v1.2.3


From 14498e993fb77adce75f0106162902b2f8b1d480 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 3 Jul 2024 14:37:50 -0700
Subject: Input: make events() method return number of events processed

In preparation to consolidating filtering and event processing in the
input core change events() method to return number of events processed
by it.

Reviewed-by: Jeff LaBundy <jeff@labundy.com>
Reviewed-by: Benjamin Tissoires <bentiss@kernel.org>
Link: https://lore.kernel.org/r/20240703213756.3375978-4-dmitry.torokhov@gmail.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/input.h b/include/linux/input.h
index c22ac465254b..89a0be6ee0e2 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -275,7 +275,8 @@ struct input_handle;
  *	it may not sleep
  * @events: event sequence handler. This method is being called by
  *	input core with interrupts disabled and dev->event_lock
- *	spinlock held and so it may not sleep
+ *	spinlock held and so it may not sleep. The method must return
+ *	number of events passed to it.
  * @filter: similar to @event; separates normal event handlers from
  *	"filters".
  * @match: called after comparing device's id with handler's id_table
@@ -312,8 +313,8 @@ struct input_handler {
 	void *private;
 
 	void (*event)(struct input_handle *handle, unsigned int type, unsigned int code, int value);
-	void (*events)(struct input_handle *handle,
-		       const struct input_value *vals, unsigned int count);
+	unsigned int (*events)(struct input_handle *handle,
+			       struct input_value *vals, unsigned int count);
 	bool (*filter)(struct input_handle *handle, unsigned int type, unsigned int code, int value);
 	bool (*match)(struct input_handler *handler, struct input_dev *dev);
 	int (*connect)(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id);
-- 
cgit v1.2.3


From 6badc62f8fa4a1165f3f440943045b73c7a47735 Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Mon, 27 May 2024 18:14:40 +0200
Subject: of: dynamic: Constify parameter in
 of_changeset_add_prop_string_array()

The str_array parameter has no reason to be an un-const array.
Indeed, elements of the 'str_array' array are not changed by the code.

Constify the 'str_array' array parameter.
With this const qualifier added, the following construction is allowed:
  static const char * const tab_str[] = { "string1", "string2" };
  of_changeset_add_prop_string_array(..., tab_str, ARRAY_SIZE(tab_str));

Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Link: https://lore.kernel.org/r/20240527161450.326615-14-herve.codina@bootlin.com
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 include/linux/of.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/of.h b/include/linux/of.h
index a0bedd038a05..ee9a385a13db 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -1639,7 +1639,7 @@ int of_changeset_add_prop_string(struct of_changeset *ocs,
 int of_changeset_add_prop_string_array(struct of_changeset *ocs,
 				       struct device_node *np,
 				       const char *prop_name,
-				       const char **str_array, size_t sz);
+				       const char * const *str_array, size_t sz);
 int of_changeset_add_prop_u32_array(struct of_changeset *ocs,
 				    struct device_node *np,
 				    const char *prop_name,
-- 
cgit v1.2.3


From f2b388d63e6c7c6151bb295e5cbf48a2ac91e51a Mon Sep 17 00:00:00 2001
From: Herve Codina <herve.codina@bootlin.com>
Date: Mon, 27 May 2024 18:14:42 +0200
Subject: of: dynamic: Introduce of_changeset_add_prop_bool()

APIs to add some properties in a changeset exist but nothing to add a DT
boolean property (i.e. a property without any values).

Fill this lack with of_changeset_add_prop_bool().

Signed-off-by: Herve Codina <herve.codina@bootlin.com>
Link: https://lore.kernel.org/r/20240527161450.326615-16-herve.codina@bootlin.com
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 include/linux/of.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/of.h b/include/linux/of.h
index ee9a385a13db..13cf7a43b473 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -1652,6 +1652,9 @@ static inline int of_changeset_add_prop_u32(struct of_changeset *ocs,
 	return of_changeset_add_prop_u32_array(ocs, np, prop_name, &val, 1);
 }
 
+int of_changeset_add_prop_bool(struct of_changeset *ocs, struct device_node *np,
+			       const char *prop_name);
+
 #else /* CONFIG_OF_DYNAMIC */
 static inline int of_reconfig_notifier_register(struct notifier_block *nb)
 {
-- 
cgit v1.2.3


From b4b1ddc9dfe997a5f492fa3a36487f8e7a5de30d Mon Sep 17 00:00:00 2001
From: Lizhe <sensor1010@163.com>
Date: Thu, 4 Jul 2024 12:23:55 +0530
Subject: cpufreq: Make cpufreq_driver->exit() return void

The cpufreq core doesn't check the return type of the exit() callback
and there is not much the core can do on failures at that point. Just
drop the returned value and make it return void.

Signed-off-by: Lizhe <sensor1010@163.com>
[ Viresh: Reworked the patches to fix all missing changes together. ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com> # Mediatek
Acked-by: Sudeep Holla <sudeep.holla@arm.com> # scpi, scmi, vexpress
Acked-by: Mario Limonciello <mario.limonciello@amd.com> # amd
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com> # bmips
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Acked-by: Kevin Hilman <khilman@baylibre.com> # omap
---
 include/linux/cpufreq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 20f7e98ee8af..e3b3face9134 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -396,7 +396,7 @@ struct cpufreq_driver {
 
 	int		(*online)(struct cpufreq_policy *policy);
 	int		(*offline)(struct cpufreq_policy *policy);
-	int		(*exit)(struct cpufreq_policy *policy);
+	void		(*exit)(struct cpufreq_policy *policy);
 	int		(*suspend)(struct cpufreq_policy *policy);
 	int		(*resume)(struct cpufreq_policy *policy);
 
-- 
cgit v1.2.3


From 4aa99c71e42ad60178c1154ec24e3df9c684fb67 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 24 Jun 2024 19:01:17 +0200
Subject: jbd2: make jbd2_journal_get_max_txn_bufs() internal

There's no reason to have jbd2_journal_get_max_txn_bufs() public
function. Currently all users are internal and can use
journal->j_max_transaction_buffers instead. This saves some unnecessary
recomputations of the limit as a bonus which becomes important as this
function gets more complex in the following patch.

CC: stable@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
Link: https://patch.msgid.link/20240624170127.3253-1-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/jbd2.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index ab04c1c27fae..f91b930abe20 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1660,11 +1660,6 @@ int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
 int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
 int jbd2_fc_release_bufs(journal_t *journal);
 
-static inline int jbd2_journal_get_max_txn_bufs(journal_t *journal)
-{
-	return (journal->j_total_len - journal->j_fc_wbufsize) / 4;
-}
-
 /*
  * is_journal_abort
  *
-- 
cgit v1.2.3


From e3a00a23781c1f2fcda98a7aecaac515558e7a35 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 24 Jun 2024 19:01:18 +0200
Subject: jbd2: precompute number of transaction descriptor blocks

Instead of computing the number of descriptor blocks a transaction can
have each time we need it (which is currently when starting each
transaction but will become more frequent later) precompute the number
once during journal initialization together with maximum transaction
size. We perform the precomputation whenever journal feature set is
updated similarly as for computation of
journal->j_revoke_records_per_block.

CC: stable@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
Link: https://patch.msgid.link/20240624170127.3253-2-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/jbd2.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index f91b930abe20..b900c642210c 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1085,6 +1085,13 @@ struct journal_s
 	 */
 	int			j_revoke_records_per_block;
 
+	/**
+	 * @j_transaction_overhead:
+	 *
+	 * Number of blocks each transaction needs for its own bookkeeping
+	 */
+	int			j_transaction_overhead_buffers;
+
 	/**
 	 * @j_commit_interval:
 	 *
-- 
cgit v1.2.3


From fe3d508ba95bc63adba661355115be340275c0d1 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Mon, 8 Jul 2024 09:16:48 +0000
Subject: block: Validate logical block size in blk_validate_limits()

Some drivers validate that their own logical block size. It is no harm to
always do this, so validate in blk_validate_limits().

This allows us to remove the validation in most of those drivers.

Add a comment to blk_validate_block_size() to inform users that self-
validation of LBS is usually unnecessary.

Signed-off-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Link: https://lore.kernel.org/r/20240708091651.177447-3-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 02e04df27282..dce4a6bf7307 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -268,6 +268,7 @@ static inline dev_t disk_devt(struct gendisk *disk)
 	return MKDEV(disk->major, disk->first_minor);
 }
 
+/* blk_validate_limits() validates bsize, so drivers don't usually need to */
 static inline int blk_validate_block_size(unsigned long bsize)
 {
 	if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
-- 
cgit v1.2.3


From 97c4264f62a73664a2934203346a3e04c109b8ec Mon Sep 17 00:00:00 2001
From: Peter Griffin <peter.griffin@linaro.org>
Date: Tue, 2 Jul 2024 08:35:09 +0200
Subject: soc: samsung: exynos-pmu: add support for PMU_ALIVE non atomic
 registers

Not all registers in PMU_ALIVE block support atomic set/clear operations.
GS101_SYSIP_DAT0 and GS101_SYSTEM_CONFIGURATION registers are two regs
where attempting atomic access fails.

As documentation on exactly which registers support atomic operations is
not forthcoming. We default to atomic access, unless the register is
explicitly added to the tensor_is_atomic() function. Update the comment
to reflect this as well.

Reviewed-by: Will McVicker <willmcvicker@google.com>
Tested-by: Will McVicker <willmcvicker@google.com>
Signed-off-by: Peter Griffin <peter.griffin@linaro.org>
Link: https://lore.kernel.org/r/20240628223506.1237523-4-peter.griffin@linaro.org
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20240702063514.6215-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/soc/samsung/exynos-regs-pmu.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h
index aa840ed043e1..f411c176536d 100644
--- a/include/linux/soc/samsung/exynos-regs-pmu.h
+++ b/include/linux/soc/samsung/exynos-regs-pmu.h
@@ -657,4 +657,8 @@
 #define EXYNOS5433_PAD_RETENTION_UFS_OPTION			(0x3268)
 #define EXYNOS5433_PAD_RETENTION_FSYSGENIO_OPTION		(0x32A8)
 
+/* For Tensor GS101 */
+#define GS101_SYSIP_DAT0					(0x810)
+#define GS101_SYSTEM_CONFIGURATION				(0x3A00)
+
 #endif /* __LINUX_SOC_EXYNOS_REGS_PMU_H */
-- 
cgit v1.2.3


From e6c06ca8f21d1cdb444c708e385d86a54bc5fc60 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 9 Jul 2024 10:38:30 +0200
Subject: wifi: cfg80211: add support for advertising multiple radios belonging
 to a wiphy

The prerequisite for MLO support in cfg80211/mac80211 is that all the links
participating in MLO must be from the same wiphy/ieee80211_hw. To meet this
expectation, some drivers may need to group multiple discrete hardware each
acting as a link in MLO under single wiphy.

With this change, supported frequencies and interface combinations of each
individual radio are reported to user space. This allows user space to figure
out the limitations of what combination of channels can be used concurrently.

Even for non-MLO devices, this improves support for devices capable of
running on multiple channels at the same time.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://patch.msgid.link/18a88f9ce82b1c9f7c12f1672430eaf2bb0be295.1720514221.git-series.nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 42 +++++++++++++++++++++++++++-
 include/uapi/linux/nl80211.h | 65 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 106 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 6f992aff74ae..a00cf80e61dc 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5046,7 +5046,9 @@ struct ieee80211_iface_limit {
  * struct ieee80211_iface_combination - possible interface combination
  *
  * With this structure the driver can describe which interface
- * combinations it supports concurrently.
+ * combinations it supports concurrently. When set in a struct wiphy_radio,
+ * the combinations refer to combinations of interfaces currently active on
+ * that radio.
  *
  * Examples:
  *
@@ -5406,6 +5408,38 @@ struct wiphy_iftype_akm_suites {
 	int n_akm_suites;
 };
 
+/**
+ * struct wiphy_radio_freq_range - wiphy frequency range
+ * @start_freq:  start range edge frequency (kHz)
+ * @end_freq:    end range edge frequency (kHz)
+ */
+struct wiphy_radio_freq_range {
+	u32 start_freq;
+	u32 end_freq;
+};
+
+
+/**
+ * struct wiphy_radio - physical radio of a wiphy
+ * This structure describes a physical radio belonging to a wiphy.
+ * It is used to describe concurrent-channel capabilities. Only one channel
+ * can be active on the radio described by struct wiphy_radio.
+ *
+ * @freq_range: frequency range that the radio can operate on.
+ * @n_freq_range: number of elements in @freq_range
+ *
+ * @iface_combinations: Valid interface combinations array, should not
+ *	list single interface types.
+ * @n_iface_combinations: number of entries in @iface_combinations array.
+ */
+struct wiphy_radio {
+	const struct wiphy_radio_freq_range *freq_range;
+	int n_freq_range;
+
+	const struct ieee80211_iface_combination *iface_combinations;
+	int n_iface_combinations;
+};
+
 #define CFG80211_HW_TIMESTAMP_ALL_PEERS	0xffff
 
 /**
@@ -5624,6 +5658,9 @@ struct wiphy_iftype_akm_suites {
  *	A value of %CFG80211_HW_TIMESTAMP_ALL_PEERS indicates the driver
  *	supports enabling HW timestamping for all peers (i.e. no need to
  *	specify a mac address).
+ *
+ * @radio: radios belonging to this wiphy
+ * @n_radio: number of radios
  */
 struct wiphy {
 	struct mutex mtx;
@@ -5774,6 +5811,9 @@ struct wiphy {
 
 	u16 hw_timestamp_max_peers;
 
+	int n_radio;
+	const struct wiphy_radio *radio;
+
 	char priv[] __aligned(NETDEV_ALIGN);
 };
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 6ae3997061b6..f97f5adc8d51 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2052,6 +2052,10 @@ enum nl80211_commands {
  * @NL80211_ATTR_INTERFACE_COMBINATIONS: Nested attribute listing the supported
  *	interface combinations. In each nested item, it contains attributes
  *	defined in &enum nl80211_if_combination_attrs.
+ *	If the wiphy uses multiple radios (@NL80211_ATTR_WIPHY_RADIOS is set),
+ *	this attribute contains the interface combinations of the first radio.
+ *	See @NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS for the global wiphy
+ *	combinations for the sum of all radios.
  * @NL80211_ATTR_SOFTWARE_IFTYPES: Nested attribute (just like
  *	%NL80211_ATTR_SUPPORTED_IFTYPES) containing the interface types that
  *	are managed in software: interfaces of these types aren't subject to
@@ -2856,6 +2860,14 @@ enum nl80211_commands {
  *	%NL80211_CMD_ASSOCIATE indicating the SPP A-MSDUs
  *	are used on this connection
  *
+ * @NL80211_ATTR_WIPHY_RADIOS: Nested attribute describing physical radios
+ *	belonging to this wiphy. See &enum nl80211_wiphy_radio_attrs.
+ *
+ * @NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS: Nested attribute listing the
+ *	supported interface combinations for all radios combined. In each
+ *	nested item, it contains attributes defined in
+ *	&enum nl80211_if_combination_attrs.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3401,6 +3413,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_ASSOC_SPP_AMSDU,
 
+	NL80211_ATTR_WIPHY_RADIOS,
+	NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -8005,4 +8020,54 @@ enum nl80211_ap_settings_flags {
 	NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT	= 1 << 1,
 };
 
+/**
+ * enum nl80211_wiphy_radio_attrs - wiphy radio attributes
+ *
+ * @__NL80211_WIPHY_RADIO_ATTR_INVALID: Invalid
+ *
+ * @NL80211_WIPHY_RADIO_ATTR_INDEX: Index of this radio (u32)
+ * @NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE: Frequency range supported by this
+ *	radio. Attribute may be present multiple times.
+ * @NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION: Supported interface
+ *	combination for this radio. Attribute may be present multiple times
+ *	and contains attributes defined in &enum nl80211_if_combination_attrs.
+ *
+ * @__NL80211_WIPHY_RADIO_ATTR_LAST: Internal
+ * @NL80211_WIPHY_RADIO_ATTR_MAX: Highest attribute
+ */
+enum nl80211_wiphy_radio_attrs {
+	__NL80211_WIPHY_RADIO_ATTR_INVALID,
+
+	NL80211_WIPHY_RADIO_ATTR_INDEX,
+	NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE,
+	NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION,
+
+	/* keep last */
+	__NL80211_WIPHY_RADIO_ATTR_LAST,
+	NL80211_WIPHY_RADIO_ATTR_MAX = __NL80211_WIPHY_RADIO_ATTR_LAST - 1,
+};
+
+/**
+ * enum nl80211_wiphy_radio_freq_range - wiphy radio frequency range
+ *
+ * @__NL80211_WIPHY_RADIO_FREQ_ATTR_INVALID: Invalid
+ *
+ * @NL80211_WIPHY_RADIO_FREQ_ATTR_START: Frequency range start (u32).
+ *	The unit is kHz.
+ * @NL80211_WIPHY_RADIO_FREQ_ATTR_END: Frequency range end (u32).
+ *	The unit is kHz.
+ *
+ * @__NL80211_WIPHY_RADIO_FREQ_ATTR_LAST: Internal
+ * @NL80211_WIPHY_RADIO_FREQ_ATTR_MAX: Highest attribute
+ */
+enum nl80211_wiphy_radio_freq_range {
+	__NL80211_WIPHY_RADIO_FREQ_ATTR_INVALID,
+
+	NL80211_WIPHY_RADIO_FREQ_ATTR_START,
+	NL80211_WIPHY_RADIO_FREQ_ATTR_END,
+
+	__NL80211_WIPHY_RADIO_FREQ_ATTR_LAST,
+	NL80211_WIPHY_RADIO_FREQ_ATTR_MAX = __NL80211_WIPHY_RADIO_FREQ_ATTR_LAST - 1,
+};
+
 #endif /* __LINUX_NL80211_H */
-- 
cgit v1.2.3


From abb4cfe3661aa05426916b21164f88ca5a405a3a Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 9 Jul 2024 10:38:31 +0200
Subject: wifi: cfg80211: extend interface combination check for multi-radio

Add a field in struct iface_combination_params to check per-radio
interface combinations instead of per-wiphy ones.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://patch.msgid.link/32b28da89c2d759b0324deeefe2be4cee91de18e.1720514221.git-series.nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index a00cf80e61dc..4767e2c76b01 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1598,6 +1598,7 @@ struct cfg80211_color_change_settings {
  *
  * Used to pass interface combination parameters
  *
+ * @radio_idx: wiphy radio index or -1 for global
  * @num_different_channels: the number of different channels we want
  *	to use for verification
  * @radar_detect: a bitmap where each bit corresponds to a channel
@@ -1611,6 +1612,7 @@ struct cfg80211_color_change_settings {
  *	the verification
  */
 struct iface_combination_params {
+	int radio_idx;
 	int num_different_channels;
 	u8 radar_detect;
 	int iftype_num[NUM_NL80211_IFTYPES];
@@ -4580,6 +4582,8 @@ struct mgmt_frame_regs {
  *
  * @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames.
  * @set_ttlm: set the TID to link mapping.
+ * @get_radio_mask: get bitmask of radios in use.
+ *	(invoked with the wiphy mutex held)
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -4941,6 +4945,7 @@ struct cfg80211_ops {
 				    struct cfg80211_set_hw_timestamp *hwts);
 	int	(*set_ttlm)(struct wiphy *wiphy, struct net_device *dev,
 			    struct cfg80211_ttlm_params *params);
+	u32	(*get_radio_mask)(struct wiphy *wiphy, struct net_device *dev);
 };
 
 /*
-- 
cgit v1.2.3


From 417d88189ccf645a157a3dc3da7e894c1f2829d1 Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@toblux.com>
Date: Thu, 4 Jul 2024 22:25:59 +0200
Subject: sctp: Fix typos and improve comments

Fix typos s/steam/stream/ and spell out Schedule/Unschedule in the
comments.

Compile-tested only.

Signed-off-by: Thorsten Blum <thorsten.blum@toblux.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20240704202558.62704-2-thorsten.blum@toblux.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/sctp/stream_sched.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h
index 572d73fdcd5e..8034bf5febbe 100644
--- a/include/net/sctp/stream_sched.h
+++ b/include/net/sctp/stream_sched.h
@@ -35,10 +35,10 @@ struct sctp_sched_ops {
 	struct sctp_chunk *(*dequeue)(struct sctp_outq *q);
 	/* Called only if the chunk fit the packet */
 	void (*dequeue_done)(struct sctp_outq *q, struct sctp_chunk *chunk);
-	/* Sched all chunks already enqueued */
-	void (*sched_all)(struct sctp_stream *steam);
-	/* Unched all chunks already enqueued */
-	void (*unsched_all)(struct sctp_stream *steam);
+	/* Schedule all chunks already enqueued */
+	void (*sched_all)(struct sctp_stream *stream);
+	/* Unschedule all chunks already enqueued */
+	void (*unsched_all)(struct sctp_stream *stream);
 };
 
 int sctp_sched_set_sched(struct sctp_association *asoc,
-- 
cgit v1.2.3


From 510dba80ed669d6123901ccf0476706122b008b1 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 9 Jul 2024 10:38:32 +0200
Subject: wifi: cfg80211: add helper for checking if a chandef is valid on a
 radio

Check if the full channel width is in the radio's frequency range.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://patch.msgid.link/7c8ea146feb6f37cee62e5ba6be5370403695797.1720514221.git-series.nbd@nbd.name
[add missing Return: documentation]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 4767e2c76b01..192d72c8b465 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -6508,6 +6508,17 @@ static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan)
 	return ieee80211_frequency_to_channel(chan->center_freq) % 16 == 5;
 }
 
+/**
+ * cfg80211_radio_chandef_valid - Check if the radio supports the chandef
+ *
+ * @radio: wiphy radio
+ * @chandef: chandef for current channel
+ *
+ * Return: whether or not the given chandef is valid for the given radio
+ */
+bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio,
+				  const struct cfg80211_chan_def *chandef);
+
 /**
  * ieee80211_get_response_rate - get basic rate for a given rate
  *
-- 
cgit v1.2.3


From 2920bc8d916d30b5273ec16e6878f13b24e3851f Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 9 Jul 2024 10:38:34 +0200
Subject: wifi: mac80211: add radio index to ieee80211_chanctx_conf

Will be used to explicitly assign a channel context to a wiphy radio.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://patch.msgid.link/59f76f57d935f155099276be22badfa671d5bfd9.1720514221.git-series.nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index bd0f8aefa797..e78ccbe38d6d 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -250,6 +250,7 @@ struct ieee80211_chan_req {
  * @min_def: the minimum channel definition currently required.
  * @ap: the channel definition the AP actually is operating as,
  *	for use with (wider bandwidth) OFDMA
+ * @radio_idx: index of the wiphy radio used used for this channel
  * @rx_chains_static: The number of RX chains that must always be
  *	active on the channel to receive MIMO transmissions
  * @rx_chains_dynamic: The number of RX chains that must be enabled
@@ -264,6 +265,7 @@ struct ieee80211_chanctx_conf {
 	struct cfg80211_chan_def min_def;
 	struct cfg80211_chan_def ap;
 
+	int radio_idx;
 	u8 rx_chains_static, rx_chains_dynamic;
 
 	bool radar_enabled;
-- 
cgit v1.2.3


From ef6dfcbcbbf7d9a414feb44aa093d2d8592a2e20 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 13 Feb 2024 23:02:20 +0100
Subject: mfd: tmio: Remove obsolete platform_data

With commit 8971bb812e3c ("mfd: remove toshiba tmio drivers"), all users
of platform data for NAND and framebuffers are gone. So, remove
definitions from the header, too.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20240213220221.2380-9-wsa+renesas@sang-engineering.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/tmio.h | 27 ---------------------------
 1 file changed, 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index eace8ea6cda0..bc53323293a3 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -103,31 +103,4 @@ struct tmio_mmc_data {
 	void (*set_pwr)(struct platform_device *host, int state);
 	void (*set_clk_div)(struct platform_device *host, int state);
 };
-
-/*
- * data for the NAND controller
- */
-struct tmio_nand_data {
-	struct nand_bbt_descr	*badblock_pattern;
-	struct mtd_partition	*partition;
-	unsigned int		num_partitions;
-	const char *const	*part_parsers;
-};
-
-#define FBIO_TMIO_ACC_WRITE	0x7C639300
-#define FBIO_TMIO_ACC_SYNC	0x7C639301
-
-struct tmio_fb_data {
-	int			(*lcd_set_power)(struct platform_device *fb_dev,
-						 bool on);
-	int			(*lcd_mode)(struct platform_device *fb_dev,
-					    const struct fb_videomode *mode);
-	int			num_modes;
-	struct fb_videomode	*modes;
-
-	/* in mm: size of screen */
-	int			height;
-	int			width;
-};
-
 #endif
-- 
cgit v1.2.3


From 6bec678b9872271ce2e0879c360ad7a1a524c7fa Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 13 Feb 2024 23:02:21 +0100
Subject: mfd: tmio: Remove obsolete io accessors

Since commit 568494db6809 ("mtd: remove tmio_nand driver") and commit
aceae7848624 ("fbdev: remove tmiofb driver"), these accessors have no
users anymore. Remove them.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20240213220221.2380-10-wsa+renesas@sang-engineering.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/tmio.h | 25 -------------------------
 1 file changed, 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index bc53323293a3..4223315d2b2a 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -10,31 +10,6 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 
-#define tmio_ioread8(addr) readb(addr)
-#define tmio_ioread16(addr) readw(addr)
-#define tmio_ioread16_rep(r, b, l) readsw(r, b, l)
-#define tmio_ioread32(addr) \
-	(((u32)readw((addr))) | (((u32)readw((addr) + 2)) << 16))
-
-#define tmio_iowrite8(val, addr) writeb((val), (addr))
-#define tmio_iowrite16(val, addr) writew((val), (addr))
-#define tmio_iowrite16_rep(r, b, l) writesw(r, b, l)
-#define tmio_iowrite32(val, addr) \
-	do { \
-		writew((val),       (addr)); \
-		writew((val) >> 16, (addr) + 2); \
-	} while (0)
-
-#define sd_config_write8(base, shift, reg, val) \
-	tmio_iowrite8((val), (base) + ((reg) << (shift)))
-#define sd_config_write16(base, shift, reg, val) \
-	tmio_iowrite16((val), (base) + ((reg) << (shift)))
-#define sd_config_write32(base, shift, reg, val) \
-	do { \
-		tmio_iowrite16((val), (base) + ((reg) << (shift)));   \
-		tmio_iowrite16((val) >> 16, (base) + ((reg + 2) << (shift))); \
-	} while (0)
-
 /* tmio MMC platform flags */
 /*
  * Some controllers can support a 2-byte block size when the bus width
-- 
cgit v1.2.3


From d411ccbe103d665a31861987e2f1b36944ab63f2 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 13 Feb 2024 23:02:23 +0100
Subject: mfd: tmio: Update include files

Remove meanwhile unneeded includes, only add types.h for dma_addr_t.
Also, remove an obsolete forward declaration while here.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20240213220221.2380-12-wsa+renesas@sang-engineering.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/tmio.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 4223315d2b2a..f71d4e507dcb 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -2,13 +2,8 @@
 #ifndef MFD_TMIO_H
 #define MFD_TMIO_H
 
-#include <linux/device.h>
-#include <linux/fb.h>
-#include <linux/io.h>
-#include <linux/jiffies.h>
-#include <linux/mmc/card.h>
 #include <linux/platform_device.h>
-#include <linux/pm_runtime.h>
+#include <linux/types.h>
 
 /* tmio MMC platform flags */
 /*
@@ -59,8 +54,6 @@
 /* Some controllers have a CBSY bit */
 #define TMIO_MMC_HAVE_CBSY		BIT(11)
 
-struct dma_chan;
-
 /*
  * data for the MMC controller
  */
-- 
cgit v1.2.3


From 763135b819ad3e3e0301b66094d50923e64092a7 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 13 Feb 2024 23:02:24 +0100
Subject: mfd: tmio: Sanitize comments

Reformat the comments to utilize the maximum line length and use single
line comments where appropriate. Remove superfluous comments, too.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20240213220221.2380-13-wsa+renesas@sang-engineering.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/tmio.h | 32 +++++++++++---------------------
 1 file changed, 11 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index f71d4e507dcb..1cf418643da9 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -5,23 +5,23 @@
 #include <linux/platform_device.h>
 #include <linux/types.h>
 
-/* tmio MMC platform flags */
+/* TMIO MMC platform flags */
+
 /*
- * Some controllers can support a 2-byte block size when the bus width
- * is configured in 4-bit mode.
+ * Some controllers can support a 2-byte block size when the bus width is
+ * configured in 4-bit mode.
  */
 #define TMIO_MMC_BLKSZ_2BYTES		BIT(1)
-/*
- * Some controllers can support SDIO IRQ signalling.
- */
+
+/* Some controllers can support SDIO IRQ signalling */
 #define TMIO_MMC_SDIO_IRQ		BIT(2)
 
 /* Some features are only available or tested on R-Car Gen2 or later */
 #define TMIO_MMC_MIN_RCAR2		BIT(3)
 
 /*
- * Some controllers require waiting for the SD bus to become
- * idle before writing to some registers.
+ * Some controllers require waiting for the SD bus to become idle before
+ * writing to some registers.
  */
 #define TMIO_MMC_HAS_IDLE_WAIT		BIT(4)
 
@@ -32,31 +32,21 @@
  */
 #define TMIO_MMC_USE_BUSY_TIMEOUT	BIT(5)
 
-/*
- * Some controllers have CMD12 automatically
- * issue/non-issue register
- */
+/* Some controllers have CMD12 automatically issue/non-issue register */
 #define TMIO_MMC_HAVE_CMD12_CTRL	BIT(7)
 
 /* Controller has some SDIO status bits which must be 1 */
 #define TMIO_MMC_SDIO_STATUS_SETBITS	BIT(8)
 
-/*
- * Some controllers have a 32-bit wide data port register
- */
+/* Some controllers have a 32-bit wide data port register */
 #define TMIO_MMC_32BIT_DATA_PORT	BIT(9)
 
-/*
- * Some controllers allows to set SDx actual clock
- */
+/* Some controllers allows to set SDx actual clock */
 #define TMIO_MMC_CLK_ACTUAL		BIT(10)
 
 /* Some controllers have a CBSY bit */
 #define TMIO_MMC_HAVE_CBSY		BIT(11)
 
-/*
- * data for the MMC controller
- */
 struct tmio_mmc_data {
 	void				*chan_priv_tx;
 	void				*chan_priv_rx;
-- 
cgit v1.2.3


From 70b46487b1558eb25ea6e533c905131b10596dc0 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 13 Feb 2024 23:02:25 +0100
Subject: mfd: tmio: Move header to platform_data

All the MFD components are gone from the header meanwhile. Only the MMC
relevant data is left which makes it a platform_data for the MMC
controller. Move the header to the now fitting directory.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # For MMC
Acked-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20240213220221.2380-14-wsa+renesas@sang-engineering.com
Signed-off-by: Lee Jones <lee@kernel.org>
---
 include/linux/mfd/tmio.h           | 64 --------------------------------------
 include/linux/platform_data/tmio.h | 64 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 64 deletions(-)
 delete mode 100644 include/linux/mfd/tmio.h
 create mode 100644 include/linux/platform_data/tmio.h

(limited to 'include')

diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
deleted file mode 100644
index 1cf418643da9..000000000000
--- a/include/linux/mfd/tmio.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef MFD_TMIO_H
-#define MFD_TMIO_H
-
-#include <linux/platform_device.h>
-#include <linux/types.h>
-
-/* TMIO MMC platform flags */
-
-/*
- * Some controllers can support a 2-byte block size when the bus width is
- * configured in 4-bit mode.
- */
-#define TMIO_MMC_BLKSZ_2BYTES		BIT(1)
-
-/* Some controllers can support SDIO IRQ signalling */
-#define TMIO_MMC_SDIO_IRQ		BIT(2)
-
-/* Some features are only available or tested on R-Car Gen2 or later */
-#define TMIO_MMC_MIN_RCAR2		BIT(3)
-
-/*
- * Some controllers require waiting for the SD bus to become idle before
- * writing to some registers.
- */
-#define TMIO_MMC_HAS_IDLE_WAIT		BIT(4)
-
-/*
- * Use the busy timeout feature. Probably all TMIO versions support it. Yet,
- * we don't have documentation for old variants, so we enable only known good
- * variants with this flag. Can be removed once all variants are known good.
- */
-#define TMIO_MMC_USE_BUSY_TIMEOUT	BIT(5)
-
-/* Some controllers have CMD12 automatically issue/non-issue register */
-#define TMIO_MMC_HAVE_CMD12_CTRL	BIT(7)
-
-/* Controller has some SDIO status bits which must be 1 */
-#define TMIO_MMC_SDIO_STATUS_SETBITS	BIT(8)
-
-/* Some controllers have a 32-bit wide data port register */
-#define TMIO_MMC_32BIT_DATA_PORT	BIT(9)
-
-/* Some controllers allows to set SDx actual clock */
-#define TMIO_MMC_CLK_ACTUAL		BIT(10)
-
-/* Some controllers have a CBSY bit */
-#define TMIO_MMC_HAVE_CBSY		BIT(11)
-
-struct tmio_mmc_data {
-	void				*chan_priv_tx;
-	void				*chan_priv_rx;
-	unsigned int			hclk;
-	unsigned long			capabilities;
-	unsigned long			capabilities2;
-	unsigned long			flags;
-	u32				ocr_mask;	/* available voltages */
-	dma_addr_t			dma_rx_offset;
-	unsigned int			max_blk_count;
-	unsigned short			max_segs;
-	void (*set_pwr)(struct platform_device *host, int state);
-	void (*set_clk_div)(struct platform_device *host, int state);
-};
-#endif
diff --git a/include/linux/platform_data/tmio.h b/include/linux/platform_data/tmio.h
new file mode 100644
index 000000000000..1cf418643da9
--- /dev/null
+++ b/include/linux/platform_data/tmio.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef MFD_TMIO_H
+#define MFD_TMIO_H
+
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+/* TMIO MMC platform flags */
+
+/*
+ * Some controllers can support a 2-byte block size when the bus width is
+ * configured in 4-bit mode.
+ */
+#define TMIO_MMC_BLKSZ_2BYTES		BIT(1)
+
+/* Some controllers can support SDIO IRQ signalling */
+#define TMIO_MMC_SDIO_IRQ		BIT(2)
+
+/* Some features are only available or tested on R-Car Gen2 or later */
+#define TMIO_MMC_MIN_RCAR2		BIT(3)
+
+/*
+ * Some controllers require waiting for the SD bus to become idle before
+ * writing to some registers.
+ */
+#define TMIO_MMC_HAS_IDLE_WAIT		BIT(4)
+
+/*
+ * Use the busy timeout feature. Probably all TMIO versions support it. Yet,
+ * we don't have documentation for old variants, so we enable only known good
+ * variants with this flag. Can be removed once all variants are known good.
+ */
+#define TMIO_MMC_USE_BUSY_TIMEOUT	BIT(5)
+
+/* Some controllers have CMD12 automatically issue/non-issue register */
+#define TMIO_MMC_HAVE_CMD12_CTRL	BIT(7)
+
+/* Controller has some SDIO status bits which must be 1 */
+#define TMIO_MMC_SDIO_STATUS_SETBITS	BIT(8)
+
+/* Some controllers have a 32-bit wide data port register */
+#define TMIO_MMC_32BIT_DATA_PORT	BIT(9)
+
+/* Some controllers allows to set SDx actual clock */
+#define TMIO_MMC_CLK_ACTUAL		BIT(10)
+
+/* Some controllers have a CBSY bit */
+#define TMIO_MMC_HAVE_CBSY		BIT(11)
+
+struct tmio_mmc_data {
+	void				*chan_priv_tx;
+	void				*chan_priv_rx;
+	unsigned int			hclk;
+	unsigned long			capabilities;
+	unsigned long			capabilities2;
+	unsigned long			flags;
+	u32				ocr_mask;	/* available voltages */
+	dma_addr_t			dma_rx_offset;
+	unsigned int			max_blk_count;
+	unsigned short			max_segs;
+	void (*set_pwr)(struct platform_device *host, int state);
+	void (*set_clk_div)(struct platform_device *host, int state);
+};
+#endif
-- 
cgit v1.2.3


From 32625ac9e110da530db4f0faf918b1f5674e7ca3 Mon Sep 17 00:00:00 2001
From: Xianwei Zhao <xianwei.zhao@amlogic.com>
Date: Thu, 27 Jun 2024 19:47:51 +0800
Subject: dt-bindings: power: add Amlogic A5 power domains

Add devicetree binding document and related header file for
Amlogic A5 secure power domains.

Signed-off-by: Hongyu Chen <hongyu.chen1@amlogic.com>
Signed-off-by: Xianwei Zhao <xianwei.zhao@amlogic.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20240627-a5_secpower-v1-1-1f47dde1270c@amlogic.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/dt-bindings/power/amlogic,a5-pwrc.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 include/dt-bindings/power/amlogic,a5-pwrc.h

(limited to 'include')

diff --git a/include/dt-bindings/power/amlogic,a5-pwrc.h b/include/dt-bindings/power/amlogic,a5-pwrc.h
new file mode 100644
index 000000000000..3a6f53eb959f
--- /dev/null
+++ b/include/dt-bindings/power/amlogic,a5-pwrc.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */
+/*
+ * Copyright (C) 2024 Amlogic, Inc. All rights reserved
+ */
+
+#ifndef _DT_BINDINGS_AMLOGIC_A5_POWER_H
+#define _DT_BINDINGS_AMLOGIC_A5_POWER_H
+
+#define PWRC_A5_NNA_ID			0
+#define PWRC_A5_AUDIO_ID		1
+#define PWRC_A5_SDIOA_ID		2
+#define PWRC_A5_EMMC_ID			3
+#define PWRC_A5_USB_COMB_ID		4
+#define PWRC_A5_ETH_ID			5
+#define PWRC_A5_RSA_ID			6
+#define PWRC_A5_AUDIO_PDM_ID		7
+#define PWRC_A5_DMC_ID			8
+#define PWRC_A5_SYS_WRAP_ID		9
+#define PWRC_A5_DSPA_ID			10
+
+#endif
-- 
cgit v1.2.3


From 95f6454da936e4e6418facddf66596442a842d74 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Mon, 24 Jun 2024 10:18:05 +0530
Subject: PM: domains: Allow devices attached to genpd to be managed by HW

Some power-domains may be capable of relying on the HW to control the power
for a device that's hooked up to it. Typically, for these kinds of
configurations the consumer driver should be able to change the behavior of
power domain at runtime, control the power domain in SW mode for certain
configurations and handover the control to HW mode for other usecases.

To allow a consumer driver to change the behaviour of the PM domain for its
device, let's provide a new function, dev_pm_genpd_set_hwmode(). Moreover,
let's add a corresponding optional genpd callback, ->set_hwmode_dev(),
which the genpd provider should implement if it can support switching
between HW controlled mode and SW controlled mode. Similarly, add the
dev_pm_genpd_get_hwmode() to allow consumers to read the current mode and
its corresponding optional genpd callback, ->get_hwmode_dev(), which the
genpd provider can also implement to synchronize the initial HW mode
state in genpd_add_device() by reading back the mode from the hardware.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Abel Vesa <abel.vesa@linaro.org>
Signed-off-by: Jagadeesh Kona <quic_jkona@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Dhruva Gole <d-gole@ti.com>
Reviewed-by: Taniya Das <quic_tdas@quicinc.com>
Link: https://lore.kernel.org/r/20240624044809.17751-2-quic_jkona@quicinc.com
---
 include/linux/pm_domain.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index f24546a3d3db..015751b64746 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -175,6 +175,10 @@ struct generic_pm_domain {
 	int (*set_performance_state)(struct generic_pm_domain *genpd,
 				     unsigned int state);
 	struct gpd_dev_ops dev_ops;
+	int (*set_hwmode_dev)(struct generic_pm_domain *domain,
+			      struct device *dev, bool enable);
+	bool (*get_hwmode_dev)(struct generic_pm_domain *domain,
+			      struct device *dev);
 	int (*attach_dev)(struct generic_pm_domain *domain,
 			  struct device *dev);
 	void (*detach_dev)(struct generic_pm_domain *domain,
@@ -237,6 +241,7 @@ struct generic_pm_domain_data {
 	unsigned int performance_state;
 	unsigned int default_pstate;
 	unsigned int rpm_pstate;
+	bool hw_mode;
 	void *data;
 };
 
@@ -267,6 +272,8 @@ int dev_pm_genpd_remove_notifier(struct device *dev);
 void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next);
 ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev);
 void dev_pm_genpd_synced_poweroff(struct device *dev);
+int dev_pm_genpd_set_hwmode(struct device *dev, bool enable);
+bool dev_pm_genpd_get_hwmode(struct device *dev);
 
 extern struct dev_power_governor simple_qos_governor;
 extern struct dev_power_governor pm_domain_always_on_gov;
@@ -340,6 +347,16 @@ static inline ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev)
 static inline void dev_pm_genpd_synced_poweroff(struct device *dev)
 { }
 
+static inline int dev_pm_genpd_set_hwmode(struct device *dev, bool enable)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline bool dev_pm_genpd_get_hwmode(struct device *dev)
+{
+	return false;
+}
+
 #define simple_qos_governor		(*(struct dev_power_governor *)(NULL))
 #define pm_domain_always_on_gov		(*(struct dev_power_governor *)(NULL))
 #endif
-- 
cgit v1.2.3


From 68cbd415dd4b9c5b9df69f0f091879e56bf5907a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Fri, 21 Jun 2024 11:15:58 +0200
Subject: task_work: s/task_work_cancel()/task_work_cancel_func()/

A proper task_work_cancel() API that actually cancels a callback and not
*any* callback pointing to a given function is going to be needed for
perf events event freeing. Do the appropriate rename to prepare for
that.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20240621091601.18227-2-frederic@kernel.org
---
 include/linux/task_work.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index 795ef5a68429..23ab01ae185e 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -30,7 +30,7 @@ int task_work_add(struct task_struct *task, struct callback_head *twork,
 
 struct callback_head *task_work_cancel_match(struct task_struct *task,
 	bool (*match)(struct callback_head *, void *data), void *data);
-struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
+struct callback_head *task_work_cancel_func(struct task_struct *, task_work_func_t);
 void task_work_run(void);
 
 static inline void exit_task_work(struct task_struct *task)
-- 
cgit v1.2.3


From f409530e4db9dd11b88cb7703c97c8f326ff6566 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Fri, 21 Jun 2024 11:15:59 +0200
Subject: task_work: Introduce task_work_cancel() again

Re-introduce task_work_cancel(), this time to cancel an actual callback
and not *any* callback pointing to a given function. This is going to be
needed for perf events event freeing.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20240621091601.18227-3-frederic@kernel.org
---
 include/linux/task_work.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index 23ab01ae185e..26b8a47f41fc 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -31,6 +31,7 @@ int task_work_add(struct task_struct *task, struct callback_head *twork,
 struct callback_head *task_work_cancel_match(struct task_struct *task,
 	bool (*match)(struct callback_head *, void *data), void *data);
 struct callback_head *task_work_cancel_func(struct task_struct *, task_work_func_t);
+bool task_work_cancel(struct task_struct *task, struct callback_head *cb);
 void task_work_run(void);
 
 static inline void exit_task_work(struct task_struct *task)
-- 
cgit v1.2.3


From 3a5465418f5fd970e86a86c7f4075be262682840 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Fri, 21 Jun 2024 11:16:01 +0200
Subject: perf: Fix event leak upon exec and file release

The perf pending task work is never waited upon the matching event
release. In the case of a child event, released via free_event()
directly, this can potentially result in a leaked event, such as in the
following scenario that doesn't even require a weak IRQ work
implementation to trigger:

schedule()
   prepare_task_switch()
=======> <NMI>
      perf_event_overflow()
         event->pending_sigtrap = ...
         irq_work_queue(&event->pending_irq)
<======= </NMI>
      perf_event_task_sched_out()
          event_sched_out()
              event->pending_sigtrap = 0;
              atomic_long_inc_not_zero(&event->refcount)
              task_work_add(&event->pending_task)
   finish_lock_switch()
=======> <IRQ>
   perf_pending_irq()
      //do nothing, rely on pending task work
<======= </IRQ>

begin_new_exec()
   perf_event_exit_task()
      perf_event_exit_event()
         // If is child event
         free_event()
            WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1)
            // event is leaked

Similar scenarios can also happen with perf_event_remove_on_exec() or
simply against concurrent perf_event_release().

Fix this with synchonizing against the possibly remaining pending task
work while freeing the event, just like is done with remaining pending
IRQ work. This means that the pending task callback neither need nor
should hold a reference to the event, preventing it from ever beeing
freed.

Fixes: 517e6a301f34 ("perf: Fix perf_pending_task() UaF")
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20240621091601.18227-5-frederic@kernel.org
---
 include/linux/perf_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a5304ae8c654..393fb13733b0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -786,6 +786,7 @@ struct perf_event {
 	struct irq_work			pending_irq;
 	struct callback_head		pending_task;
 	unsigned int			pending_work;
+	struct rcuwait			pending_work_wait;
 
 	atomic_t			event_limit;
 
-- 
cgit v1.2.3


From 466e4d801cd438a1ab2c8a2cce1bef6b65c31bbb Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 4 Jul 2024 19:03:36 +0200
Subject: task_work: Add TWA_NMI_CURRENT as an additional notify mode.

Adding task_work from NMI context requires the following:
- The kasan_record_aux_stack() is not NMU safe and must be avoided.
- Using TWA_RESUME is NMI safe. If the NMI occurs while the CPU is in
  userland then it will continue in userland and not invoke the `work'
  callback.

Add TWA_NMI_CURRENT as an additional notify mode. In this mode skip
kasan and use irq_work in hardirq-mode to for needed interrupt. Set
TIF_NOTIFY_RESUME within the irq_work callback due to k[ac]san
instrumentation in test_and_set_bit() which does not look NMI safe in
case of a report.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20240704170424.1466941-3-bigeasy@linutronix.de
---
 include/linux/task_work.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index 26b8a47f41fc..cf5e7e891a77 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -18,6 +18,7 @@ enum task_work_notify_mode {
 	TWA_RESUME,
 	TWA_SIGNAL,
 	TWA_SIGNAL_NO_IPI,
+	TWA_NMI_CURRENT,
 };
 
 static inline bool task_work_pending(struct task_struct *task)
-- 
cgit v1.2.3


From c5d93d23a26012a98b77f9e354ab9b3afd420059 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 4 Jul 2024 19:03:37 +0200
Subject: perf: Enqueue SIGTRAP always via task_work.

A signal is delivered by raising irq_work() which works from any context
including NMI. irq_work() can be delayed if the architecture does not
provide an interrupt vector. In order not to lose a signal, the signal
is injected via task_work during event_sched_out().

Instead going via irq_work, the signal could be added directly via
task_work. The signal is sent to current and can be enqueued on its
return path to userland.

Queue signal via task_work and consider possible NMI context. Remove
perf_event::pending_sigtrap and and use perf_event::pending_work
instead.

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Marco Elver <elver@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: https://lore.kernel.org/r/20240704170424.1466941-4-bigeasy@linutronix.de
---
 include/linux/perf_event.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 393fb13733b0..ea0d82418d85 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -781,7 +781,6 @@ struct perf_event {
 	unsigned int			pending_wakeup;
 	unsigned int			pending_kill;
 	unsigned int			pending_disable;
-	unsigned int			pending_sigtrap;
 	unsigned long			pending_addr;	/* SIGTRAP */
 	struct irq_work			pending_irq;
 	struct callback_head		pending_task;
@@ -963,7 +962,7 @@ struct perf_event_context {
 	struct rcu_head			rcu_head;
 
 	/*
-	 * Sum (event->pending_sigtrap + event->pending_work)
+	 * Sum (event->pending_work + event->pending_work)
 	 *
 	 * The SIGTRAP is targeted at ctx->task, as such it won't do changing
 	 * that until the signal is delivered.
-- 
cgit v1.2.3


From 0d40a6d83e3e6751f1107ba33587262d937c969f Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 4 Jul 2024 19:03:39 +0200
Subject: perf: Move swevent_htable::recursion into task_struct.

The swevent_htable::recursion counter is used to avoid creating an
swevent while an event is processed to avoid recursion. The counter is
per-CPU and preemption must be disabled to have a stable counter.
perf_pending_task() disables preemption to access the counter and then
signal. This is problematic on PREEMPT_RT because sending a signal uses
a spinlock_t which must not be acquired in atomic on PREEMPT_RT because
it becomes a sleeping lock.

The atomic context can be avoided by moving the counter into the
task_struct. There is a 4 byte hole between futex_state (usually always
on) and the following perf pointer (perf_event_ctxp). After the
recursion lost some weight it fits perfectly.

Move swevent_htable::recursion into task_struct.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/r/20240704170424.1466941-6-bigeasy@linutronix.de
---
 include/linux/perf_event.h | 6 ------
 include/linux/sched.h      | 7 +++++++
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ea0d82418d85..99a7ea1d29ed 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -970,12 +970,6 @@ struct perf_event_context {
 	local_t				nr_pending;
 };
 
-/*
- * Number of contexts where an event can trigger:
- *	task, softirq, hardirq, nmi.
- */
-#define PERF_NR_CONTEXTS	4
-
 struct perf_cpu_pmu_context {
 	struct perf_event_pmu_context	epc;
 	struct perf_event_pmu_context	*task_epc;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 61591ac6eab6..afb1087f5831 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -734,6 +734,12 @@ enum perf_event_task_context {
 	perf_nr_task_contexts,
 };
 
+/*
+ * Number of contexts where an event can trigger:
+ *      task, softirq, hardirq, nmi.
+ */
+#define PERF_NR_CONTEXTS	4
+
 struct wake_q_node {
 	struct wake_q_node *next;
 };
@@ -1256,6 +1262,7 @@ struct task_struct {
 	unsigned int			futex_state;
 #endif
 #ifdef CONFIG_PERF_EVENTS
+	u8				perf_recursion[PERF_NR_CONTEXTS];
 	struct perf_event_context	*perf_event_ctxp;
 	struct mutex			perf_event_mutex;
 	struct list_head		perf_event_list;
-- 
cgit v1.2.3


From 2b84def990d388bed4afe4f21ae383a01991046c Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 4 Jul 2024 19:03:41 +0200
Subject: perf: Split __perf_pending_irq() out of perf_pending_irq()

perf_pending_irq() invokes perf_event_wakeup() and __perf_pending_irq().
The former is in charge of waking any tasks which waits to be woken up
while the latter disables perf-events.

The irq_work perf_pending_irq(), while this an irq_work, the callback
is invoked in thread context on PREEMPT_RT. This is needed because all
the waking functions (wake_up_all(), kill_fasync()) acquire sleep locks
which must not be used with disabled interrupts.
Disabling events, as done by __perf_pending_irq(), expects a hardirq
context and disabled interrupts. This requirement is not fulfilled on
PREEMPT_RT.

Split functionality based on perf_event::pending_disable into irq_work
named `pending_disable_irq' and invoke it in hardirq context on
PREEMPT_RT. Rename the split out callback to perf_pending_disable().

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Marco Elver <elver@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: https://lore.kernel.org/r/20240704170424.1466941-8-bigeasy@linutronix.de
---
 include/linux/perf_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 99a7ea1d29ed..65ece0d5b4b6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -783,6 +783,7 @@ struct perf_event {
 	unsigned int			pending_disable;
 	unsigned long			pending_addr;	/* SIGTRAP */
 	struct irq_work			pending_irq;
+	struct irq_work			pending_disable_irq;
 	struct callback_head		pending_task;
 	unsigned int			pending_work;
 	struct rcuwait			pending_work_wait;
-- 
cgit v1.2.3


From f70080c5bc39716f434e7c0888662aa077eb4405 Mon Sep 17 00:00:00 2001
From: Dragos Tatulea <dtatulea@nvidia.com>
Date: Wed, 26 Jun 2024 13:26:46 +0300
Subject: vdpa/mlx5: Add support for modifying the virtio_version VQ field

This is done in preparation for the pre-creation of hardware virtqueues
at device add time.

Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Message-Id: <20240626-stage-vdpa-vq-precreate-v2-10-560c491078df@nvidia.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/mlx5/mlx5_ifc_vdpa.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
index 40371c916cf9..34f27c01cec9 100644
--- a/include/linux/mlx5/mlx5_ifc_vdpa.h
+++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
@@ -148,6 +148,7 @@ enum {
 	MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS           = (u64)1 << 6,
 	MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX       = (u64)1 << 7,
 	MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX        = (u64)1 << 8,
+	MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION	= (u64)1 << 10,
 	MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY            = (u64)1 << 11,
 	MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY          = (u64)1 << 14,
 };
-- 
cgit v1.2.3


From cdc3c7eaae695738b37f374866950602ec5af9c2 Mon Sep 17 00:00:00 2001
From: Dragos Tatulea <dtatulea@nvidia.com>
Date: Wed, 26 Jun 2024 13:26:47 +0300
Subject: vdpa/mlx5: Add support for modifying the VQ features field

This is done in preparation for the pre-creation of hardware virtqueues
at device add time.

Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Message-Id: <20240626-stage-vdpa-vq-precreate-v2-11-560c491078df@nvidia.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/mlx5/mlx5_ifc_vdpa.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h
index 34f27c01cec9..58dfa2ee7c83 100644
--- a/include/linux/mlx5/mlx5_ifc_vdpa.h
+++ b/include/linux/mlx5/mlx5_ifc_vdpa.h
@@ -150,6 +150,7 @@ enum {
 	MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX        = (u64)1 << 8,
 	MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION	= (u64)1 << 10,
 	MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY            = (u64)1 << 11,
+	MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES		= (u64)1 << 12,
 	MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY          = (u64)1 << 14,
 };
 
-- 
cgit v1.2.3


From 444b89875fc0937ece181fa865c75c9d22649986 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 28 Jun 2024 11:08:48 -0700
Subject: ARM: spitz: Use software nodes to describe MMC GPIOs

Convert Spitz to use software nodes for specifying GPIOs for the MMC.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20240628180852.1738922-9-dmitry.torokhov@gmail.com
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/platform_data/mmc-pxamci.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/platform_data/mmc-pxamci.h b/include/linux/platform_data/mmc-pxamci.h
index 7e44e84e7150..652f323b5ecc 100644
--- a/include/linux/platform_data/mmc-pxamci.h
+++ b/include/linux/platform_data/mmc-pxamci.h
@@ -7,6 +7,7 @@
 
 struct device;
 struct mmc_host;
+struct property_entry;
 
 struct pxamci_platform_data {
 	unsigned int ocr_mask;			/* available voltages */
@@ -18,7 +19,8 @@ struct pxamci_platform_data {
 	bool gpio_card_ro_invert;		/* gpio ro is inverted */
 };
 
-extern void pxa_set_mci_info(struct pxamci_platform_data *info);
+extern void pxa_set_mci_info(const struct pxamci_platform_data *info,
+			     const struct property_entry *props);
 extern void pxa3xx_set_mci2_info(struct pxamci_platform_data *info);
 extern void pxa3xx_set_mci3_info(struct pxamci_platform_data *info);
 
-- 
cgit v1.2.3


From d05374dee295d771261d382f97c0c23cb15aa104 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 5 Jul 2024 21:44:50 +0200
Subject: thermal: core: Change passive_delay and polling_delay data type

It is better to use unsigned int as the data type for the passive_delay
and polling_delay arguments of thermal_zone_device_register_with_trips()
because they are implicitly cast to unsigned int anyway in
thermal_set_delay_jiffies() and if they happen to be negative at that
point, the resulting behavior may not be as desired.

Update the thermal_zone_device_register_with_trips() definition
accordingly.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://patch.msgid.link/5803791.DvuYhMxLoT@rjwysocki.net
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/thermal.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index f732dab20368..cd0045915af5 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -221,7 +221,8 @@ struct thermal_zone_device *thermal_zone_device_register_with_trips(
 					int num_trips, void *devdata,
 					const struct thermal_zone_device_ops *ops,
 					const struct thermal_zone_params *tzp,
-					int passive_delay, int polling_delay);
+					unsigned int passive_delay,
+					unsigned int polling_delay);
 
 struct thermal_zone_device *thermal_tripless_zone_device_register(
 					const char *type,
-- 
cgit v1.2.3


From 463b86fed2b25ddb5f576376bfea00134dd23030 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 2 Jul 2024 16:39:55 +0200
Subject: thermal: helpers: Introduce thermal_trip_is_bound_to_cdev()

Introduce a new helper function thermal_trip_is_bound_to_cdev() for
checking whether or not a given trip point has been bound to a given
cooling device.

The primary user of it will be the Tegra thermal driver.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://patch.msgid.link/13545762.uLZWGnKmhe@rjwysocki.net
---
 include/linux/thermal.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index cd0045915af5..5a0b66bd34f0 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -270,6 +270,9 @@ struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name);
 int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp);
 int thermal_zone_get_slope(struct thermal_zone_device *tz);
 int thermal_zone_get_offset(struct thermal_zone_device *tz);
+bool thermal_trip_is_bound_to_cdev(struct thermal_zone_device *tz,
+				   const struct thermal_trip *trip,
+				   struct thermal_cooling_device *cdev);
 
 int thermal_zone_device_enable(struct thermal_zone_device *tz);
 int thermal_zone_device_disable(struct thermal_zone_device *tz);
-- 
cgit v1.2.3


From d1fbf18a0f9403df2edeffa1c7f5a2d66e82c20a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 2 Jul 2024 16:41:03 +0200
Subject: thermal: trip: Add conversion macros for thermal trip priv field

Some drivers will need to store integers in the priv field of struct
thermal_trip, so add conversion macros for doing this in a consistent
way and switch over the int340x_thermal driver that already does it and
uses custom conversion functions to using the new macros.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://patch.msgid.link/3297884.aeNJFYEL58@rjwysocki.net
---
 include/linux/thermal.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 5a0b66bd34f0..3bd1b361ec34 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -79,6 +79,9 @@ struct thermal_trip {
 #define THERMAL_TRIP_FLAG_RW	(THERMAL_TRIP_FLAG_RW_TEMP | \
 				 THERMAL_TRIP_FLAG_RW_HYST)
 
+#define THERMAL_TRIP_PRIV_TO_INT(_val_)	(uintptr_t)(_val_)
+#define THERMAL_INT_TO_TRIP_PRIV(_val_)	(void *)(uintptr_t)(_val_)
+
 struct thermal_zone_device;
 
 struct thermal_zone_device_ops {
-- 
cgit v1.2.3


From be5db7581f59621ed9cb9cbf6bebccda38263eb5 Mon Sep 17 00:00:00 2001
From: Shenghao Ding <shenghao-ding@ti.com>
Date: Tue, 9 Jul 2024 12:33:40 +0800
Subject: ASoc: TAS2781: rename the tas2781_reset as tasdevice_reset

Rename the tas2781_reset as tasdevice_reset in case of misunderstanding.
RESET register for both tas2563 and tas2781 is same and the use of reset
pin is also same.

Signed-off-by: Shenghao Ding <shenghao-ding@ti.com>
Link: https://patch.msgid.link/20240709043342.946-1-shenghao-ding@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/tas2781.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h
index cd8ce522b78e..a43ad6dcb7c7 100644
--- a/include/sound/tas2781.h
+++ b/include/sound/tas2781.h
@@ -2,7 +2,7 @@
 //
 // ALSA SoC Texas Instruments TAS2563/TAS2781 Audio Smart Amplifier
 //
-// Copyright (C) 2022 - 2023 Texas Instruments Incorporated
+// Copyright (C) 2022 - 2024 Texas Instruments Incorporated
 // https://www.ti.com
 //
 // The TAS2563/TAS2781 driver implements a flexible and configurable
@@ -43,8 +43,8 @@
 					(page * 128)) + reg)
 
 /*Software Reset */
-#define TAS2781_REG_SWRESET		TASDEVICE_REG(0x0, 0X0, 0x01)
-#define TAS2781_REG_SWRESET_RESET	BIT(0)
+#define TASDEVICE_REG_SWRESET		TASDEVICE_REG(0x0, 0X0, 0x01)
+#define TASDEVICE_REG_SWRESET_RESET	BIT(0)
 
 /*I2C Checksum */
 #define TASDEVICE_I2CChecksum		TASDEVICE_REG(0x0, 0x0, 0x7E)
@@ -140,7 +140,7 @@ struct tasdevice_priv {
 	void (*apply_calibration)(struct tasdevice_priv *tas_priv);
 };
 
-void tas2781_reset(struct tasdevice_priv *tas_dev);
+void tasdevice_reset(struct tasdevice_priv *tas_dev);
 int tascodec_init(struct tasdevice_priv *tas_priv, void *codec,
 	struct module *module,
 	void (*cont)(const struct firmware *fw, void *context));
-- 
cgit v1.2.3


From ca52aa4c60f76566601b42e935b8a78f0fb4f8eb Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner@baylibre.com>
Date: Mon, 8 Jul 2024 20:05:29 -0500
Subject: spi: add defer_optimize_message controller flag

Adding spi_optimize_message() broke the spi-mux driver because it
calls spi_async() from it's transfer_one_message() callback. This
resulted in passing an incorrectly optimized message to the controller.
For example, if the underlying controller has an optimize_message()
callback, this would have not been called and can cause a crash when
the underlying controller driver tries to transfer the message.

Also, since the spi-mux driver swaps out the controller pointer by
replacing msg->spi, __spi_unoptimize_message() was being called with a
different controller than the one used in __spi_optimize_message(). This
could cause a crash when attempting to free the message resources when
__spi_unoptimize_message() is called in spi_finalize_current_message()
since it is being called with a controller that did not allocate the
resources.

This is fixed by adding a defer_optimize_message flag for controllers.
This flag causes all of the spi_[maybe_][un]optimize_message() calls to
be a no-op (other than attaching a pointer to the spi device to the
message).

This allows the spi-mux driver to pass an unmodified message to
spi_async() in spi_mux_transfer_one_message() after the spi device has
been swapped out. This causes __spi_optimize_message() and
__spi_unoptimize_message() to be called only once per message and with
the correct/same controller in each case.

Reported-by: Oleksij Rempel <o.rempel@pengutronix.de>
Closes: https://lore.kernel.org/linux-spi/Zn6HMrYG2b7epUxT@pengutronix.de/
Reported-by: Marc Kleine-Budde <mkl@pengutronix.de>
Closes: https://lore.kernel.org/linux-spi/20240628-awesome-discerning-bear-1621f9-mkl@pengutronix.de/
Fixes: 7b1d87af14d9 ("spi: add spi_optimize_message() APIs")
Signed-off-by: David Lechner <dlechner@baylibre.com>
Link: https://patch.msgid.link/20240708-spi-mux-fix-v1-2-6c8845193128@baylibre.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 98fdef6e28f2..67b9a15a5330 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -533,6 +533,9 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
  * @queue_empty: signal green light for opportunistically skipping the queue
  *	for spi_sync transfers.
  * @must_async: disable all fast paths in the core
+ * @defer_optimize_message: set to true if controller cannot pre-optimize messages
+ *	and needs to defer the optimization step until the message is actually
+ *	being transferred
  *
  * Each SPI controller can communicate with one or more @spi_device
  * children.  These make a small bus, sharing MOSI, MISO and SCK signals
@@ -776,6 +779,7 @@ struct spi_controller {
 	/* Flag for enabling opportunistic skipping of the queue in spi_sync */
 	bool			queue_empty;
 	bool			must_async;
+	bool			defer_optimize_message;
 };
 
 static inline void *spi_controller_get_devdata(struct spi_controller *ctlr)
-- 
cgit v1.2.3


From c714f15860fcca02fe0fd7c3f1f1fc35b1768ac1 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 2 Jul 2024 14:34:39 +0800
Subject: iommufd: Add fault and response message definitions

iommu_hwpt_pgfaults represent fault messages that the userspace can
retrieve. Multiple iommu_hwpt_pgfaults might be put in an iopf group,
with the IOMMU_PGFAULT_FLAGS_LAST_PAGE flag set only for the last
iommu_hwpt_pgfault.

An iommu_hwpt_page_response is a response message that the userspace
should send to the kernel after finishing handling a group of fault
messages. The @dev_id, @pasid, and @grpid fields in the message
identify an outstanding iopf group for a device. The @cookie field,
which matches the cookie field of the last fault in the group, will
be used by the kernel to look up the pending message.

Link: https://lore.kernel.org/r/20240702063444.105814-6-baolu.lu@linux.intel.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/linux/iommufd.h | 83 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 1dfeaa2e649e..4d89ed97b533 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -692,4 +692,87 @@ struct iommu_hwpt_invalidate {
 	__u32 __reserved;
 };
 #define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
+
+/**
+ * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault
+ * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is
+ *                                   valid.
+ * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group.
+ */
+enum iommu_hwpt_pgfault_flags {
+	IOMMU_PGFAULT_FLAGS_PASID_VALID		= (1 << 0),
+	IOMMU_PGFAULT_FLAGS_LAST_PAGE		= (1 << 1),
+};
+
+/**
+ * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault
+ * @IOMMU_PGFAULT_PERM_READ: request for read permission
+ * @IOMMU_PGFAULT_PERM_WRITE: request for write permission
+ * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the
+ *                           Execute Requested bit set in PASID TLP Prefix.
+ * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the
+ *                           Privileged Mode Requested bit set in PASID TLP
+ *                           Prefix.
+ */
+enum iommu_hwpt_pgfault_perm {
+	IOMMU_PGFAULT_PERM_READ			= (1 << 0),
+	IOMMU_PGFAULT_PERM_WRITE		= (1 << 1),
+	IOMMU_PGFAULT_PERM_EXEC			= (1 << 2),
+	IOMMU_PGFAULT_PERM_PRIV			= (1 << 3),
+};
+
+/**
+ * struct iommu_hwpt_pgfault - iommu page fault data
+ * @flags: Combination of enum iommu_hwpt_pgfault_flags
+ * @dev_id: id of the originated device
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @perm: Combination of enum iommu_hwpt_pgfault_perm
+ * @addr: Fault address
+ * @length: a hint of how much data the requestor is expecting to fetch. For
+ *          example, if the PRI initiator knows it is going to do a 10MB
+ *          transfer, it could fill in 10MB and the OS could pre-fault in
+ *          10MB of IOVA. It's default to 0 if there's no such hint.
+ * @cookie: kernel-managed cookie identifying a group of fault messages. The
+ *          cookie number encoded in the last page fault of the group should
+ *          be echoed back in the response message.
+ */
+struct iommu_hwpt_pgfault {
+	__u32 flags;
+	__u32 dev_id;
+	__u32 pasid;
+	__u32 grpid;
+	__u32 perm;
+	__u64 addr;
+	__u32 length;
+	__u32 cookie;
+};
+
+/**
+ * enum iommufd_page_response_code - Return status of fault handlers
+ * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
+ *                             populated, retry the access. This is the
+ *                             "Success" defined in PCI 10.4.2.1.
+ * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
+ *                             access. This is the "Invalid Request" in PCI
+ *                             10.4.2.1.
+ * @IOMMUFD_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from
+ *                             this device if possible. This is the "Response
+ *                             Failure" in PCI 10.4.2.1.
+ */
+enum iommufd_page_response_code {
+	IOMMUFD_PAGE_RESP_SUCCESS = 0,
+	IOMMUFD_PAGE_RESP_INVALID,
+	IOMMUFD_PAGE_RESP_FAILURE,
+};
+
+/**
+ * struct iommu_hwpt_page_response - IOMMU page fault response
+ * @cookie: The kernel-managed cookie reported in the fault message.
+ * @code: One of response code in enum iommufd_page_response_code.
+ */
+struct iommu_hwpt_page_response {
+	__u32 cookie;
+	__u32 code;
+};
 #endif
-- 
cgit v1.2.3


From 07838f7fd529c8a6de44b601d4b7057e6c8d36ed Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 2 Jul 2024 14:34:40 +0800
Subject: iommufd: Add iommufd fault object

An iommufd fault object provides an interface for delivering I/O page
faults to user space. These objects are created and destroyed by user
space, and they can be associated with or dissociated from hardware page
table objects during page table allocation or destruction.

User space interacts with the fault object through a file interface. This
interface offers a straightforward and efficient way for user space to
handle page faults. It allows user space to read fault messages
sequentially and respond to them by writing to the same file. The file
interface supports reading messages in poll mode, so it's recommended that
user space applications use io_uring to enhance read and write efficiency.

A fault object can be associated with any iopf-capable iommufd_hw_pgtable
during the pgtable's allocation. All I/O page faults triggered by devices
when accessing the I/O addresses of an iommufd_hw_pgtable are routed
through the fault object to user space. Similarly, user space's responses
to these page faults are routed back to the iommu device driver through
the same fault object.

Link: https://lore.kernel.org/r/20240702063444.105814-7-baolu.lu@linux.intel.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/iommu.h        |  4 ++++
 include/uapi/linux/iommufd.h | 18 ++++++++++++++++++
 2 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 910aec80886e..73bc3aee95a1 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -124,12 +124,16 @@ struct iopf_fault {
 struct iopf_group {
 	struct iopf_fault last_fault;
 	struct list_head faults;
+	size_t fault_count;
 	/* list node for iommu_fault_param::faults */
 	struct list_head pending_node;
 	struct work_struct work;
 	struct iommu_attach_handle *attach_handle;
 	/* The device's fault data parameter. */
 	struct iommu_fault_param *fault_param;
+	/* Used by handler provider to hook the group on its own lists. */
+	struct list_head node;
+	u32 cookie;
 };
 
 /**
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 4d89ed97b533..70b8a38fcd46 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -50,6 +50,7 @@ enum {
 	IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING,
 	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP,
 	IOMMUFD_CMD_HWPT_INVALIDATE,
+	IOMMUFD_CMD_FAULT_QUEUE_ALLOC,
 };
 
 /**
@@ -775,4 +776,21 @@ struct iommu_hwpt_page_response {
 	__u32 cookie;
 	__u32 code;
 };
+
+/**
+ * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
+ * @size: sizeof(struct iommu_fault_alloc)
+ * @flags: Must be 0
+ * @out_fault_id: The ID of the new FAULT
+ * @out_fault_fd: The fd of the new FAULT
+ *
+ * Explicitly allocate a fault handling object.
+ */
+struct iommu_fault_alloc {
+	__u32 size;
+	__u32 flags;
+	__u32 out_fault_id;
+	__u32 out_fault_fd;
+};
+#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
 #endif
-- 
cgit v1.2.3


From 34765cbc679c59ea5d952d738d2d16bf4aadc497 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 2 Jul 2024 14:34:42 +0800
Subject: iommufd: Associate fault object with iommufd_hw_pgtable

When allocating a user iommufd_hw_pagetable, the user space is allowed to
associate a fault object with the hw_pagetable by specifying the fault
object ID in the page table allocation data and setting the
IOMMU_HWPT_FAULT_ID_VALID flag bit.

On a successful return of hwpt allocation, the user can retrieve and
respond to page faults by reading and writing the file interface of the
fault object.

Once a fault object has been associated with a hwpt, the hwpt is
iopf-capable, indicated by hwpt->fault is non NULL. Attaching,
detaching, or replacing an iopf-capable hwpt to an RID or PASID will
differ from those that are not iopf-capable.

Link: https://lore.kernel.org/r/20240702063444.105814-9-baolu.lu@linux.intel.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/linux/iommufd.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 70b8a38fcd46..ede2b464a761 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -357,10 +357,13 @@ struct iommu_vfio_ioas {
  *                                the parent HWPT in a nesting configuration.
  * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
  *                                   enforced on device attachment
+ * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
+ *                             valid.
  */
 enum iommufd_hwpt_alloc_flags {
 	IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
 	IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
+	IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
 };
 
 /**
@@ -412,6 +415,9 @@ enum iommu_hwpt_data_type {
  * @data_type: One of enum iommu_hwpt_data_type
  * @data_len: Length of the type specific data
  * @data_uptr: User pointer to the type specific data
+ * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of
+ *            IOMMU_HWPT_FAULT_ID_VALID is set.
+ * @__reserved2: Padding to 64-bit alignment. Must be 0.
  *
  * Explicitly allocate a hardware page table object. This is the same object
  * type that is returned by iommufd_device_attach() and represents the
@@ -442,6 +448,8 @@ struct iommu_hwpt_alloc {
 	__u32 data_type;
 	__u32 data_len;
 	__aligned_u64 data_uptr;
+	__u32 fault_id;
+	__u32 __reserved2;
 };
 #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
 
-- 
cgit v1.2.3


From 5a5aa3c3769051c02a2210cc1b7e12a0833b76c9 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Thu, 4 Jul 2024 06:25:05 -0700
Subject: sched.h: always_inline alloc_tag_{save|restore} to fix modpost
 warnings

Mark alloc_tag_{save|restore} as always_inline to fix the following
modpost warnings:

WARNING: modpost: vmlinux: section mismatch in reference: alloc_tag_save+0x1c (section: .text.unlikely) -> initcall_level_names (section: .init.data)
WARNING: modpost: vmlinux: section mismatch in reference: alloc_tag_restore+0x3c (section: .text.unlikely) -> initcall_level_names (section: .init.data)

The warnings happen when these functions are called from an __init
function and they don't get inlined (remain in the .text section) while
the value returned by get_current() points into .init.data section.
Assuming get_current() always returns a valid address, this situation can
happen only during init stage and accessing .init.data from .text section
during that stage should pose no issues.

Link: https://lkml.kernel.org/r/20240704132506.1011978-1-surenb@google.com
Fixes: 22d407b164ff ("lib: add allocation tagging support for memory allocation profiling")
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202407032306.gi9nZsBi-lkp@intel.com/
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Chris Zankel <chris@zankel.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 61591ac6eab6..a5f4b48fca18 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2192,13 +2192,13 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
 extern void sched_set_stop_task(int cpu, struct task_struct *stop);
 
 #ifdef CONFIG_MEM_ALLOC_PROFILING
-static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
+static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
 {
 	swap(current->alloc_tag, tag);
 	return tag;
 }
 
-static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
+static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
 {
 #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
 	WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n");
-- 
cgit v1.2.3


From 1b9469cf15976a7cb7378caaa8a1772e7901514d Mon Sep 17 00:00:00 2001
From: Philipp Stanner <pstanner@redhat.com>
Date: Thu, 13 Jun 2024 13:50:21 +0200
Subject: PCI: Move struct pci_devres.pinned bit to struct pci_dev
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bit describing whether the PCI device is currently pinned is stored
in struct pci_devres. To clean up and simplify the PCI devres API, it's
better if this information is stored in struct pci_dev.

This will later permit simplifying pcim_enable_device().

Move the 'pinned' boolean bit to struct pci_dev.

Restructure bits in struct pci_dev so the pm / pme fields are next to
each other.

Link: https://lore.kernel.org/r/20240613115032.29098-9-pstanner@redhat.com
Signed-off-by: Philipp Stanner <pstanner@redhat.com>
Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index fb004fd4e889..0c19f0717899 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -367,10 +367,11 @@ struct pci_dev {
 					   this is D0-D3, D0 being fully
 					   functional, and D3 being off. */
 	u8		pm_cap;		/* PM capability offset */
-	unsigned int	imm_ready:1;	/* Supports Immediate Readiness */
 	unsigned int	pme_support:5;	/* Bitmask of states from which PME#
 					   can be generated */
 	unsigned int	pme_poll:1;	/* Poll device's PME status bit */
+	unsigned int	pinned:1;	/* Whether this dev is pinned */
+	unsigned int	imm_ready:1;	/* Supports Immediate Readiness */
 	unsigned int	d1_support:1;	/* Low power state D1 is supported */
 	unsigned int	d2_support:1;	/* Low power state D2 is supported */
 	unsigned int	no_d1d2:1;	/* D1 and D2 are forbidden */
-- 
cgit v1.2.3


From 7296f2301a057493e97b07739213c6e864f76891 Mon Sep 17 00:00:00 2001
From: Michael Kelley <mhklinux@outlook.com>
Date: Mon, 8 Jul 2024 12:41:00 -0700
Subject: swiotlb: reduce swiotlb pool lookups

With CONFIG_SWIOTLB_DYNAMIC enabled, each round-trip map/unmap pair
in the swiotlb results in 6 calls to swiotlb_find_pool(). In multiple
places, the pool is found and used in one function, and then must
be found again in the next function that is called because only the
tlb_addr is passed as an argument. These are the six call sites:

dma_direct_map_page:
 1. swiotlb_map -> swiotlb_tbl_map_single -> swiotlb_bounce

dma_direct_unmap_page:
 2. dma_direct_sync_single_for_cpu -> is_swiotlb_buffer
 3. dma_direct_sync_single_for_cpu -> swiotlb_sync_single_for_cpu ->
	swiotlb_bounce
 4. is_swiotlb_buffer
 5. swiotlb_tbl_unmap_single -> swiotlb_del_transient
 6. swiotlb_tbl_unmap_single -> swiotlb_release_slots

Reduce the number of calls by finding the pool at a higher level, and
passing it as an argument instead of searching again. A key change is
for is_swiotlb_buffer() to return a pool pointer instead of a boolean,
and then pass this pool pointer to subsequent swiotlb functions.

There are 9 occurrences of is_swiotlb_buffer() used to test if a buffer
is a swiotlb buffer before calling a swiotlb function. To reduce code
duplication in getting the pool pointer and passing it as an argument,
introduce inline wrappers for this pattern. The generated code is
essentially unchanged.

Since is_swiotlb_buffer() no longer returns a boolean, rename some
functions to reflect the change:

 * swiotlb_find_pool() becomes __swiotlb_find_pool()
 * is_swiotlb_buffer() becomes swiotlb_find_pool()
 * is_xen_swiotlb_buffer() becomes xen_swiotlb_find_pool()

With these changes, a round-trip map/unmap pair requires only 2 pool
lookups (listed using the new names and wrappers):

dma_direct_unmap_page:
 1. dma_direct_sync_single_for_cpu -> swiotlb_find_pool
 2. swiotlb_tbl_unmap_single -> swiotlb_find_pool

These changes come from noticing the inefficiencies in a code review,
not from performance measurements. With CONFIG_SWIOTLB_DYNAMIC,
__swiotlb_find_pool() is not trivial, and it uses an RCU read lock,
so avoiding the redundant calls helps performance in a hot path.
When CONFIG_SWIOTLB_DYNAMIC is *not* set, the code size reduction
is minimal and the perf benefits are likely negligible, but no
harm is done.

No functional change is intended.

Signed-off-by: Michael Kelley <mhklinux@outlook.com>
Reviewed-by: Petr Tesarik <petr@tesarici.cz>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/scatterlist.h |   2 +-
 include/linux/swiotlb.h     | 105 ++++++++++++++++++++++++++------------------
 2 files changed, 63 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 77df3d7b18a6..e61d164622db 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -332,7 +332,7 @@ static inline void sg_dma_unmark_bus_address(struct scatterlist *sg)
  * Description:
  *   Returns true if the scatterlist was marked for SWIOTLB bouncing. Not all
  *   elements may have been bounced, so the caller would have to check
- *   individual SG entries with is_swiotlb_buffer().
+ *   individual SG entries with swiotlb_find_pool().
  */
 static inline bool sg_dma_is_swiotlb(struct scatterlist *sg)
 {
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 14bc10c1bb23..3dae0f592063 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -42,24 +42,6 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
 	int (*remap)(void *tlb, unsigned long nslabs));
 extern void __init swiotlb_update_mem_attributes(void);
 
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
-		size_t mapping_size,
-		unsigned int alloc_aligned_mask, enum dma_data_direction dir,
-		unsigned long attrs);
-
-extern void swiotlb_tbl_unmap_single(struct device *hwdev,
-				     phys_addr_t tlb_addr,
-				     size_t mapping_size,
-				     enum dma_data_direction dir,
-				     unsigned long attrs);
-
-void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
-		size_t size, enum dma_data_direction dir);
-void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
-		size_t size, enum dma_data_direction dir);
-dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
-		size_t size, enum dma_data_direction dir, unsigned long attrs);
-
 #ifdef CONFIG_SWIOTLB
 
 /**
@@ -143,37 +125,27 @@ struct io_tlb_mem {
 #endif
 };
 
-#ifdef CONFIG_SWIOTLB_DYNAMIC
-
-struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr);
-
-#else
-
-static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev,
-						    phys_addr_t paddr)
-{
-	return &dev->dma_io_tlb_mem->defpool;
-}
-
-#endif
+struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr);
 
 /**
- * is_swiotlb_buffer() - check if a physical address belongs to a swiotlb
+ * swiotlb_find_pool() - find swiotlb pool to which a physical address belongs
  * @dev:        Device which has mapped the buffer.
  * @paddr:      Physical address within the DMA buffer.
  *
- * Check if @paddr points into a bounce buffer.
+ * Find the swiotlb pool that @paddr points into.
  *
  * Return:
- * * %true if @paddr points into a bounce buffer
- * * %false otherwise
+ * * pool address if @paddr points into a bounce buffer
+ * * NULL if @paddr does not point into a bounce buffer. As such, this function
+ *   can be used to determine if @paddr denotes a swiotlb bounce buffer.
  */
-static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
+static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev,
+		phys_addr_t paddr)
 {
 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 
 	if (!mem)
-		return false;
+		return NULL;
 
 #ifdef CONFIG_SWIOTLB_DYNAMIC
 	/*
@@ -182,16 +154,19 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 	 * If a SWIOTLB address is checked on another CPU, then it was
 	 * presumably loaded by the device driver from an unspecified private
 	 * data structure. Make sure that this load is ordered before reading
-	 * dev->dma_uses_io_tlb here and mem->pools in swiotlb_find_pool().
+	 * dev->dma_uses_io_tlb here and mem->pools in __swiotlb_find_pool().
 	 *
 	 * This barrier pairs with smp_mb() in swiotlb_find_slots().
 	 */
 	smp_rmb();
-	return READ_ONCE(dev->dma_uses_io_tlb) &&
-		swiotlb_find_pool(dev, paddr);
+	if (READ_ONCE(dev->dma_uses_io_tlb))
+		return __swiotlb_find_pool(dev, paddr);
 #else
-	return paddr >= mem->defpool.start && paddr < mem->defpool.end;
+	if (paddr >= mem->defpool.start && paddr < mem->defpool.end)
+		return &mem->defpool;
 #endif
+
+	return NULL;
 }
 
 static inline bool is_swiotlb_force_bounce(struct device *dev)
@@ -219,9 +194,10 @@ static inline void swiotlb_dev_init(struct device *dev)
 {
 }
 
-static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
+static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev,
+		phys_addr_t paddr)
 {
-	return false;
+	return NULL;
 }
 static inline bool is_swiotlb_force_bounce(struct device *dev)
 {
@@ -260,6 +236,49 @@ static inline phys_addr_t default_swiotlb_limit(void)
 }
 #endif /* CONFIG_SWIOTLB */
 
+phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
+		size_t mapping_size, unsigned int alloc_aligned_mask,
+		enum dma_data_direction dir, unsigned long attrs);
+dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
+		size_t size, enum dma_data_direction dir, unsigned long attrs);
+
+void __swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
+		size_t mapping_size, enum dma_data_direction dir,
+		unsigned long attrs, struct io_tlb_pool *pool);
+static inline void swiotlb_tbl_unmap_single(struct device *dev,
+		phys_addr_t addr, size_t size, enum dma_data_direction dir,
+		unsigned long attrs)
+{
+	struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr);
+
+	if (unlikely(pool))
+		__swiotlb_tbl_unmap_single(dev, addr, size, dir, attrs, pool);
+}
+
+void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
+		size_t size, enum dma_data_direction dir,
+		struct io_tlb_pool *pool);
+static inline void swiotlb_sync_single_for_device(struct device *dev,
+		phys_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+	struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr);
+
+	if (unlikely(pool))
+		__swiotlb_sync_single_for_device(dev, addr, size, dir, pool);
+}
+
+void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
+		size_t size, enum dma_data_direction dir,
+		struct io_tlb_pool *pool);
+static inline void swiotlb_sync_single_for_cpu(struct device *dev,
+		phys_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+	struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr);
+
+	if (unlikely(pool))
+		__swiotlb_sync_single_for_cpu(dev, addr, size, dir, pool);
+}
+
 extern void swiotlb_print_info(void);
 
 #ifdef CONFIG_DMA_RESTRICTED_POOL
-- 
cgit v1.2.3


From ab7a880263c30b1675850a584c206770f5545c2f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 8 Jul 2024 10:15:45 +0200
Subject: driver core: make driver_[create|remove]_file take a const *

The functions driver_create_file() and driver_remove_file() do not
modify the struct device_driver structure directly, so they are safe to
be marked as a constant pointer type.

Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Link: https://lore.kernel.org/r/2024070844-volley-hatchling-c812@gregkh
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device/driver.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h
index 7738f458995f..dceb36f1c42c 100644
--- a/include/linux/device/driver.h
+++ b/include/linux/device/driver.h
@@ -146,9 +146,9 @@ struct driver_attribute {
 #define DRIVER_ATTR_WO(_name) \
 	struct driver_attribute driver_attr_##_name = __ATTR_WO(_name)
 
-int __must_check driver_create_file(struct device_driver *driver,
+int __must_check driver_create_file(const struct device_driver *driver,
 				    const struct driver_attribute *attr);
-void driver_remove_file(struct device_driver *driver,
+void driver_remove_file(const struct device_driver *driver,
 			const struct driver_attribute *attr);
 
 int driver_set_override(struct device *dev, const char **override,
-- 
cgit v1.2.3


From f8fb469147e7db57e3f78d46f3f427705b4a1935 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 8 Jul 2024 10:15:46 +0200
Subject: driver core: make driver_find_device() take a const *

The function driver_find_device() does not modify the struct
device_driver structure directly, so it is safe to be marked as a
constant pointer type.  As that is fixed up, also change the function
signature on the inline functions that call this, which are:
	driver_find_device_by_name()
	driver_find_device_by_of_node()
	driver_find_device_by_devt()
	driver_find_next_device()
	driver_find_device_by_acpi_dev()

Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Link: https://lore.kernel.org/r/2024070849-broken-front-9eb5@gregkh
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device/driver.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h
index dceb36f1c42c..1fc8b68786de 100644
--- a/include/linux/device/driver.h
+++ b/include/linux/device/driver.h
@@ -155,7 +155,7 @@ int driver_set_override(struct device *dev, const char **override,
 			const char *s, size_t len);
 int __must_check driver_for_each_device(struct device_driver *drv, struct device *start,
 					void *data, int (*fn)(struct device *dev, void *));
-struct device *driver_find_device(struct device_driver *drv,
+struct device *driver_find_device(const struct device_driver *drv,
 				  struct device *start, const void *data,
 				  int (*match)(struct device *dev, const void *data));
 
@@ -165,7 +165,7 @@ struct device *driver_find_device(struct device_driver *drv,
  * @drv: the driver we're iterating
  * @name: name of the device to match
  */
-static inline struct device *driver_find_device_by_name(struct device_driver *drv,
+static inline struct device *driver_find_device_by_name(const struct device_driver *drv,
 							const char *name)
 {
 	return driver_find_device(drv, NULL, name, device_match_name);
@@ -178,7 +178,7 @@ static inline struct device *driver_find_device_by_name(struct device_driver *dr
  * @np: of_node pointer to match.
  */
 static inline struct device *
-driver_find_device_by_of_node(struct device_driver *drv,
+driver_find_device_by_of_node(const struct device_driver *drv,
 			      const struct device_node *np)
 {
 	return driver_find_device(drv, NULL, np, device_match_of_node);
@@ -203,13 +203,13 @@ driver_find_device_by_fwnode(struct device_driver *drv,
  * @drv: the driver we're iterating
  * @devt: devt pointer to match.
  */
-static inline struct device *driver_find_device_by_devt(struct device_driver *drv,
+static inline struct device *driver_find_device_by_devt(const struct device_driver *drv,
 							dev_t devt)
 {
 	return driver_find_device(drv, NULL, &devt, device_match_devt);
 }
 
-static inline struct device *driver_find_next_device(struct device_driver *drv,
+static inline struct device *driver_find_next_device(const struct device_driver *drv,
 						     struct device *start)
 {
 	return driver_find_device(drv, start, NULL, device_match_any);
@@ -223,14 +223,14 @@ static inline struct device *driver_find_next_device(struct device_driver *drv,
  * @adev: ACPI_COMPANION device to match.
  */
 static inline struct device *
-driver_find_device_by_acpi_dev(struct device_driver *drv,
+driver_find_device_by_acpi_dev(const struct device_driver *drv,
 			       const struct acpi_device *adev)
 {
 	return driver_find_device(drv, NULL, adev, device_match_acpi_dev);
 }
 #else
 static inline struct device *
-driver_find_device_by_acpi_dev(struct device_driver *drv, const void *adev)
+driver_find_device_by_acpi_dev(const struct device_driver *drv, const void *adev)
 {
 	return NULL;
 }
-- 
cgit v1.2.3


From c9add2e607a1ca63cc84bd1a7ab04d513096f37e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 10 Jul 2024 09:34:14 +0200
Subject: zorro: make match function take a const pointer

In commit d69d80484598 ("driver core: have match() callback in struct
bus_type take a const *"), the match callback for busses was changed to
take a const pointer to struct device_driver.  Unfortunately I missed
fixing up the zorro code, and was only noticed after-the-fact by the
kernel test robot.  Resolve this issue by properly changing the
zorro_bus_match() function.

Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Fixes: d69d80484598 ("driver core: have match() callback in struct bus_type take a const *")
Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20240710073413.495541-2-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/zorro.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/zorro.h b/include/linux/zorro.h
index db7416ed6057..f36c8d39553d 100644
--- a/include/linux/zorro.h
+++ b/include/linux/zorro.h
@@ -52,7 +52,7 @@ struct zorro_driver {
     struct device_driver driver;
 };
 
-#define	to_zorro_driver(drv)	container_of(drv, struct zorro_driver, driver)
+#define	to_zorro_driver(drv)	container_of_const(drv, struct zorro_driver, driver)
 
 
 #define zorro_for_each_dev(dev)	\
-- 
cgit v1.2.3


From 1c076f1f4d7fc7cfb45dba10b3b49d574b4c4c28 Mon Sep 17 00:00:00 2001
From: Hongzhen Luo <hongzhen@linux.alibaba.com>
Date: Wed, 10 Jul 2024 16:34:59 +0800
Subject: erofs: get rid of z_erofs_map_blocks_iter_* tracepoints

Consolidate them under erofs_map_blocks_* for simplicity since we
have many other ways to know if a given inode is compressed or not.

Signed-off-by: Hongzhen Luo <hongzhen@linux.alibaba.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240710083459.208362-1-hongzhen@linux.alibaba.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 include/trace/events/erofs.h | 32 +++-----------------------------
 1 file changed, 3 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
index b9bbfd855f2a..57df3843e650 100644
--- a/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -143,7 +143,8 @@ TRACE_EVENT(erofs_readpages,
 		__entry->raw)
 );
 
-DECLARE_EVENT_CLASS(erofs__map_blocks_enter,
+TRACE_EVENT(erofs_map_blocks_enter,
+
 	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
 		 unsigned int flags),
 
@@ -171,21 +172,8 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_enter,
 		  __entry->flags ? show_map_flags(__entry->flags) : "NULL")
 );
 
-DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_enter,
-	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
-		 unsigned flags),
-
-	TP_ARGS(inode, map, flags)
-);
-
-DEFINE_EVENT(erofs__map_blocks_enter, z_erofs_map_blocks_iter_enter,
-	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
-		 unsigned int flags),
-
-	TP_ARGS(inode, map, flags)
-);
+TRACE_EVENT(erofs_map_blocks_exit,
 
-DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
 	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
 		 unsigned int flags, int ret),
 
@@ -223,20 +211,6 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
 		  show_mflags(__entry->mflags), __entry->ret)
 );
 
-DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_exit,
-	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
-		 unsigned flags, int ret),
-
-	TP_ARGS(inode, map, flags, ret)
-);
-
-DEFINE_EVENT(erofs__map_blocks_exit, z_erofs_map_blocks_iter_exit,
-	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
-		 unsigned int flags, int ret),
-
-	TP_ARGS(inode, map, flags, ret)
-);
-
 TRACE_EVENT(erofs_destroy_inode,
 	TP_PROTO(struct inode *inode),
 
-- 
cgit v1.2.3


From 396a27e91265a6632be17bebacb6743f0b9447be Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Thu, 4 Jul 2024 15:45:00 +0200
Subject: dm: Remove max_write_zeroes_granularity

The max_write_zeroes_granularity boolean of struct dm_target is used in
__process_abnormal_io() but never set by any target. Remove this field
and the dead code using it.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
 include/linux/device-mapper.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 3611b230d0aa..5b7e96653ec6 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -369,12 +369,6 @@ struct dm_target {
 	 */
 	bool max_secure_erase_granularity:1;
 
-	/*
-	 * Set if this target requires that write_zeroes be split on
-	 * 'max_write_zeroes_sectors' boundaries.
-	 */
-	bool max_write_zeroes_granularity:1;
-
 	/*
 	 * Set if we need to limit the number of in-flight bios when swapping.
 	 */
-- 
cgit v1.2.3


From 9d45db03acf9cee4f83148c403d105b1a38a0f23 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Thu, 4 Jul 2024 15:45:25 +0200
Subject: dm: Remove max_secure_erase_granularity

The max_secure_erase_granularity boolean of struct dm_target is used in
__process_abnormal_io() but never set by any target. Remove this field
and the dead code using it.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
 include/linux/device-mapper.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 5b7e96653ec6..4ba2e73993bd 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -363,12 +363,6 @@ struct dm_target {
 	 */
 	bool max_discard_granularity:1;
 
-	/*
-	 * Set if this target requires that secure_erases be split on
-	 * 'max_secure_erase_sectors' boundaries.
-	 */
-	bool max_secure_erase_granularity:1;
-
 	/*
 	 * Set if we need to limit the number of in-flight bios when swapping.
 	 */
-- 
cgit v1.2.3


From a21f9edb13b0d8066775cbd5efa7261e41871182 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Thu, 4 Jul 2024 16:17:15 +0200
Subject: dm: factor out helper function from dm_get_device

Factor out a helper function, dm_devt_from_path(), from dm_get_device()
for use in dm targets.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
 include/linux/device-mapper.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 4ba2e73993bd..384649a61bfa 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -179,6 +179,11 @@ int dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode,
 		  struct dm_dev **result);
 void dm_put_device(struct dm_target *ti, struct dm_dev *d);
 
+/*
+ * Helper function for getting devices
+ */
+int dm_devt_from_path(const char *path, dev_t *dev_p);
+
 /*
  * Information about a target type
  */
-- 
cgit v1.2.3


From 2bb6b10ebe5d42c119827b57ee8123175b7ecc3d Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Tue, 9 Jul 2024 08:49:56 -0700
Subject: usb: gadget: Use u16 types for 16-bit fields

Since the beginning of time, struct usb_ep::maxpacket was a bitfield,
and when new 16-bit members were added, the convention was followed:

1da177e4c3f41 (Linus Torvalds   2005-04-16 236) unsigned maxpacket:16;
e117e742d3106 (Robert Baldyga   2013-12-13 237) unsigned maxpacket_limit:16;
a59d6b91cbca5 (Tatyana Brokhman 2011-06-28 238) unsigned max_streams:16;

However, there is no need for this as a simple u16 can be used instead,
simplifying the struct and the resulting compiler binary output. Switch
to u16 for all three, and rearrange struct slightly to minimize holes.
No change in the final size of the struct results; the 2 byte gap is
just moved to the end, as seen with pahole:

-       /* XXX 2 bytes hole, try to pack */
        ...
        /* size: 72, cachelines: 2, members: 15 */
        ...
+       /* padding: 2 */

Changing this simplifies future introspection[1] of maxpacket's type during
allocations:

drivers/usb/gadget/function/f_tcm.c:330:24: error: 'typeof' applied to a bit-field
     330 |  fu->cmd.buf = kmalloc(fu->ep_out->maxpacket, GFP_KERNEL);

Link: https://lore.kernel.org/all/202407090928.6UaOAZAJ-lkp@intel.com [1]
Signed-off-by: Kees Cook <kees@kernel.org>
Link: https://lore.kernel.org/r/20240709154953.work.953-kees@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/gadget.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 56dda8e1562d..df33333650a0 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -229,18 +229,18 @@ struct usb_ep {
 
 	const char		*name;
 	const struct usb_ep_ops	*ops;
+	const struct usb_endpoint_descriptor	*desc;
+	const struct usb_ss_ep_comp_descriptor	*comp_desc;
 	struct list_head	ep_list;
 	struct usb_ep_caps	caps;
 	bool			claimed;
 	bool			enabled;
-	unsigned		maxpacket:16;
-	unsigned		maxpacket_limit:16;
-	unsigned		max_streams:16;
 	unsigned		mult:2;
 	unsigned		maxburst:5;
 	u8			address;
-	const struct usb_endpoint_descriptor	*desc;
-	const struct usb_ss_ep_comp_descriptor	*comp_desc;
+	u16			maxpacket;
+	u16			maxpacket_limit;
+	u16			max_streams;
 };
 
 /*-------------------------------------------------------------------------*/
-- 
cgit v1.2.3


From a5f81642a7228489292f842a106e33c558121e8b Mon Sep 17 00:00:00 2001
From: Kerem Karabay <kekrby@gmail.com>
Date: Sat, 6 Jul 2024 12:03:23 +0000
Subject: USB: core: add 'shutdown' callback to usb_driver

Currently there is no standardized method for USB drivers to handle
shutdown events. This patch simplifies running code on shutdown for USB
devices by adding a shutdown callback to usb_driver.

Signed-off-by: Kerem Karabay <kekrby@gmail.com>
Signed-off-by: Aditya Garg <gargaditya08@live.com>
Link: https://lore.kernel.org/r/7AAC1BF4-8B60-448D-A3C1-B7E80330BE42@live.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 1913a13833f2..832997a9da0a 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1171,6 +1171,7 @@ extern ssize_t usb_show_dynids(struct usb_dynids *dynids, char *buf);
  *	post_reset method is called.
  * @post_reset: Called by usb_reset_device() after the device
  *	has been reset
+ * @shutdown: Called at shut-down time to quiesce the device.
  * @id_table: USB drivers use ID table to support hotplugging.
  *	Export this with MODULE_DEVICE_TABLE(usb,...).  This must be set
  *	or your driver's probe function will never get called.
@@ -1222,6 +1223,8 @@ struct usb_driver {
 	int (*pre_reset)(struct usb_interface *intf);
 	int (*post_reset)(struct usb_interface *intf);
 
+	void (*shutdown)(struct usb_interface *intf);
+
 	const struct usb_device_id *id_table;
 	const struct attribute_group **dev_groups;
 
-- 
cgit v1.2.3


From b70f12e962bc73a091a7b853f24ae2049613c684 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 26 Apr 2024 08:44:51 +0200
Subject: kbuild: verify asm-generic header list

In order to integrate the system call header generation with generating
the asm-generic wrappers, restrict the generated headers to those that
actually exist in include/asm-generic/.

The path is already known, so add these as a dependency.

The asm-generic/bugs.h header was removed in commit 61235b24b9cb ("init:
Remove check_bugs() leftovers"), which now causes a build failure, so
drop it from the list.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/Kbuild | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index b20fa25a7e8d..df7ed81c4589 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -9,7 +9,6 @@ mandatory-y += archrandom.h
 mandatory-y += barrier.h
 mandatory-y += bitops.h
 mandatory-y += bug.h
-mandatory-y += bugs.h
 mandatory-y += cacheflush.h
 mandatory-y += cfi.h
 mandatory-y += checksum.h
-- 
cgit v1.2.3


From 505d66d1abfb90853e24ab6cbdf83b611473d6fc Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 8 May 2024 17:13:34 +0200
Subject: clone3: drop __ARCH_WANT_SYS_CLONE3 macro

When clone3() was introduced, it was not obvious how each architecture
deals with setting up the stack and keeping the register contents in
a fork()-like system call, so this was left for the architecture
maintainers to implement, with __ARCH_WANT_SYS_CLONE3 defined by those
that already implement it.

Five years later, we still have a few architectures left that are missing
clone3(), and the macro keeps getting in the way as it's fundamentally
different from all the other __ARCH_WANT_SYS_* macros that are meant
to provide backwards-compatibility with applications using older
syscalls that are no longer provided by default.

Address this by reversing the polarity of the macro, adding an
__ARCH_BROKEN_SYS_CLONE3 macro to all architectures that don't
already provide the syscall, and remove __ARCH_WANT_SYS_CLONE3
from all the other ones.

Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/uapi/asm-generic/unistd.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index d4cc26932ff4..5bf6148cac2b 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -776,12 +776,8 @@ __SYSCALL(__NR_fsmount, sys_fsmount)
 __SYSCALL(__NR_fspick, sys_fspick)
 #define __NR_pidfd_open 434
 __SYSCALL(__NR_pidfd_open, sys_pidfd_open)
-
-#ifdef __ARCH_WANT_SYS_CLONE3
 #define __NR_clone3 435
 __SYSCALL(__NR_clone3, sys_clone3)
-#endif
-
 #define __NR_close_range 436
 __SYSCALL(__NR_close_range, sys_close_range)
 #define __NR_openat2 437
-- 
cgit v1.2.3


From 5f67eef6dff39421215e9134f1eaae51b67a73b7 Mon Sep 17 00:00:00 2001
From: Vamsi Attunuru <vattunuru@marvell.com>
Date: Sat, 6 Jul 2024 08:30:09 -0700
Subject: misc: mrvl-cn10k-dpi: add Octeon CN10K DPI administrative driver

Adds a misc driver for Marvell CN10K DPI(DMA Engine) device's physical
function which initializes DPI DMA hardware's global configuration and
enables hardware mailbox channels between physical function (PF) and
it's virtual functions (VF). VF device drivers (User space drivers) use
this hw mailbox to communicate any required device configuration on it's
respective VF device. Accordingly, this DPI PF driver provisions the
VF device resources.

At the hardware level, the DPI physical function (PF) acts as a management
interface to setup the VF device resources, VF devices are only provisioned
to handle or control the actual DMA Engine's data transfer capabilities.

Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
Reviewed-by: Srujana Challa <schalla@marvell.com>
Link: https://lore.kernel.org/r/20240706153009.3775333-1-vattunuru@marvell.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/misc/mrvl_cn10k_dpi.h | 39 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 include/uapi/misc/mrvl_cn10k_dpi.h

(limited to 'include')

diff --git a/include/uapi/misc/mrvl_cn10k_dpi.h b/include/uapi/misc/mrvl_cn10k_dpi.h
new file mode 100644
index 000000000000..8db902eaa907
--- /dev/null
+++ b/include/uapi/misc/mrvl_cn10k_dpi.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Marvell Octeon CN10K DPI driver
+ *
+ * Copyright (C) 2024 Marvell.
+ *
+ */
+
+#ifndef __MRVL_CN10K_DPI_H__
+#define __MRVL_CN10K_DPI_H__
+
+#include <linux/types.h>
+
+#define DPI_MAX_ENGINES 6
+
+struct dpi_mps_mrrs_cfg {
+	__u16 max_read_req_sz; /* Max read request size */
+	__u16 max_payload_sz;  /* Max payload size */
+	__u16 port; /* Ebus port */
+	__u16 reserved; /* Reserved */
+};
+
+struct dpi_engine_cfg {
+	__u64 fifo_mask; /* FIFO size mask in KBytes */
+	__u16 molr[DPI_MAX_ENGINES]; /* Max outstanding load requests */
+	__u16 update_molr; /* '1' to update engine MOLR */
+	__u16 reserved; /* Reserved */
+};
+
+/* DPI ioctl numbers */
+#define DPI_MAGIC_NUM	0xB8
+
+/* Set MPS & MRRS parameters */
+#define DPI_MPS_MRRS_CFG _IOW(DPI_MAGIC_NUM, 1, struct dpi_mps_mrrs_cfg)
+
+/* Set Engine FIFO configuration */
+#define DPI_ENGINE_CFG   _IOW(DPI_MAGIC_NUM, 2, struct dpi_engine_cfg)
+
+#endif /* __MRVL_CN10K_DPI_H__ */
-- 
cgit v1.2.3


From af46fe8c41de5b79358c3007e3854dda9c61c7dc Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 10 Jul 2024 09:44:52 +0200
Subject: dio: Have dio_bus_match() callback take a const *
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drivers/dio/dio-driver.c:128:11: error: initialization of ‘int (*)(struct device *, const struct device_driver *)’ from incompatible pointer type ‘int (*)(struct device *, struct device_driver *)’ [-Werror=incompatible-pointer-types]
  128 |  .match = dio_bus_match,
      |           ^~~~~~~~~~~~~
drivers/dio/dio-driver.c:128:11: note: (near initialization for ‘dio_bus_type.match’)

Reported-by: noreply@ellerman.id.au
Fixes: d69d804845985c29 ("driver core: have match() callback in struct bus_type take a const *")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20240710074452.2841173-1-geert@linux-m68k.org
[ added dio.h change - gregkh ]
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/dio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dio.h b/include/linux/dio.h
index 2b5923909f96..464331c4c4a7 100644
--- a/include/linux/dio.h
+++ b/include/linux/dio.h
@@ -93,7 +93,7 @@ struct dio_driver {
 	struct device_driver driver;
 };
 
-#define to_dio_driver(drv)    container_of(drv, struct dio_driver, driver)
+#define to_dio_driver(drv)    container_of_const(drv, struct dio_driver, driver)
 
 /* DIO/DIO-II boards all have the following 8bit registers.
  * These are offsets from the base of the device.
-- 
cgit v1.2.3


From 29f1c1ae6d2fff3bf4f89d265f4a1a7c8ab78a8e Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sat, 29 Jun 2024 22:12:09 -0400
Subject: closures: fix closure_sync + closure debugging

originally, stack closures were only used synchronously, and with the
original implementation of closure_sync() the ref never hit 0; thus,
closure_put_after_sub() assumes that if the ref hits 0 it's on the debug
list, in debug mode.

that's no longer true with the current implementation of closure_sync,
so we need a new magic so closure_debug_destroy() doesn't pop an assert.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/closure.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/closure.h b/include/linux/closure.h
index 59b8c06b11ff..2af44427107d 100644
--- a/include/linux/closure.h
+++ b/include/linux/closure.h
@@ -159,6 +159,7 @@ struct closure {
 #ifdef CONFIG_DEBUG_CLOSURES
 #define CLOSURE_MAGIC_DEAD	0xc054dead
 #define CLOSURE_MAGIC_ALIVE	0xc054a11e
+#define CLOSURE_MAGIC_STACK	0xc05451cc
 
 	unsigned int		magic;
 	struct list_head	all;
@@ -323,12 +324,18 @@ static inline void closure_init_stack(struct closure *cl)
 {
 	memset(cl, 0, sizeof(struct closure));
 	atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+#ifdef CONFIG_DEBUG_CLOSURES
+	cl->magic = CLOSURE_MAGIC_STACK;
+#endif
 }
 
 static inline void closure_init_stack_release(struct closure *cl)
 {
 	memset(cl, 0, sizeof(struct closure));
 	atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+#ifdef CONFIG_DEBUG_CLOSURES
+	cl->magic = CLOSURE_MAGIC_STACK;
+#endif
 }
 
 /**
-- 
cgit v1.2.3


From 33c651a3fba9a3d380cb0e35ea207e048d39bed9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <ukleinek@baylibre.com>
Date: Fri, 7 Jun 2024 18:00:13 +0200
Subject: pwm: Make use of a symbol namespace for the core
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Define all pwm core's symbols in the namespace "PWM". The necessary
module import statement is just added to the main header, this way every
file that knows about the public functions automatically has this
namespace available.

Thanks to Biju Das for pointing out a cut'n'paste failure in my initial
patch.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Link: https://lore.kernel.org/r/20240607160012.1206874-2-u.kleine-koenig@baylibre.com
Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
---
 include/linux/pwm.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 60b92c2c75ef..812c550de60c 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -4,9 +4,12 @@
 
 #include <linux/device.h>
 #include <linux/err.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/of.h>
 
+MODULE_IMPORT_NS(PWM);
+
 struct pwm_chip;
 
 /**
-- 
cgit v1.2.3


From d6f66e292676db976c4d42df2631427236c36fdb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <ukleinek@baylibre.com>
Date: Fri, 7 Jun 2024 10:44:17 +0200
Subject: pwm: Make pwm_request_from_chip() private to the core
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The last user of this function outside of core.c is gone, so it can be
made static.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Reviewed-by: Tzung-Bi Shih <tzungbi@kernel.org>
Link: https://lore.kernel.org/r/20240607084416.897777-8-u.kleine-koenig@baylibre.com
Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
---
 include/linux/pwm.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 812c550de60c..2e225f3b4282 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -410,10 +410,6 @@ void pwmchip_remove(struct pwm_chip *chip);
 int __devm_pwmchip_add(struct device *dev, struct pwm_chip *chip, struct module *owner);
 #define devm_pwmchip_add(dev, chip) __devm_pwmchip_add(dev, chip, THIS_MODULE)
 
-struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
-					 unsigned int index,
-					 const char *label);
-
 struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *chip,
 		const struct of_phandle_args *args);
 struct pwm_device *of_pwm_single_xlate(struct pwm_chip *chip,
@@ -508,14 +504,6 @@ static inline int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip)
 	return -EINVAL;
 }
 
-static inline struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
-						       unsigned int index,
-						       const char *label)
-{
-	might_sleep();
-	return ERR_PTR(-ENODEV);
-}
-
 static inline struct pwm_device *pwm_get(struct device *dev,
 					 const char *consumer)
 {
-- 
cgit v1.2.3


From a96d3659c9437673dbef5c05cba31a3c0b5a0a7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <ukleinek@baylibre.com>
Date: Fri, 7 Jun 2024 12:42:31 +0200
Subject: pwm: Remove wrong implementation details from pwm_ops's documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When .get_state() is called is an implementation detail that
implementors and users shouldn't care about and rely on. Additionally
it's wrong, because with PWM_DEBUG enabled it is called more often.

Just drop the wrong statement.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Link: https://lore.kernel.org/r/611ba758d7e9fb2695e96b23cb7ceeefb6ba8513.1717756902.git.u.kleine-koenig@baylibre.com
Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
---
 include/linux/pwm.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 2e225f3b4282..75ad0d2fd949 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -252,9 +252,7 @@ struct pwm_capture {
  * @free: optional hook for freeing a PWM
  * @capture: capture and report PWM signal
  * @apply: atomically apply a new PWM config
- * @get_state: get the current PWM state. This function is only
- *	       called once per PWM device when the PWM chip is
- *	       registered.
+ * @get_state: get the current PWM state.
  */
 struct pwm_ops {
 	int (*request)(struct pwm_chip *chip, struct pwm_device *pwm);
-- 
cgit v1.2.3


From da804fa9bc718e4210ec27cc0457ecc1eb073a14 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <ukleinek@kernel.org>
Date: Fri, 14 Jun 2024 17:39:00 +0200
Subject: pwm: Drop pwm_apply_state()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This function is not supposed to be used any more since commit
c748a6d77c06 ("pwm: Rename pwm_apply_state() to
pwm_apply_might_sleep()") that is included in v6.8-rc1. Two kernel
releases should be enough for everyone to adapt, so drop the old
function that was introduced as a compatibility stub for the transition.

Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
---
 include/linux/pwm.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 75ad0d2fd949..f8c2dc12dbd3 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -563,13 +563,6 @@ static inline void pwm_apply_args(struct pwm_device *pwm)
 	pwm_apply_might_sleep(pwm, &state);
 }
 
-/* only for backwards-compatibility, new code should not use this */
-static inline int pwm_apply_state(struct pwm_device *pwm,
-				  const struct pwm_state *state)
-{
-	return pwm_apply_might_sleep(pwm, state);
-}
-
 struct pwm_lookup {
 	struct list_head list;
 	const char *provider;
-- 
cgit v1.2.3


From 2cb13dec8c5e5e104fd2f71c2dee761d6ed9a333 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <aleksander.lobakin@intel.com>
Date: Thu, 20 Jun 2024 15:53:34 +0200
Subject: cache: add __cacheline_group_{begin, end}_aligned() (+ couple more)

__cacheline_group_begin(), unfortunately, doesn't align the group
anyhow. If it is wanted, then you need to do something like

__cacheline_group_begin(grp) __aligned(ALIGN)

which isn't really convenient nor compact.
Add the _aligned() counterparts to align the groups automatically to
either the specified alignment (optional) or ``SMP_CACHE_BYTES``.
Note that the actual struct layout will then be (on x64 with 64-byte CL):

struct x {
	u32 y;				// offset 0, size 4, padding 56
	__cacheline_group_begin__grp;	// offset 64, size 0
	u32 z;				// offset 64, size 4, padding 4
	__cacheline_group_end__grp;	// offset 72, size 0
	__cacheline_group_pad__grp;	// offset 72, size 0, padding 56
	u32 w;				// offset 128
};

The end marker is aligned to long, so that you can assert the struct
size more strictly, but the offset of the next field in the structure
will be aligned to the group alignment, so that the next field won't
fall into the group it's not intended to.

Add __LARGEST_ALIGN definition and LARGEST_ALIGN() macro.
__LARGEST_ALIGN is the value to which the compilers align fields when
__aligned_largest is specified. Sometimes, it might be needed to get
this value outside of variable definitions. LARGEST_ALIGN() is macro
which just aligns a value to __LARGEST_ALIGN.
Also add SMP_CACHE_ALIGN(), similar to L1_CACHE_ALIGN(), but using
``SMP_CACHE_BYTES`` instead of ``L1_CACHE_BYTES`` as the former
also accounts L2, needed in some cases.

Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/cache.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

(limited to 'include')

diff --git a/include/linux/cache.h b/include/linux/cache.h
index 0ecb17bb6883..ca2a05682a54 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -13,6 +13,32 @@
 #define SMP_CACHE_BYTES L1_CACHE_BYTES
 #endif
 
+/**
+ * SMP_CACHE_ALIGN - align a value to the L2 cacheline size
+ * @x: value to align
+ *
+ * On some architectures, L2 ("SMP") CL size is bigger than L1, and sometimes,
+ * this needs to be accounted.
+ *
+ * Return: aligned value.
+ */
+#ifndef SMP_CACHE_ALIGN
+#define SMP_CACHE_ALIGN(x)	ALIGN(x, SMP_CACHE_BYTES)
+#endif
+
+/*
+ * ``__aligned_largest`` aligns a field to the value most optimal for the
+ * target architecture to perform memory operations. Get the actual value
+ * to be able to use it anywhere else.
+ */
+#ifndef __LARGEST_ALIGN
+#define __LARGEST_ALIGN		sizeof(struct { long x; } __aligned_largest)
+#endif
+
+#ifndef LARGEST_ALIGN
+#define LARGEST_ALIGN(x)	ALIGN(x, __LARGEST_ALIGN)
+#endif
+
 /*
  * __read_mostly is used to keep rarely changing variables out of frequently
  * updated cachelines. Its use should be reserved for data that is used
@@ -95,6 +121,39 @@
 	__u8 __cacheline_group_end__##GROUP[0]
 #endif
 
+/**
+ * __cacheline_group_begin_aligned - declare an aligned group start
+ * @GROUP: name of the group
+ * @...: optional group alignment
+ *
+ * The following block inside a struct:
+ *
+ *	__cacheline_group_begin_aligned(grp);
+ *	field a;
+ *	field b;
+ *	__cacheline_group_end_aligned(grp);
+ *
+ * will always be aligned to either the specified alignment or
+ * ``SMP_CACHE_BYTES``.
+ */
+#define __cacheline_group_begin_aligned(GROUP, ...)		\
+	__cacheline_group_begin(GROUP)				\
+	__aligned((__VA_ARGS__ + 0) ? : SMP_CACHE_BYTES)
+
+/**
+ * __cacheline_group_end_aligned - declare an aligned group end
+ * @GROUP: name of the group
+ * @...: optional alignment (same as was in __cacheline_group_begin_aligned())
+ *
+ * Note that the end marker is aligned to sizeof(long) to allow more precise
+ * size assertion. It also declares a padding at the end to avoid next field
+ * falling into this cacheline.
+ */
+#define __cacheline_group_end_aligned(GROUP, ...)		\
+	__cacheline_group_end(GROUP) __aligned(sizeof(long));	\
+	struct { } __cacheline_group_pad__##GROUP		\
+	__aligned((__VA_ARGS__ + 0) ? : SMP_CACHE_BYTES)
+
 #ifndef CACHELINE_ASSERT_GROUP_MEMBER
 #define CACHELINE_ASSERT_GROUP_MEMBER(TYPE, GROUP, MEMBER) \
 	BUILD_BUG_ON(!(offsetof(TYPE, MEMBER) >= \
-- 
cgit v1.2.3


From 39daa09d34ada1bc7227d68def63e0a2105b5496 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <aleksander.lobakin@intel.com>
Date: Thu, 20 Jun 2024 15:53:35 +0200
Subject: page_pool: use __cacheline_group_{begin, end}_aligned()

Instead of doing __cacheline_group_begin() __aligned(), use the new
__cacheline_group_{begin,end}_aligned(), so that it will take care
of the group alignment itself.
Also replace open-coded `4 * sizeof(long)` in two places with
a definition.

Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/net/page_pool/types.h | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index b70bcc14ceda..50569fed7868 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -129,6 +129,16 @@ struct page_pool_stats {
 };
 #endif
 
+/* The whole frag API block must stay within one cacheline. On 32-bit systems,
+ * sizeof(long) == sizeof(int), so that the block size is ``3 * sizeof(long)``.
+ * On 64-bit systems, the actual size is ``2 * sizeof(long) + sizeof(int)``.
+ * The closest pow-2 to both of them is ``4 * sizeof(long)``, so just use that
+ * one for simplicity.
+ * Having it aligned to a cacheline boundary may be excessive and doesn't bring
+ * any good.
+ */
+#define PAGE_POOL_FRAG_GROUP_ALIGN	(4 * sizeof(long))
+
 struct page_pool {
 	struct page_pool_params_fast p;
 
@@ -142,19 +152,11 @@ struct page_pool {
 	bool system:1;			/* This is a global percpu pool */
 #endif
 
-	/* The following block must stay within one cacheline. On 32-bit
-	 * systems, sizeof(long) == sizeof(int), so that the block size is
-	 * ``3 * sizeof(long)``. On 64-bit systems, the actual size is
-	 * ``2 * sizeof(long) + sizeof(int)``. The closest pow-2 to both of
-	 * them is ``4 * sizeof(long)``, so just use that one for simplicity.
-	 * Having it aligned to a cacheline boundary may be excessive and
-	 * doesn't bring any good.
-	 */
-	__cacheline_group_begin(frag) __aligned(4 * sizeof(long));
+	__cacheline_group_begin_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);
 	long frag_users;
 	netmem_ref frag_page;
 	unsigned int frag_offset;
-	__cacheline_group_end(frag);
+	__cacheline_group_end_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);
 
 	struct delayed_work release_dw;
 	void (*disconnect)(void *pool);
-- 
cgit v1.2.3


From 62c884256ea1f898b00f20729a306a4eeb1840b1 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <aleksander.lobakin@intel.com>
Date: Thu, 20 Jun 2024 15:53:36 +0200
Subject: libeth: add cacheline / struct layout assertion helpers

Add helpers to assert struct field layout, a bit more crazy and
networking-specific than in <linux/cache.h>. They assume you have
3 CL-aligned groups (read-mostly, read-write, cold) in a struct
you want to assert, and nothing besides them.
For 64-bit with 64-byte cachelines, the assertions are as strict
as possible, as the size can then be easily predicted.
For the rest, make sure they don't cross the specified bound.

Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/net/libeth/cache.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 include/net/libeth/cache.h

(limited to 'include')

diff --git a/include/net/libeth/cache.h b/include/net/libeth/cache.h
new file mode 100644
index 000000000000..bdb0c043ce61
--- /dev/null
+++ b/include/net/libeth/cache.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2024 Intel Corporation */
+
+#ifndef __LIBETH_CACHE_H
+#define __LIBETH_CACHE_H
+
+#include <linux/cache.h>
+
+/**
+ * libeth_cacheline_group_assert - make sure cacheline group size is expected
+ * @type: type of the structure containing the group
+ * @grp: group name inside the struct
+ * @sz: expected group size
+ */
+#if defined(CONFIG_64BIT) && SMP_CACHE_BYTES == 64
+#define libeth_cacheline_group_assert(type, grp, sz)			      \
+	static_assert(offsetof(type, __cacheline_group_end__##grp) -	      \
+		      offsetofend(type, __cacheline_group_begin__##grp) ==    \
+		      (sz))
+#define __libeth_cacheline_struct_assert(type, sz)			      \
+	static_assert(sizeof(type) == (sz))
+#else /* !CONFIG_64BIT || SMP_CACHE_BYTES != 64 */
+#define libeth_cacheline_group_assert(type, grp, sz)			      \
+	static_assert(offsetof(type, __cacheline_group_end__##grp) -	      \
+		      offsetofend(type, __cacheline_group_begin__##grp) <=    \
+		      (sz))
+#define __libeth_cacheline_struct_assert(type, sz)			      \
+	static_assert(sizeof(type) <= (sz))
+#endif /* !CONFIG_64BIT || SMP_CACHE_BYTES != 64 */
+
+#define __libeth_cls1(sz1)	SMP_CACHE_ALIGN(sz1)
+#define __libeth_cls2(sz1, sz2)	(SMP_CACHE_ALIGN(sz1) + SMP_CACHE_ALIGN(sz2))
+#define __libeth_cls3(sz1, sz2, sz3)					      \
+	(SMP_CACHE_ALIGN(sz1) + SMP_CACHE_ALIGN(sz2) + SMP_CACHE_ALIGN(sz3))
+#define __libeth_cls(...)						      \
+	CONCATENATE(__libeth_cls, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__)
+
+/**
+ * libeth_cacheline_struct_assert - make sure CL-based struct size is expected
+ * @type: type of the struct
+ * @...: from 1 to 3 CL group sizes (read-mostly, read-write, cold)
+ *
+ * When a struct contains several CL groups, it's difficult to predict its size
+ * on different architectures. The macro instead takes sizes of all of the
+ * groups the structure contains and generates the final struct size.
+ */
+#define libeth_cacheline_struct_assert(type, ...)			      \
+	__libeth_cacheline_struct_assert(type, __libeth_cls(__VA_ARGS__));    \
+	static_assert(__alignof(type) >= SMP_CACHE_BYTES)
+
+/**
+ * libeth_cacheline_set_assert - make sure CL-based struct layout is expected
+ * @type: type of the struct
+ * @ro: expected size of the read-mostly group
+ * @rw: expected size of the read-write group
+ * @c: expected size of the cold group
+ *
+ * Check that each group size is expected and then do final struct size check.
+ */
+#define libeth_cacheline_set_assert(type, ro, rw, c)			      \
+	libeth_cacheline_group_assert(type, read_mostly, ro);		      \
+	libeth_cacheline_group_assert(type, read_write, rw);		      \
+	libeth_cacheline_group_assert(type, cold, c);			      \
+	libeth_cacheline_struct_assert(type, ro, rw, c)
+
+#endif /* __LIBETH_CACHE_H */
-- 
cgit v1.2.3


From 584e86e14c59d36688633002613792923620d8c0 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Wed, 10 Jul 2024 11:36:38 +0100
Subject: firmware: cs_dsp: Make wmfw and bin filename arguments const char *

The wmfw_filename and bin_filename strings passed into cs_dsp_power_up()
and cs_dsp_adsp1_power_up() should be const char *.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20240710103640.78197-3-rf@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/firmware/cirrus/cs_dsp.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h
index 82687e07a7c2..8078dc377948 100644
--- a/include/linux/firmware/cirrus/cs_dsp.h
+++ b/include/linux/firmware/cirrus/cs_dsp.h
@@ -213,13 +213,13 @@ int cs_dsp_adsp2_init(struct cs_dsp *dsp);
 int cs_dsp_halo_init(struct cs_dsp *dsp);
 
 int cs_dsp_adsp1_power_up(struct cs_dsp *dsp,
-			  const struct firmware *wmfw_firmware, char *wmfw_filename,
-			  const struct firmware *coeff_firmware, char *coeff_filename,
+			  const struct firmware *wmfw_firmware, const char *wmfw_filename,
+			  const struct firmware *coeff_firmware, const char *coeff_filename,
 			  const char *fw_name);
 void cs_dsp_adsp1_power_down(struct cs_dsp *dsp);
 int cs_dsp_power_up(struct cs_dsp *dsp,
-		    const struct firmware *wmfw_firmware, char *wmfw_filename,
-		    const struct firmware *coeff_firmware, char *coeff_filename,
+		    const struct firmware *wmfw_firmware, const char *wmfw_filename,
+		    const struct firmware *coeff_firmware, const char *coeff_filename,
 		    const char *fw_name);
 void cs_dsp_power_down(struct cs_dsp *dsp);
 int cs_dsp_run(struct cs_dsp *dsp);
-- 
cgit v1.2.3


From dc0e5ca8856dc6a97e3b117879dfb2b52bda06b6 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Wed, 10 Jul 2024 11:36:40 +0100
Subject: firmware: cs_dsp: Rename fw_ver to wmfw_ver

Rename the confusingly named struct member fw_ver to wmfw_ver. It
contains the wmfw format version of the loaded wmfw file.

This commit also contains an update to wm_adsp for the new name.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20240710103640.78197-5-rf@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/firmware/cirrus/cs_dsp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h
index 8078dc377948..2e649810169f 100644
--- a/include/linux/firmware/cirrus/cs_dsp.h
+++ b/include/linux/firmware/cirrus/cs_dsp.h
@@ -167,7 +167,7 @@ struct cs_dsp {
 	const struct cs_dsp_region *mem;
 	int num_mems;
 
-	int fw_ver;
+	int wmfw_ver;
 
 	bool booted;
 	bool running;
-- 
cgit v1.2.3


From 3c1ff93b4deea502cd8b0869839557cab2a28b71 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 9 Jul 2024 18:56:20 -0700
Subject: regmap: Implement regmap_multi_reg_read()

regmap_multi_reg_read() is similar to regmap_bilk_read() but reads from
an array of non-sequential registers.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20240710015622.1960522-2-linux@roeck-us.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regmap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index a6bc2980a98b..9c1ddbf82719 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -1237,6 +1237,8 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg,
 		      void *val, size_t val_len);
 int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val,
 		     size_t val_count);
+int regmap_multi_reg_read(struct regmap *map, unsigned int *reg, void *val,
+			  size_t val_count);
 int regmap_update_bits_base(struct regmap *map, unsigned int reg,
 			    unsigned int mask, unsigned int val,
 			    bool *change, bool async, bool force);
-- 
cgit v1.2.3


From 5aaac1aece4e4db9d620791a33f7a4173c660e65 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <aleksander.lobakin@intel.com>
Date: Thu, 20 Jun 2024 15:53:45 +0200
Subject: libeth: support different types of buffers for Rx

Unlike previous generations, idpf requires more buffer types for optimal
performance. This includes: header buffers, short buffers, and
no-overhead buffers (w/o headroom and tailroom, for TCP zerocopy when
the header split is enabled).
Introduce libeth Rx buffer type and calculate page_pool params
accordingly. All the HW-related details like buffer alignment are still
accounted. For the header buffers, pick 256 bytes as in most places in
the kernel (have you ever seen frames with bigger headers?).

Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/net/libeth/rx.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h
index f29ea3e34c6c..43574bd6612f 100644
--- a/include/net/libeth/rx.h
+++ b/include/net/libeth/rx.h
@@ -17,6 +17,8 @@
 #define LIBETH_MAX_HEADROOM	LIBETH_SKB_HEADROOM
 /* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
 #define LIBETH_RX_LL_LEN	(ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)
+/* Maximum supported L2-L4 header length */
+#define LIBETH_MAX_HEAD		roundup_pow_of_two(max(MAX_HEADER, 256))
 
 /* Always use order-0 pages */
 #define LIBETH_RX_PAGE_ORDER	0
@@ -43,6 +45,18 @@ struct libeth_fqe {
 	u32			truesize;
 } __aligned_largest;
 
+/**
+ * enum libeth_fqe_type - enum representing types of Rx buffers
+ * @LIBETH_FQE_MTU: buffer size is determined by MTU
+ * @LIBETH_FQE_SHORT: buffer size is smaller than MTU, for short frames
+ * @LIBETH_FQE_HDR: buffer size is ```LIBETH_MAX_HEAD```-sized, for headers
+ */
+enum libeth_fqe_type {
+	LIBETH_FQE_MTU		= 0U,
+	LIBETH_FQE_SHORT,
+	LIBETH_FQE_HDR,
+};
+
 /**
  * struct libeth_fq - structure representing a buffer (fill) queue
  * @fp: hotpath part of the structure
@@ -50,6 +64,8 @@ struct libeth_fqe {
  * @fqes: array of Rx buffers
  * @truesize: size to allocate per buffer, w/overhead
  * @count: number of descriptors/buffers the queue has
+ * @type: type of the buffers this queue has
+ * @hsplit: flag whether header split is enabled
  * @buf_len: HW-writeable length per each buffer
  * @nid: ID of the closest NUMA node with memory
  */
@@ -63,6 +79,9 @@ struct libeth_fq {
 	);
 
 	/* Cold fields */
+	enum libeth_fqe_type	type:2;
+	bool			hsplit:1;
+
 	u32			buf_len;
 	int			nid;
 };
-- 
cgit v1.2.3


From 9fa001cf3bb0598aad09d15b2896f7db9ef87637 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Mon, 1 Jul 2024 18:59:31 +0000
Subject: mm: memcg: drop obsolete cache line padding in struct mem_cgroup

After the grouping of the cgroup v1-related fields and the corresponding
reorganization of the struct mem_cgroup, the existing cache line padding
doesn't make much sense anymore.  Let's drop it for now and put back to
new places, if necessary.

Link: https://lkml.kernel.org/r/20240701185932.704807-1-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Suggested-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d0c9365ff039..8b5b3ddeba05 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -220,8 +220,6 @@ struct mem_cgroup {
 	/* handle for "memory.swap.events" */
 	struct cgroup_file swap_events_file;
 
-	CACHELINE_PADDING(_pad1_);
-
 	/* memory.stat */
 	struct memcg_vmstats	*vmstats;
 
@@ -305,8 +303,6 @@ struct mem_cgroup {
 	bool tcpmem_active;
 	int tcpmem_pressure;
 
-	CACHELINE_PADDING(_pad2_);
-
 	/*
 	 * set > 0 if pages under this cgroup are moving to other cgroup.
 	 */
-- 
cgit v1.2.3


From 6df13230b612af81ce04f20bb37a02e58ef71925 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Mon, 1 Jul 2024 18:59:32 +0000
Subject: mm: memcg: add cache line padding to mem_cgroup_per_node

Memcg v1-specific fields serve a buffer function between read-mostly and
update often parts of the mem_cgroup_per_node structure.  If
CONFIG_MEMCG_V1 is not set and these fields are not present, an explicit
cacheline padding is needed.

Link: https://lkml.kernel.org/r/20240701185932.704807-2-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Suggested-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 8b5b3ddeba05..60418934827c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -95,14 +95,16 @@ struct mem_cgroup_per_node {
 #ifdef CONFIG_MEMCG_V1
 	/*
 	 * Memcg-v1 only stuff in middle as buffer between read mostly fields
-	 * and update often fields to avoid false sharing. Once v1 stuff is
-	 * moved in a separate struct, an explicit padding is needed.
+	 * and update often fields to avoid false sharing. If v1 stuff is
+	 * not present, an explicit padding is needed.
 	 */
 
 	struct rb_node		tree_node;	/* RB tree node */
 	unsigned long		usage_in_excess;/* Set to the value by which */
 						/* the soft limit is exceeded*/
 	bool			on_tree;
+#else
+	CACHELINE_PADDING(_pad1_);
 #endif
 
 	/* Fields which get updated often at the end. */
-- 
cgit v1.2.3


From 3a3b7fec3974f954600844e41d773c00857ef48a Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 1 Jul 2024 11:31:15 -0400
Subject: mm: remove CONFIG_MEMCG_KMEM

CONFIG_MEMCG_KMEM used to be a user-visible option for whether slab
tracking is enabled.  It has been default-enabled and equivalent to
CONFIG_MEMCG for almost a decade.  We've only grown more kernel memory
accounting sites since, and there is no imaginable cgroup usecase going
forward that wants to track user pages but not the multitude of
user-drivable kernel allocations.

Link: https://lkml.kernel.org/r/20240701153148.452230-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/bpf.h         |  4 ++--
 include/linux/list_lru.h    |  2 +-
 include/linux/memcontrol.h  | 22 +++++-----------------
 include/linux/sched.h       |  3 +--
 include/linux/slab.h        | 12 ++++++------
 include/trace/events/kmem.h |  4 ++--
 6 files changed, 17 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5e694a308081..b8637555c9c2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -275,7 +275,7 @@ struct bpf_map {
 	u32 btf_value_type_id;
 	u32 btf_vmlinux_value_type_id;
 	struct btf *btf;
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
 	struct obj_cgroup *objcg;
 #endif
 	char name[BPF_OBJ_NAME_LEN];
@@ -2252,7 +2252,7 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
 
 int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
 			unsigned long nr_pages, struct page **page_array);
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
 			   int node);
 void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags);
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 792b67ceb631..5099a8ccd5f4 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -50,7 +50,7 @@ struct list_lru_node {
 
 struct list_lru {
 	struct list_lru_node	*node;
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
 	struct list_head	list;
 	int			shrinker_id;
 	bool			memcg_aware;
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 60418934827c..7e2eb091049a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -195,7 +195,7 @@ struct mem_cgroup {
 	/* Range enforcement for interrupt charges */
 	struct work_struct high_work;
 
-#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
+#ifdef CONFIG_ZSWAP
 	unsigned long zswap_max;
 
 	/*
@@ -236,7 +236,6 @@ struct mem_cgroup {
 	 */
 	unsigned long		socket_pressure;
 
-#ifdef CONFIG_MEMCG_KMEM
 	int kmemcg_id;
 	/*
 	 * memcg->objcg is wiped out as a part of the objcg repaprenting
@@ -247,7 +246,6 @@ struct mem_cgroup {
 	struct obj_cgroup	*orig_objcg;
 	/* list of inherited objcgs, protected by objcg_lock */
 	struct list_head objcg_list;
-#endif
 
 	struct memcg_vmstats_percpu __percpu *vmstats_percpu;
 
@@ -532,7 +530,6 @@ retry:
 	return memcg;
 }
 
-#ifdef CONFIG_MEMCG_KMEM
 /*
  * folio_memcg_kmem - Check if the folio has the memcg_kmem flag set.
  * @folio: Pointer to the folio.
@@ -548,15 +545,6 @@ static inline bool folio_memcg_kmem(struct folio *folio)
 	return folio->memcg_data & MEMCG_DATA_KMEM;
 }
 
-
-#else
-static inline bool folio_memcg_kmem(struct folio *folio)
-{
-	return false;
-}
-
-#endif
-
 static inline bool PageMemcgKmem(struct page *page)
 {
 	return folio_memcg_kmem(page_folio(page));
@@ -1488,7 +1476,7 @@ static inline void split_page_memcg(struct page *head, int old_order, int new_or
  * if MEMCG_DATA_OBJEXTS is set.
  */
 struct slabobj_ext {
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
 	struct obj_cgroup *objcg;
 #endif
 #ifdef CONFIG_MEM_ALLOC_PROFILING
@@ -1663,7 +1651,7 @@ static inline void set_shrinker_bit(struct mem_cgroup *memcg,
 }
 #endif
 
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
 bool mem_cgroup_kmem_disabled(void);
 int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
 void __memcg_kmem_uncharge_page(struct page *page, int order);
@@ -1806,9 +1794,9 @@ static inline void count_objcg_event(struct obj_cgroup *objcg,
 {
 }
 
-#endif /* CONFIG_MEMCG_KMEM */
+#endif /* CONFIG_MEMCG */
 
-#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
+#if defined(CONFIG_MEMCG) && defined(CONFIG_ZSWAP)
 bool obj_cgroup_may_zswap(struct obj_cgroup *objcg);
 void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size);
 void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a7770c566c4d..82da65131a6b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1457,9 +1457,8 @@ struct task_struct {
 
 	/* Used by memcontrol for targeted memcg charge: */
 	struct mem_cgroup		*active_memcg;
-#endif
 
-#ifdef CONFIG_MEMCG_KMEM
+	/* Cache for current->cgroups->memcg->objcg lookups: */
 	struct obj_cgroup		*objcg;
 #endif
 
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 7247e217e21b..a332dd2fa6cd 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -41,7 +41,7 @@ enum _slab_flag_bits {
 #ifdef CONFIG_FAILSLAB
 	_SLAB_FAILSLAB,
 #endif
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
 	_SLAB_ACCOUNT,
 #endif
 #ifdef CONFIG_KASAN_GENERIC
@@ -171,7 +171,7 @@ enum _slab_flag_bits {
 # define SLAB_FAILSLAB		__SLAB_FLAG_UNUSED
 #endif
 /* Account to memcg */
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
 # define SLAB_ACCOUNT		__SLAB_FLAG_BIT(_SLAB_ACCOUNT)
 #else
 # define SLAB_ACCOUNT		__SLAB_FLAG_UNUSED
@@ -407,7 +407,7 @@ enum kmalloc_cache_type {
 #ifndef CONFIG_ZONE_DMA
 	KMALLOC_DMA = KMALLOC_NORMAL,
 #endif
-#ifndef CONFIG_MEMCG_KMEM
+#ifndef CONFIG_MEMCG
 	KMALLOC_CGROUP = KMALLOC_NORMAL,
 #endif
 	KMALLOC_RANDOM_START = KMALLOC_NORMAL,
@@ -420,7 +420,7 @@ enum kmalloc_cache_type {
 #ifdef CONFIG_ZONE_DMA
 	KMALLOC_DMA,
 #endif
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
 	KMALLOC_CGROUP,
 #endif
 	NR_KMALLOC_TYPES
@@ -435,7 +435,7 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
 #define KMALLOC_NOT_NORMAL_BITS					\
 	(__GFP_RECLAIMABLE |					\
 	(IS_ENABLED(CONFIG_ZONE_DMA)   ? __GFP_DMA : 0) |	\
-	(IS_ENABLED(CONFIG_MEMCG_KMEM) ? __GFP_ACCOUNT : 0))
+	(IS_ENABLED(CONFIG_MEMCG) ? __GFP_ACCOUNT : 0))
 
 extern unsigned long random_kmalloc_seed;
 
@@ -463,7 +463,7 @@ static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigne
 	 */
 	if (IS_ENABLED(CONFIG_ZONE_DMA) && (flags & __GFP_DMA))
 		return KMALLOC_DMA;
-	if (!IS_ENABLED(CONFIG_MEMCG_KMEM) || (flags & __GFP_RECLAIMABLE))
+	if (!IS_ENABLED(CONFIG_MEMCG) || (flags & __GFP_RECLAIMABLE))
 		return KMALLOC_RECLAIM;
 	else
 		return KMALLOC_CGROUP;
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 8a829e0f6e55..b37eb0a7060f 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -36,7 +36,7 @@ TRACE_EVENT(kmem_cache_alloc,
 		__entry->bytes_alloc	= s->size;
 		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
 		__entry->node		= node;
-		__entry->accounted	= IS_ENABLED(CONFIG_MEMCG_KMEM) ?
+		__entry->accounted	= IS_ENABLED(CONFIG_MEMCG) ?
 					  ((gfp_flags & __GFP_ACCOUNT) ||
 					  (s->flags & SLAB_ACCOUNT)) : false;
 	),
@@ -87,7 +87,7 @@ TRACE_EVENT(kmalloc,
 		__entry->bytes_alloc,
 		show_gfp_flags(__entry->gfp_flags),
 		__entry->node,
-		(IS_ENABLED(CONFIG_MEMCG_KMEM) &&
+		(IS_ENABLED(CONFIG_MEMCG) &&
 		 (__entry->gfp_flags & (__force unsigned long)__GFP_ACCOUNT)) ? "true" : "false")
 );
 
-- 
cgit v1.2.3


From 259043e3b730e0aa6408bff27af7edf7a5c9101c Mon Sep 17 00:00:00 2001
From: Barry Song <v-songbaohua@oppo.com>
Date: Sun, 30 Jun 2024 11:22:31 +1200
Subject: mm: zswap: fix zswap_never_enabled() for CONFIG_ZSWAP==N

If CONFIG_ZSWAP is set to N, it means zswap cannot be enabled.
zswap_never_enabled() should return true.

The only effect of this issue is that with Barry's latest large folio
swapin patches for zram ("mm: support mTHP swap-in for zRAM-like
swapfile"), we will always fallback to order-0 swapin, even mistakenly
when !CONFIG_ZSWAP.

Basically this bug makes Barry's in progress patches not work at all.

The API was created to inform the mm core that zswap has never been
enabled, allowing the mm core to perform mTHP swap-in.  This is a
transitional solution until zswap supports mTHP.  If zswap has been
enabled, performing mTHP swap-in will result in corrupted data.  You
may find the answer in the mTHP swap-in series:

https://lore.kernel.org/linux-mm/CAJD7tkZ4FQr6HZpduOdvmqgg_-whuZYE-Bz5O2t6yzw6Yg+v1A@mail.gmail.com/

Link: https://lkml.kernel.org/r/20240629232231.42394-1-21cnbao@gmail.com
Fixes: 0300e17d67c3 ("mm: zswap: add zswap_never_enabled()")
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev>
Acked-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Chris Li <chrisl@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/zswap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index bf83ae5e285d..6cecb4a4f68b 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -68,7 +68,7 @@ static inline bool zswap_is_enabled(void)
 
 static inline bool zswap_never_enabled(void)
 {
-	return false;
+	return true;
 }
 
 #endif
-- 
cgit v1.2.3


From a43fe27d650375cd9e5ea915c538f6f9eabd185e Mon Sep 17 00:00:00 2001
From: Xiao Wang <xiao.w.wang@intel.com>
Date: Fri, 21 Jun 2024 13:47:07 +0800
Subject: riscv: Optimize crc32 with Zbc extension

As suggested by the B-ext spec, the Zbc (carry-less multiplication)
instructions can be used to accelerate CRC calculations. Currently, the
crc32 is the most widely used crc function inside kernel, so this patch
focuses on the optimization of just the crc32 APIs.

Compared with the current table-lookup based optimization, Zbc based
optimization can also achieve large stride during CRC calculation loop,
meantime, it avoids the memory access latency of the table-lookup based
implementation and it reduces memory footprint.

If Zbc feature is not supported in a runtime environment, then the
table-lookup based implementation would serve as fallback via alternative
mechanism.

By inspecting the vmlinux built by gcc v12.2.0 with default optimization
level (-O2), we can see below instruction count change for each 8-byte
stride in the CRC32 loop:

rv64: crc32_be (54->31), crc32_le (54->13), __crc32c_le (54->13)
rv32: crc32_be (50->32), crc32_le (50->16), __crc32c_le (50->16)

The compile target CPU is little endian, extra effort is needed for byte
swapping for the crc32_be API, thus, the instruction count change is not
as significant as that in the *_le cases.

This patch is tested on QEMU VM with the kernel CRC32 selftest for both
rv64 and rv32. Running the CRC32 selftest on a real hardware (SpacemiT K1)
with Zbc extension shows 65% and 125% performance improvement respectively
on crc32_test() and crc32c_test().

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Reviewed-by: Charlie Jenkins <charlie@rivosinc.com>
Link: https://lore.kernel.org/r/20240621054707.1847548-1-xiao.w.wang@intel.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 include/linux/crc32.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index 9e8a032c1788..87f788c0d607 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -9,7 +9,9 @@
 #include <linux/bitrev.h>
 
 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len);
+u32 __pure crc32_le_base(u32 crc, unsigned char const *p, size_t len);
 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len);
+u32 __pure crc32_be_base(u32 crc, unsigned char const *p, size_t len);
 
 /**
  * crc32_le_combine - Combine two crc32 check values into one. For two
@@ -37,6 +39,7 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
 }
 
 u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len);
+u32 __pure __crc32c_le_base(u32 crc, unsigned char const *p, size_t len);
 
 /**
  * __crc32c_le_combine - Combine two crc32c check values into one. For two
-- 
cgit v1.2.3


From bed6b0317441d82c32506750ccd868d83850e6f4 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Tue, 25 Jun 2024 11:16:03 +0800
Subject: f2fs: clean up addrs_per_{inode,block}()

Introduce a new help addrs_per_page() to wrap common code
from addrs_per_inode() and addrs_per_block() for cleanup.

Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/f2fs_fs.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 41d1d71c36ff..01bee2b289c2 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -259,15 +259,14 @@ struct node_footer {
 #define CUR_ADDRS_PER_INODE(inode)	(DEF_ADDRS_PER_INODE - \
 					get_extra_isize(inode))
 #define DEF_NIDS_PER_INODE	5	/* Node IDs in an Inode */
-#define ADDRS_PER_INODE(inode)	addrs_per_inode(inode)
+#define ADDRS_PER_INODE(inode)	addrs_per_page(inode, true)
 /* Address Pointers in a Direct Block */
 #define DEF_ADDRS_PER_BLOCK	((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32))
-#define ADDRS_PER_BLOCK(inode)	addrs_per_block(inode)
+#define ADDRS_PER_BLOCK(inode)	addrs_per_page(inode, false)
 /* Node IDs in an Indirect Block */
 #define NIDS_PER_BLOCK		((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32))
 
-#define ADDRS_PER_PAGE(page, inode)	\
-	(IS_INODE(page) ? ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK(inode))
+#define ADDRS_PER_PAGE(page, inode)	(addrs_per_page(inode, IS_INODE(page)))
 
 #define	NODE_DIR1_BLOCK		(DEF_ADDRS_PER_INODE + 1)
 #define	NODE_DIR2_BLOCK		(DEF_ADDRS_PER_INODE + 2)
-- 
cgit v1.2.3


From 1037885b309cfca5b770137209e9b51d1b50cc27 Mon Sep 17 00:00:00 2001
From: Drew Fustini <dfustini@tenstorrent.com>
Date: Sun, 23 Jun 2024 19:12:31 -0700
Subject: dt-bindings: clock: Document T-Head TH1520 AP_SUBSYS controller

Document bindings for the T-Head TH1520 AP sub-system clock controller.

Link: https://openbeagle.org/beaglev-ahead/beaglev-ahead/-/blob/main/docs/TH1520%20System%20User%20Manual.pdf
Co-developed-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Drew Fustini <dfustini@tenstorrent.com>
Link: https://lore.kernel.org/r/20240623-th1520-clk-v2-1-ad8d6432d9fb@tenstorrent.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/thead,th1520-clk-ap.h | 96 +++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 include/dt-bindings/clock/thead,th1520-clk-ap.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/thead,th1520-clk-ap.h b/include/dt-bindings/clock/thead,th1520-clk-ap.h
new file mode 100644
index 000000000000..a199784b3512
--- /dev/null
+++ b/include/dt-bindings/clock/thead,th1520-clk-ap.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (C) 2023 Vivo Communication Technology Co. Ltd.
+ * Authors: Yangtao Li <frank.li@vivo.com>
+ */
+
+#ifndef _DT_BINDINGS_CLK_TH1520_H_
+#define _DT_BINDINGS_CLK_TH1520_H_
+
+#define CLK_CPU_PLL0		0
+#define CLK_CPU_PLL1		1
+#define CLK_GMAC_PLL		2
+#define CLK_VIDEO_PLL		3
+#define CLK_DPU0_PLL		4
+#define CLK_DPU1_PLL		5
+#define CLK_TEE_PLL		6
+#define CLK_C910_I0		7
+#define CLK_C910		8
+#define CLK_BROM		9
+#define CLK_BMU			10
+#define CLK_AHB2_CPUSYS_HCLK	11
+#define CLK_APB3_CPUSYS_PCLK	12
+#define CLK_AXI4_CPUSYS2_ACLK	13
+#define CLK_AON2CPU_A2X		14
+#define CLK_X2X_CPUSYS		15
+#define CLK_AXI_ACLK		16
+#define CLK_CPU2AON_X2H		17
+#define CLK_PERI_AHB_HCLK	18
+#define CLK_CPU2PERI_X2H	19
+#define CLK_PERI_APB_PCLK	20
+#define CLK_PERI2APB_PCLK	21
+#define CLK_PERISYS_APB1_HCLK	22
+#define CLK_PERISYS_APB2_HCLK	23
+#define CLK_PERISYS_APB3_HCLK	24
+#define CLK_PERISYS_APB4_HCLK	25
+#define CLK_OSC12M		26
+#define CLK_OUT1		27
+#define CLK_OUT2		28
+#define CLK_OUT3		29
+#define CLK_OUT4		30
+#define CLK_APB_PCLK		31
+#define CLK_NPU			32
+#define CLK_NPU_AXI		33
+#define CLK_VI			34
+#define CLK_VI_AHB		35
+#define CLK_VO_AXI		36
+#define CLK_VP_APB		37
+#define CLK_VP_AXI		38
+#define CLK_CPU2VP		39
+#define CLK_VENC		40
+#define CLK_DPU0		41
+#define CLK_DPU1		42
+#define CLK_EMMC_SDIO		43
+#define CLK_GMAC1		44
+#define CLK_PADCTRL1		45
+#define CLK_DSMART		46
+#define CLK_PADCTRL0		47
+#define CLK_GMAC_AXI		48
+#define CLK_GPIO3		49
+#define CLK_GMAC0		50
+#define CLK_PWM			51
+#define CLK_QSPI0		52
+#define CLK_QSPI1		53
+#define CLK_SPI			54
+#define CLK_UART0_PCLK		55
+#define CLK_UART1_PCLK		56
+#define CLK_UART2_PCLK		57
+#define CLK_UART3_PCLK		58
+#define CLK_UART4_PCLK		59
+#define CLK_UART5_PCLK		60
+#define CLK_GPIO0		61
+#define CLK_GPIO1		62
+#define CLK_GPIO2		63
+#define CLK_I2C0		64
+#define CLK_I2C1		65
+#define CLK_I2C2		66
+#define CLK_I2C3		67
+#define CLK_I2C4		68
+#define CLK_I2C5		69
+#define CLK_SPINLOCK		70
+#define CLK_DMA			71
+#define CLK_MBOX0		72
+#define CLK_MBOX1		73
+#define CLK_MBOX2		74
+#define CLK_MBOX3		75
+#define CLK_WDT0		76
+#define CLK_WDT1		77
+#define CLK_TIMER0		78
+#define CLK_TIMER1		79
+#define CLK_SRAM0		80
+#define CLK_SRAM1		81
+#define CLK_SRAM2		82
+#define CLK_SRAM3		83
+#define CLK_PLL_GMAC_100M	84
+#define CLK_UART_SCLK		85
+#endif
-- 
cgit v1.2.3


From 93ef12d92f65facf8e02ad6578a898a190aa6cf4 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Mon, 8 Jul 2024 14:15:57 -0700
Subject: scsi: ufs: core: Initialize struct uic_command once

Instead of first zero-initializing struct uic_command and next initializing
it memberwise, initialize all members at once.

Reviewed-by: Daejun Park <daejun7.park@samsung.com>
Reviewed-by: Avri Altman <avri.altman@wdc.com>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Reviewed-by: Peter Wang <peter.wang@mediatek.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20240708211716.2827751-3-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 9e0581115b34..d4d63507d090 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -73,8 +73,8 @@ enum ufs_event_type {
  * @done: UIC command completion
  */
 struct uic_command {
-	u32 command;
-	u32 argument1;
+	const u32 command;
+	const u32 argument1;
 	u32 argument2;
 	u32 argument3;
 	int cmd_active;
-- 
cgit v1.2.3


From b53eb9a050d76fc695258e04384e71b3fbb1e7d5 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Mon, 8 Jul 2024 14:15:59 -0700
Subject: scsi: ufs: core: Rename the MASK_TRANSFER_REQUESTS_SLOTS constant

Rename this constant to prepare for the introduction of the
MASK_TRANSFER_REQUESTS_SLOTS_MCQ constant. The acronym "SDB" stands for
"single doorbell" (mode).

Reviewed-by: Peter Wang <peter.wang@mediatek.com>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20240708211716.2827751-5-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h
index c50f92bf2e1d..8d0cc73537c6 100644
--- a/include/ufs/ufshci.h
+++ b/include/ufs/ufshci.h
@@ -67,7 +67,7 @@ enum {
 
 /* Controller capability masks */
 enum {
-	MASK_TRANSFER_REQUESTS_SLOTS		= 0x0000001F,
+	MASK_TRANSFER_REQUESTS_SLOTS_SDB	= 0x0000001F,
 	MASK_NUMBER_OUTSTANDING_RTT		= 0x0000FF00,
 	MASK_TASK_MANAGEMENT_REQUEST_SLOTS	= 0x00070000,
 	MASK_EHSLUTRD_SUPPORTED			= 0x00400000,
-- 
cgit v1.2.3


From 0fca3318e550d48e9a6c844763bf9eab91c30f07 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Mon, 8 Jul 2024 14:16:01 -0700
Subject: scsi: ufs: core: Inline is_mcq_enabled()

Improve code readability by inlining is_mcq_enabled().

Cc: Peter Wang <peter.wang@mediatek.com>
Cc: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20240708211716.2827751-7-bvanassche@acm.org
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index d4d63507d090..c0e28a512b3c 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -1132,11 +1132,6 @@ struct ufs_hw_queue {
 
 #define MCQ_QCFG_SIZE		0x40
 
-static inline bool is_mcq_enabled(struct ufs_hba *hba)
-{
-	return hba->mcq_enabled;
-}
-
 static inline unsigned int ufshcd_mcq_opr_offset(struct ufs_hba *hba,
 		enum ufshcd_mcq_opr opr, int idx)
 {
-- 
cgit v1.2.3


From af568c7e8292bd96f5a4f3d81dcc099bcfe7cb73 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Mon, 8 Jul 2024 14:16:05 -0700
Subject: scsi: ufs: mcq: Make .get_hba_mac() optional

UFSHCI controllers that are compliant with the UFSHCI 4.0 standard report
the maximum number of supported commands in the controller capabilities
register. Use that value if .get_hba_mac == NULL.

Reviewed-by: Peter Wang <peter.wang@mediatek.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20240708211716.2827751-11-bvanassche@acm.org
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 4 +++-
 include/ufs/ufshci.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index c0e28a512b3c..6518997930f3 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -325,7 +325,9 @@ struct ufs_pwr_mode_info {
  * @event_notify: called to notify important events
  * @reinit_notify: called to notify reinit of UFSHCD during max gear switch
  * @mcq_config_resource: called to configure MCQ platform resources
- * @get_hba_mac: called to get vendor specific mac value, mandatory for mcq mode
+ * @get_hba_mac: reports maximum number of outstanding commands supported by
+ *	the controller. Should be implemented for UFSHCI 4.0 or later
+ *	controllers that are not compliant with the UFSHCI 4.0 specification.
  * @op_runtime_config: called to config Operation and runtime regs Pointers
  * @get_outstanding_cqs: called to get outstanding completion queues
  * @config_esi: called to config Event Specific Interrupt
diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h
index 8d0cc73537c6..38fe97971a65 100644
--- a/include/ufs/ufshci.h
+++ b/include/ufs/ufshci.h
@@ -68,6 +68,7 @@ enum {
 /* Controller capability masks */
 enum {
 	MASK_TRANSFER_REQUESTS_SLOTS_SDB	= 0x0000001F,
+	MASK_TRANSFER_REQUESTS_SLOTS_MCQ	= 0x000000FF,
 	MASK_NUMBER_OUTSTANDING_RTT		= 0x0000FF00,
 	MASK_TASK_MANAGEMENT_REQUEST_SLOTS	= 0x00070000,
 	MASK_EHSLUTRD_SUPPORTED			= 0x00400000,
-- 
cgit v1.2.3


From c2a90eee29f41630225c9a64d26c425e1d50b401 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 8 Jul 2024 16:53:25 -0700
Subject: scsi: ufs: core: Add UFSHCD_QUIRK_CUSTOM_CRYPTO_PROFILE

Add UFSHCD_QUIRK_CUSTOM_CRYPTO_PROFILE which lets UFS host drivers
initialize the blk_crypto_profile themselves rather than have it be
initialized by ufshcd-core according to the UFSHCI standard.  This is
needed to support inline encryption on the "Exynos" UFS controller which
has a nonstandard interface.

Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Peter Griffin <peter.griffin@linaro.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Link: https://lore.kernel.org/r/20240708235330.103590-2-ebiggers@kernel.org
Reviewed-by: Alim Akhtar <alim.akhtar@samsung.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 9e0581115b34..4f8982e52ac4 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -645,6 +645,15 @@ enum ufshcd_quirks {
 	 * thus need this quirk to skip related flow.
 	 */
 	UFSHCD_QUIRK_MCQ_BROKEN_RTC			= 1 << 21,
+
+	/*
+	 * This quirk needs to be enabled if the host controller supports inline
+	 * encryption but it needs to initialize the crypto capabilities in a
+	 * nonstandard way and/or needs to override blk_crypto_ll_ops.  If
+	 * enabled, the standard code won't initialize the blk_crypto_profile;
+	 * ufs_hba_variant_ops::init() must do it instead.
+	 */
+	UFSHCD_QUIRK_CUSTOM_CRYPTO_PROFILE		= 1 << 22,
 };
 
 enum ufshcd_caps {
-- 
cgit v1.2.3


From e95881e0081a30e132b5ca087f1e07fc08608a7e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 8 Jul 2024 16:53:27 -0700
Subject: scsi: ufs: core: Add UFSHCD_QUIRK_BROKEN_CRYPTO_ENABLE

Add UFSHCD_QUIRK_BROKEN_CRYPTO_ENABLE which tells the UFS core to not use
the crypto enable bit defined by the UFS specification.  This is needed to
support inline encryption on the "Exynos" UFS controller.

Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Peter Griffin <peter.griffin@linaro.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Link: https://lore.kernel.org/r/20240708235330.103590-4-ebiggers@kernel.org
Reviewed-by: Alim Akhtar <alim.akhtar@samsung.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 4f8982e52ac4..e07104f355c0 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -654,6 +654,13 @@ enum ufshcd_quirks {
 	 * ufs_hba_variant_ops::init() must do it instead.
 	 */
 	UFSHCD_QUIRK_CUSTOM_CRYPTO_PROFILE		= 1 << 22,
+
+	/*
+	 * This quirk needs to be enabled if the host controller supports inline
+	 * encryption but does not support the CRYPTO_GENERAL_ENABLE bit, i.e.
+	 * host controller initialization fails if that bit is set.
+	 */
+	UFSHCD_QUIRK_BROKEN_CRYPTO_ENABLE		= 1 << 23,
 };
 
 enum ufshcd_caps {
-- 
cgit v1.2.3


From 8ecea3da1567e0648b5d37a6faec73fc9c8571ba Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 8 Jul 2024 16:53:28 -0700
Subject: scsi: ufs: core: Add fill_crypto_prdt variant op

Add a variant op to allow host drivers to initialize nonstandard
crypto-related fields in the PRDT.  This is needed to support inline
encryption on the "Exynos" UFS controller.

Note that this will be used together with the support for overriding the
PRDT entry size that was already added by commit ada1e653a5ea ("scsi: ufs:
core: Allow UFS host drivers to override the sg entry size").

Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Peter Griffin <peter.griffin@linaro.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Link: https://lore.kernel.org/r/20240708235330.103590-5-ebiggers@kernel.org
Reviewed-by: Alim Akhtar <alim.akhtar@samsung.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index e07104f355c0..fb791d1a6d00 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -322,6 +322,7 @@ struct ufs_pwr_mode_info {
  * @device_reset: called to issue a reset pulse on the UFS device
  * @config_scaling_param: called to configure clock scaling parameters
  * @program_key: program or evict an inline encryption key
+ * @fill_crypto_prdt: initialize crypto-related fields in the PRDT
  * @event_notify: called to notify important events
  * @reinit_notify: called to notify reinit of UFSHCD during max gear switch
  * @mcq_config_resource: called to configure MCQ platform resources
@@ -367,6 +368,9 @@ struct ufs_hba_variant_ops {
 				struct devfreq_simple_ondemand_data *data);
 	int	(*program_key)(struct ufs_hba *hba,
 			       const union ufs_crypto_cfg_entry *cfg, int slot);
+	int	(*fill_crypto_prdt)(struct ufs_hba *hba,
+				    const struct bio_crypt_ctx *crypt_ctx,
+				    void *prdt, unsigned int num_segments);
 	void	(*event_notify)(struct ufs_hba *hba,
 				enum ufs_event_type evt, void *data);
 	void	(*reinit_notify)(struct ufs_hba *);
-- 
cgit v1.2.3


From 4c45dba50a3750a0834353c4187e7896b158bc0c Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 8 Jul 2024 16:53:29 -0700
Subject: scsi: ufs: core: Add UFSHCD_QUIRK_KEYS_IN_PRDT

Since the nonstandard inline encryption support on Exynos SoCs requires
that raw cryptographic keys be copied into the PRDT, it is desirable to
zeroize those keys after each request to keep them from being left in
memory.  Therefore, add a quirk bit that enables the zeroization.

We could instead do the zeroization unconditionally.  However, using a
quirk bit avoids adding the zeroization overhead to standard devices.

Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Peter Griffin <peter.griffin@linaro.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Link: https://lore.kernel.org/r/20240708235330.103590-6-ebiggers@kernel.org
Reviewed-by: Alim Akhtar <alim.akhtar@samsung.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index fb791d1a6d00..80accfbe48aa 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -665,6 +665,14 @@ enum ufshcd_quirks {
 	 * host controller initialization fails if that bit is set.
 	 */
 	UFSHCD_QUIRK_BROKEN_CRYPTO_ENABLE		= 1 << 23,
+
+	/*
+	 * This quirk needs to be enabled if the host controller driver copies
+	 * cryptographic keys into the PRDT in order to send them to hardware,
+	 * and therefore the PRDT should be zeroized after each request (as per
+	 * the standard best practice for managing keys).
+	 */
+	UFSHCD_QUIRK_KEYS_IN_PRDT			= 1 << 24,
 };
 
 enum ufshcd_caps {
-- 
cgit v1.2.3


From a93c2e5fe766825d6264823fd1dca9aae747cf5d Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Sat, 6 Jul 2024 13:20:01 +0200
Subject: i2c: reword i2c_algorithm according to newest specification

Start changing the wording of the I2C main header wrt. the newest I2C
v7 and SMBus 3.2 specifications and replace "master/slave" with more
appropriate terms. The first step renames the members of struct
i2c_algorithm. Once all in-tree users are converted, the anonymous union
will go away again. All this work will also pave the way for finally
seperating the monolithic header into more fine-grained headers like
"i2c/clients.h" etc.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Andi Shyti <andi.shyti@kernel.org>
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
---
 include/linux/i2c.h | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 424acb98c7c2..9d45b7b912dd 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -548,10 +548,18 @@ struct i2c_algorithm {
 	 * master_xfer should return the number of messages successfully
 	 * processed, or a negative value on error
 	 */
-	int (*master_xfer)(struct i2c_adapter *adap, struct i2c_msg *msgs,
-			   int num);
-	int (*master_xfer_atomic)(struct i2c_adapter *adap,
+	union {
+		int (*xfer)(struct i2c_adapter *adap, struct i2c_msg *msgs,
+			    int num);
+		int (*master_xfer)(struct i2c_adapter *adap, struct i2c_msg *msgs,
+				   int num);
+	};
+	union {
+		int (*xfer_atomic)(struct i2c_adapter *adap,
 				   struct i2c_msg *msgs, int num);
+		int (*master_xfer_atomic)(struct i2c_adapter *adap,
+					   struct i2c_msg *msgs, int num);
+	};
 	int (*smbus_xfer)(struct i2c_adapter *adap, u16 addr,
 			  unsigned short flags, char read_write,
 			  u8 command, int size, union i2c_smbus_data *data);
@@ -563,8 +571,14 @@ struct i2c_algorithm {
 	u32 (*functionality)(struct i2c_adapter *adap);
 
 #if IS_ENABLED(CONFIG_I2C_SLAVE)
-	int (*reg_slave)(struct i2c_client *client);
-	int (*unreg_slave)(struct i2c_client *client);
+	union {
+		int (*reg_target)(struct i2c_client *client);
+		int (*reg_slave)(struct i2c_client *client);
+	};
+	union {
+		int (*unreg_target)(struct i2c_client *client);
+		int (*unreg_slave)(struct i2c_client *client);
+	};
 #endif
 };
 
-- 
cgit v1.2.3


From 4aa7b5d1784f510c0f42afc1d74efb41947221d7 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 30 Apr 2024 07:53:04 +0930
Subject: btrfs: remove extent_map::orig_start member

Since we have extent_map::offset, the old extent_map::orig_start is just
extent_map::start - extent_map::offset for non-hole/inline extents.

And since the new extent_map::offset is already verified by
validate_extent_map() while the old orig_start is not, let's just remove
the old member from all call sites.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index fadf406b5260..44016b6dda60 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -291,7 +291,6 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		__field(	u64,  ino		)
 		__field(	u64,  start		)
 		__field(	u64,  len		)
-		__field(	u64,  orig_start	)
 		__field(	u64,  block_start	)
 		__field(	u64,  block_len		)
 		__field(	u32,  flags		)
@@ -303,21 +302,18 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		__entry->ino		= btrfs_ino(inode);
 		__entry->start		= map->start;
 		__entry->len		= map->len;
-		__entry->orig_start	= map->orig_start;
 		__entry->block_start	= map->block_start;
 		__entry->block_len	= map->block_len;
 		__entry->flags		= map->flags;
 		__entry->refs		= refcount_read(&map->refs);
 	),
 
-	TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu "
-		  "orig_start=%llu block_start=%llu(%s) "
-		  "block_len=%llu flags=%s refs=%u",
+	TP_printk_btrfs(
+"root=%llu(%s) ino=%llu start=%llu len=%llu block_start=%llu(%s) block_len=%llu flags=%s refs=%u",
 		  show_root_type(__entry->root_objectid),
 		  __entry->ino,
 		  __entry->start,
 		  __entry->len,
-		  __entry->orig_start,
 		  show_map_type(__entry->block_start),
 		  __entry->block_len,
 		  show_map_flags(__entry->flags),
-- 
cgit v1.2.3


From e28b851ed9b232c3b84cb8d0fedbdfa8ca881386 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 30 Apr 2024 07:53:05 +0930
Subject: btrfs: remove extent_map::block_len member

The extent_map::block_len is either extent_map::len (non-compressed
extent) or extent_map::disk_num_bytes (compressed extent).

Since we already have sanity checks to do the cross-checks between the
new and old members, we can drop the old extent_map::block_len now.

For most call sites, they can manually select extent_map::len or
extent_map::disk_num_bytes, since most if not all of them have checked
if the extent is compressed.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 44016b6dda60..69bef548818c 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -292,7 +292,6 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		__field(	u64,  start		)
 		__field(	u64,  len		)
 		__field(	u64,  block_start	)
-		__field(	u64,  block_len		)
 		__field(	u32,  flags		)
 		__field(	int,  refs		)
 	),
@@ -303,19 +302,17 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		__entry->start		= map->start;
 		__entry->len		= map->len;
 		__entry->block_start	= map->block_start;
-		__entry->block_len	= map->block_len;
 		__entry->flags		= map->flags;
 		__entry->refs		= refcount_read(&map->refs);
 	),
 
 	TP_printk_btrfs(
-"root=%llu(%s) ino=%llu start=%llu len=%llu block_start=%llu(%s) block_len=%llu flags=%s refs=%u",
+"root=%llu(%s) ino=%llu start=%llu len=%llu block_start=%llu(%s) flags=%s refs=%u",
 		  show_root_type(__entry->root_objectid),
 		  __entry->ino,
 		  __entry->start,
 		  __entry->len,
 		  show_map_type(__entry->block_start),
-		  __entry->block_len,
 		  show_map_flags(__entry->flags),
 		  __entry->refs)
 );
-- 
cgit v1.2.3


From c77a8c61002e91d859e118008fd495efbe1d9373 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 30 Apr 2024 07:53:06 +0930
Subject: btrfs: remove extent_map::block_start member

The member extent_map::block_start can be calculated from
extent_map::disk_bytenr + extent_map::offset for regular extents.
And otherwise just extent_map::disk_bytenr.

And this is already validated by the validate_extent_map().  Now we can
remove the member.

However there is a special case in btrfs_create_dio_extent() where we
for NOCOW/PREALLOC ordered extents cannot directly use the resulting
btrfs_file_extent, as btrfs_split_ordered_extent() cannot handle them
yet.

So for that call site, we pass file_extent->disk_bytenr +
file_extent->num_bytes as disk_bytenr for the ordered extent, and 0 for
offset.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 69bef548818c..eeb56975bee7 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -291,7 +291,6 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		__field(	u64,  ino		)
 		__field(	u64,  start		)
 		__field(	u64,  len		)
-		__field(	u64,  block_start	)
 		__field(	u32,  flags		)
 		__field(	int,  refs		)
 	),
@@ -301,18 +300,15 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		__entry->ino		= btrfs_ino(inode);
 		__entry->start		= map->start;
 		__entry->len		= map->len;
-		__entry->block_start	= map->block_start;
 		__entry->flags		= map->flags;
 		__entry->refs		= refcount_read(&map->refs);
 	),
 
-	TP_printk_btrfs(
-"root=%llu(%s) ino=%llu start=%llu len=%llu block_start=%llu(%s) flags=%s refs=%u",
+	TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu flags=%s refs=%u",
 		  show_root_type(__entry->root_objectid),
 		  __entry->ino,
 		  __entry->start,
 		  __entry->len,
-		  show_map_type(__entry->block_start),
 		  show_map_flags(__entry->flags),
 		  __entry->refs)
 );
@@ -2608,7 +2604,6 @@ TRACE_EVENT(btrfs_extent_map_shrinker_remove_em,
 		__field(	u64,	root_id		)
 		__field(	u64,	start		)
 		__field(	u64,	len		)
-		__field(	u64,	block_start	)
 		__field(	u32,	flags		)
 	),
 
@@ -2617,15 +2612,12 @@ TRACE_EVENT(btrfs_extent_map_shrinker_remove_em,
 		__entry->root_id	= inode->root->root_key.objectid;
 		__entry->start		= em->start;
 		__entry->len		= em->len;
-		__entry->block_start	= em->block_start;
 		__entry->flags		= em->flags;
 	),
 
-	TP_printk_btrfs(
-"ino=%llu root=%llu(%s) start=%llu len=%llu block_start=%llu(%s) flags=%s",
+	TP_printk_btrfs("ino=%llu root=%llu(%s) start=%llu len=%llu flags=%s",
 			__entry->ino, show_root_type(__entry->root_id),
 			__entry->start, __entry->len,
-			show_map_type(__entry->block_start),
 			show_map_flags(__entry->flags))
 );
 
-- 
cgit v1.2.3


From 87128f520a6b7573f8086f2c89b9381ee7e85e51 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 10 Jun 2024 06:22:56 +0930
Subject: btrfs: uapi: record temporary super flags used by btrfstune

[BUG]
There is a bug report that a canceled checksum conversion (still
experimental feature) results in unexpected super block flags:

csum_type		0 (crc32c)
csum_size		4
csum			0x14973811 [match]
bytenr			65536
flags			0x1000000001
			( WRITTEN |
			  CHANGING_FSID_V2 )
magic			_BHRfS_M [match]

While for a filesystem with ongoing checksum conversion it should have
either CHANGING_DATA_CSUM or CHANGING_META_CSUM.

[CAUSE]
It turns out that, due to btrfs-progs keeps its own extra flags inside
its own ctree.h headers, not the shared uapi headers, we have
conflicting super flags:

kernel-shared/uapi/btrfs_tree.h:#define BTRFS_SUPER_FLAG_METADUMP_V2	(1ULL << 34)
kernel-shared/uapi/btrfs_tree.h:#define BTRFS_SUPER_FLAG_CHANGING_FSID	(1ULL << 35)
kernel-shared/uapi/btrfs_tree.h:#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36)
kernel-shared/ctree.h:#define BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM	(1ULL << 36)
kernel-shared/ctree.h:#define BTRFS_SUPER_FLAG_CHANGING_META_CSUM	(1ULL << 37)

Note that CHANGING_FSID_V2 is conflicting with CHANGING_DATA_CSUM.

[FIX]
The proper fix would be done inside btrfs-progs, but to keep everything
properly recorded, we should have everything inside the same uapi
header.

Copy all the new flags into uapi header, and change the value for
CHANGING_DATA_CSUM and CHANGING_META_CSUM, while keep the value of
CHANGING_BG_TREE untouched.

Thankfully checksum change is still only experimental and all those
CHANGING_* flags are transient (only for btrfs-progs to resume the
conversion, and kernel will reject them all), the damage is still minor.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/uapi/linux/btrfs_tree.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index d24e8e121507..c7636331e566 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -777,6 +777,14 @@ struct btrfs_stripe_extent {
 #define BTRFS_SUPER_FLAG_CHANGING_FSID	(1ULL << 35)
 #define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36)
 
+/*
+ * Those are temporaray flags utilized by btrfs-progs to do offline conversion.
+ * They are rejected by kernel.
+ * But still keep them all here to avoid conflicts.
+ */
+#define BTRFS_SUPER_FLAG_CHANGING_BG_TREE	(1ULL << 38)
+#define BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM	(1ULL << 39)
+#define BTRFS_SUPER_FLAG_CHANGING_META_CSUM	(1ULL << 40)
 
 /*
  * items in the extent btree are used to record the objectid of the
-- 
cgit v1.2.3


From 2422547e99f99f952d1a37f26b63289f749d5fcd Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Mon, 13 May 2024 18:01:43 +0200
Subject: btrfs: remove raid-stripe-tree encoding field from stripe_extent

Remove the encoding field from 'struct btrfs_stripe_extent'. It was
originally intended to encode the RAID type as well as if we're a data
or a parity stripe.

But the RAID type can be inferred form the block-group and the data vs.
parity differentiation can be done easier with adding a new key type
for parity stripes in the RAID stripe tree.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/uapi/linux/btrfs_tree.h | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index c7636331e566..fc29d273845d 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -747,21 +747,9 @@ struct btrfs_raid_stride {
 	__le64 physical;
 } __attribute__ ((__packed__));
 
-/* The stripe_extent::encoding, 1:1 mapping of enum btrfs_raid_types. */
-#define BTRFS_STRIPE_RAID0	1
-#define BTRFS_STRIPE_RAID1	2
-#define BTRFS_STRIPE_DUP	3
-#define BTRFS_STRIPE_RAID10	4
-#define BTRFS_STRIPE_RAID5	5
-#define BTRFS_STRIPE_RAID6	6
-#define BTRFS_STRIPE_RAID1C3	7
-#define BTRFS_STRIPE_RAID1C4	8
-
 struct btrfs_stripe_extent {
-	__u8 encoding;
-	__u8 reserved[7];
 	/* An array of raid strides this stripe is composed of. */
-	struct btrfs_raid_stride strides[];
+	__DECLARE_FLEX_ARRAY(struct btrfs_raid_stride, strides);
 } __attribute__ ((__packed__));
 
 #define BTRFS_HEADER_FLAG_WRITTEN	(1ULL << 0)
-- 
cgit v1.2.3


From 63347fe031e3bd738a2a16aee8eba889376e49a8 Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Wed, 3 Jul 2024 09:48:01 -0700
Subject: drm/xe/uapi: Rename xe perf layer as xe observation layer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In Xe, the perf layer allows capture of HW counter streams. These HW
counters are generally performance related but don't have to be necessarily
so. Also, the name "perf" is a carryover from i915 and is not preferred.

Here we propose the name "observation" for this common layer which allows
capture of different types of these counter streams.

v2: Rename observability layer to observation layer (Lucas/Rodrigo)
v3: Rename sysctl file to "observation_paranoid" (Jose)

Fixes: 52c2e956dceb ("drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream types")
Fixes: fe8929bdf835 ("drm/xe/perf/uapi: Add perf_stream_paranoid sysctl")
Acked-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240703164801.2561423-1-ashutosh.dixit@intel.com
(cherry picked from commit 8169b2097d88d99d7e4a72e20e4b549efe9eb8d7)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 include/uapi/drm/xe_drm.h | 102 +++++++++++++++++++++++-----------------------
 1 file changed, 52 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 12eaa8532b5c..33544ef78d3e 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -80,7 +80,7 @@ extern "C" {
  *  - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY
  *  - &DRM_IOCTL_XE_EXEC
  *  - &DRM_IOCTL_XE_WAIT_USER_FENCE
- *  - &DRM_IOCTL_XE_PERF
+ *  - &DRM_IOCTL_XE_OBSERVATION
  */
 
 /*
@@ -101,7 +101,7 @@ extern "C" {
 #define DRM_XE_EXEC_QUEUE_GET_PROPERTY	0x08
 #define DRM_XE_EXEC			0x09
 #define DRM_XE_WAIT_USER_FENCE		0x0a
-#define DRM_XE_PERF			0x0b
+#define DRM_XE_OBSERVATION		0x0b
 
 /* Must be kept compact -- no holes */
 
@@ -116,7 +116,7 @@ extern "C" {
 #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY	DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
 #define DRM_IOCTL_XE_EXEC			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
 #define DRM_IOCTL_XE_WAIT_USER_FENCE		DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
-#define DRM_IOCTL_XE_PERF			DRM_IOW(DRM_COMMAND_BASE + DRM_XE_PERF, struct drm_xe_perf_param)
+#define DRM_IOCTL_XE_OBSERVATION		DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param)
 
 /**
  * DOC: Xe IOCTL Extensions
@@ -1376,66 +1376,67 @@ struct drm_xe_wait_user_fence {
 };
 
 /**
- * enum drm_xe_perf_type - Perf stream types
+ * enum drm_xe_observation_type - Observation stream types
  */
-enum drm_xe_perf_type {
-	/** @DRM_XE_PERF_TYPE_OA: OA perf stream type */
-	DRM_XE_PERF_TYPE_OA,
+enum drm_xe_observation_type {
+	/** @DRM_XE_OBSERVATION_TYPE_OA: OA observation stream type */
+	DRM_XE_OBSERVATION_TYPE_OA,
 };
 
 /**
- * enum drm_xe_perf_op - Perf stream ops
+ * enum drm_xe_observation_op - Observation stream ops
  */
-enum drm_xe_perf_op {
-	/** @DRM_XE_PERF_OP_STREAM_OPEN: Open a perf counter stream */
-	DRM_XE_PERF_OP_STREAM_OPEN,
+enum drm_xe_observation_op {
+	/** @DRM_XE_OBSERVATION_OP_STREAM_OPEN: Open an observation stream */
+	DRM_XE_OBSERVATION_OP_STREAM_OPEN,
 
-	/** @DRM_XE_PERF_OP_ADD_CONFIG: Add perf stream config */
-	DRM_XE_PERF_OP_ADD_CONFIG,
+	/** @DRM_XE_OBSERVATION_OP_ADD_CONFIG: Add observation stream config */
+	DRM_XE_OBSERVATION_OP_ADD_CONFIG,
 
-	/** @DRM_XE_PERF_OP_REMOVE_CONFIG: Remove perf stream config */
-	DRM_XE_PERF_OP_REMOVE_CONFIG,
+	/** @DRM_XE_OBSERVATION_OP_REMOVE_CONFIG: Remove observation stream config */
+	DRM_XE_OBSERVATION_OP_REMOVE_CONFIG,
 };
 
 /**
- * struct drm_xe_perf_param - Input of &DRM_XE_PERF
+ * struct drm_xe_observation_param - Input of &DRM_XE_OBSERVATION
  *
- * The perf layer enables multiplexing perf counter streams of multiple
- * types. The actual params for a particular stream operation are supplied
- * via the @param pointer (use __copy_from_user to get these params).
+ * The observation layer enables multiplexing observation streams of
+ * multiple types. The actual params for a particular stream operation are
+ * supplied via the @param pointer (use __copy_from_user to get these
+ * params).
  */
-struct drm_xe_perf_param {
+struct drm_xe_observation_param {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
-	/** @perf_type: Perf stream type, of enum @drm_xe_perf_type */
-	__u64 perf_type;
-	/** @perf_op: Perf op, of enum @drm_xe_perf_op */
-	__u64 perf_op;
+	/** @observation_type: observation stream type, of enum @drm_xe_observation_type */
+	__u64 observation_type;
+	/** @observation_op: observation stream op, of enum @drm_xe_observation_op */
+	__u64 observation_op;
 	/** @param: Pointer to actual stream params */
 	__u64 param;
 };
 
 /**
- * enum drm_xe_perf_ioctls - Perf fd ioctl's
+ * enum drm_xe_observation_ioctls - Observation stream fd ioctl's
  *
- * Information exchanged between userspace and kernel for perf fd ioctl's
- * is stream type specific
+ * Information exchanged between userspace and kernel for observation fd
+ * ioctl's is stream type specific
  */
-enum drm_xe_perf_ioctls {
-	/** @DRM_XE_PERF_IOCTL_ENABLE: Enable data capture for a stream */
-	DRM_XE_PERF_IOCTL_ENABLE = _IO('i', 0x0),
+enum drm_xe_observation_ioctls {
+	/** @DRM_XE_OBSERVATION_IOCTL_ENABLE: Enable data capture for an observation stream */
+	DRM_XE_OBSERVATION_IOCTL_ENABLE = _IO('i', 0x0),
 
-	/** @DRM_XE_PERF_IOCTL_DISABLE: Disable data capture for a stream */
-	DRM_XE_PERF_IOCTL_DISABLE = _IO('i', 0x1),
+	/** @DRM_XE_OBSERVATION_IOCTL_DISABLE: Disable data capture for a observation stream */
+	DRM_XE_OBSERVATION_IOCTL_DISABLE = _IO('i', 0x1),
 
-	/** @DRM_XE_PERF_IOCTL_CONFIG: Change stream configuration */
-	DRM_XE_PERF_IOCTL_CONFIG = _IO('i', 0x2),
+	/** @DRM_XE_OBSERVATION_IOCTL_CONFIG: Change observation stream configuration */
+	DRM_XE_OBSERVATION_IOCTL_CONFIG = _IO('i', 0x2),
 
-	/** @DRM_XE_PERF_IOCTL_STATUS: Return stream status */
-	DRM_XE_PERF_IOCTL_STATUS = _IO('i', 0x3),
+	/** @DRM_XE_OBSERVATION_IOCTL_STATUS: Return observation stream status */
+	DRM_XE_OBSERVATION_IOCTL_STATUS = _IO('i', 0x3),
 
-	/** @DRM_XE_PERF_IOCTL_INFO: Return stream info */
-	DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4),
+	/** @DRM_XE_OBSERVATION_IOCTL_INFO: Return observation stream info */
+	DRM_XE_OBSERVATION_IOCTL_INFO = _IO('i', 0x4),
 };
 
 /**
@@ -1546,12 +1547,12 @@ enum drm_xe_oa_format_type {
  * Stream params are specified as a chain of @drm_xe_ext_set_property
  * struct's, with @property values from enum @drm_xe_oa_property_id and
  * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY.
- * @param field in struct @drm_xe_perf_param points to the first
+ * @param field in struct @drm_xe_observation_param points to the first
  * @drm_xe_ext_set_property struct.
  *
- * Exactly the same mechanism is also used for stream reconfiguration using
- * the @DRM_XE_PERF_IOCTL_CONFIG perf fd ioctl, though only a subset of
- * properties below can be specified for stream reconfiguration.
+ * Exactly the same mechanism is also used for stream reconfiguration using the
+ * @DRM_XE_OBSERVATION_IOCTL_CONFIG observation stream fd ioctl, though only a
+ * subset of properties below can be specified for stream reconfiguration.
  */
 enum drm_xe_oa_property_id {
 #define DRM_XE_OA_EXTENSION_SET_PROPERTY	0
@@ -1571,11 +1572,11 @@ enum drm_xe_oa_property_id {
 
 	/**
 	 * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA
-	 * reports, previously added via @DRM_XE_PERF_OP_ADD_CONFIG.
+	 * reports, previously added via @DRM_XE_OBSERVATION_OP_ADD_CONFIG.
 	 */
 	DRM_XE_OA_PROPERTY_OA_METRIC_SET,
 
-	/** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */
+	/** @DRM_XE_OA_PROPERTY_OA_FORMAT: OA counter report format */
 	DRM_XE_OA_PROPERTY_OA_FORMAT,
 	/*
 	 * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942,
@@ -1596,13 +1597,13 @@ enum drm_xe_oa_property_id {
 
 	/**
 	 * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA
-	 * stream in a DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE).
+	 * stream in a DISABLED state (see @DRM_XE_OBSERVATION_IOCTL_ENABLE).
 	 */
 	DRM_XE_OA_PROPERTY_OA_DISABLED,
 
 	/**
 	 * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific
-	 * @exec_queue_id. Perf queries can be executed on this exec queue.
+	 * @exec_queue_id. OA queries can be executed on this exec queue.
 	 */
 	DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID,
 
@@ -1622,7 +1623,7 @@ enum drm_xe_oa_property_id {
 /**
  * struct drm_xe_oa_config - OA metric configuration
  *
- * Multiple OA configs can be added using @DRM_XE_PERF_OP_ADD_CONFIG. A
+ * Multiple OA configs can be added using @DRM_XE_OBSERVATION_OP_ADD_CONFIG. A
  * particular config can be specified when opening an OA stream using
  * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property.
  */
@@ -1645,8 +1646,9 @@ struct drm_xe_oa_config {
 
 /**
  * struct drm_xe_oa_stream_status - OA stream status returned from
- * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to
- * query stream status in response to EIO errno from perf fd read().
+ * @DRM_XE_OBSERVATION_IOCTL_STATUS observation stream fd ioctl. Userspace can
+ * call the ioctl to query stream status in response to EIO errno from
+ * observation fd read().
  */
 struct drm_xe_oa_stream_status {
 	/** @extensions: Pointer to the first extension struct, if any */
@@ -1665,7 +1667,7 @@ struct drm_xe_oa_stream_status {
 
 /**
  * struct drm_xe_oa_stream_info - OA stream info returned from
- * @DRM_XE_PERF_IOCTL_INFO perf fd ioctl
+ * @DRM_XE_OBSERVATION_IOCTL_INFO observation stream fd ioctl
  */
 struct drm_xe_oa_stream_info {
 	/** @extensions: Pointer to the first extension struct, if any */
-- 
cgit v1.2.3


From bcea31e2d1c7a34aeeae9458f38833d5a8409cf7 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 10 Jul 2024 11:37:18 +0200
Subject: x86/xen: eliminate some private header files

Under arch/x86/xen there is one large private header file xen-ops.h
containing most of the Xen-private x86 related declarations, and then
there are several small headers with a handful of declarations each.

Merge the small headers into xen-ops.h.

While doing that, move the declaration of xen_fifo_events from
xen-ops.h into include/xen/events.h where it should have been from the
beginning.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Message-ID: <20240710093718.14552-3-jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 include/xen/events.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/xen/events.h b/include/xen/events.h
index 3b07409f8032..de5da58a0205 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -144,4 +144,6 @@ static inline void xen_evtchn_close(evtchn_port_t port)
 		BUG();
 }
 
+extern bool xen_fifo_events;
+
 #endif	/* _XEN_EVENTS_H */
-- 
cgit v1.2.3


From 4484940514295b75389f94787f8e179ba6255353 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 8 Jul 2024 15:42:45 +0100
Subject: btrfs: avoid races when tracking progress for extent map shrinking

We store the progress (root and inode numbers) of the extent map shrinker
in fs_info without any synchronization but we can have multiple tasks
calling into the shrinker during memory allocations when there's enough
memory pressure for example.

This can result in a task A reading fs_info->extent_map_shrinker_last_ino
after another task B updates it, and task A reading
fs_info->extent_map_shrinker_last_root before task B updates it, making
task A see an odd state that isn't necessarily harmful but may make it
skip certain inode ranges or do more work than necessary by going over
the same inodes again. These unprotected accesses would also trigger
warnings from tools like KCSAN.

So add a lock to protect access to these progress fields.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index d2d94d7c3fb5..0067e8443d38 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -2556,9 +2556,10 @@ TRACE_EVENT(btrfs_extent_map_shrinker_count,
 
 TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter,
 
-	TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_to_scan, long nr),
+	TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_to_scan, long nr,
+		 u64 last_root_id, u64 last_ino),
 
-	TP_ARGS(fs_info, nr_to_scan, nr),
+	TP_ARGS(fs_info, nr_to_scan, nr, last_root_id, last_ino),
 
 	TP_STRUCT__entry_btrfs(
 		__field(	long,	nr_to_scan	)
@@ -2570,8 +2571,8 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter,
 	TP_fast_assign_btrfs(fs_info,
 		__entry->nr_to_scan	= nr_to_scan;
 		__entry->nr		= nr;
-		__entry->last_root_id	= fs_info->extent_map_shrinker_last_root;
-		__entry->last_ino	= fs_info->extent_map_shrinker_last_ino;
+		__entry->last_root_id	= last_root_id;
+		__entry->last_ino	= last_ino;
 	),
 
 	TP_printk_btrfs("nr_to_scan=%ld nr=%ld last_root=%llu(%s) last_ino=%llu",
@@ -2581,9 +2582,10 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter,
 
 TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit,
 
-	TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr),
+	TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr,
+		 u64 last_root_id, u64 last_ino),
 
-	TP_ARGS(fs_info, nr_dropped, nr),
+	TP_ARGS(fs_info, nr_dropped, nr, last_root_id, last_ino),
 
 	TP_STRUCT__entry_btrfs(
 		__field(	long,	nr_dropped	)
@@ -2595,8 +2597,8 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit,
 	TP_fast_assign_btrfs(fs_info,
 		__entry->nr_dropped	= nr_dropped;
 		__entry->nr		= nr;
-		__entry->last_root_id	= fs_info->extent_map_shrinker_last_root;
-		__entry->last_ino	= fs_info->extent_map_shrinker_last_ino;
+		__entry->last_root_id	= last_root_id;
+		__entry->last_ino	= last_ino;
 	),
 
 	TP_printk_btrfs("nr_dropped=%ld nr=%ld last_root=%llu(%s) last_ino=%llu",
-- 
cgit v1.2.3


From 5207c393d3e7dda9aff813d6b3e2264370d241be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Fri, 5 Jul 2024 15:28:28 +0200
Subject: drm/xe: Use write-back caching mode for system memory on DGFX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The caching mode for buffer objects with VRAM as a possible
placement was forced to write-combined, regardless of placement.

However, write-combined system memory is expensive to allocate and
even though it is pooled, the pool is expensive to shrink, since
it involves global CPU TLB flushes.

Moreover write-combined system memory from TTM is only reliably
available on x86 and DGFX doesn't have an x86 restriction.

So regardless of the cpu caching mode selected for a bo,
internally use write-back caching mode for system memory on DGFX.

Coherency is maintained, but user-space clients may perceive a
difference in cpu access speeds.

v2:
- Update RB- and Ack tags.
- Rephrase wording in xe_drm.h (Matt Roper)
v3:
- Really rephrase wording.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode")
Cc: Pallavi Mishra <pallavi.mishra@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: dri-devel@lists.freedesktop.org
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Effie Yu <effie.yu@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Jose Souza <jose.souza@intel.com>
Cc: Michal Mrozek <michal.mrozek@intel.com>
Cc: <stable@vger.kernel.org> # v6.8+
Acked-by: Matthew Auld <matthew.auld@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode")
Acked-by: Michal Mrozek <michal.mrozek@intel.com>
Acked-by: Effie Yu <effie.yu@intel.com> #On chat
Link: https://patchwork.freedesktop.org/patch/msgid/20240705132828.27714-1-thomas.hellstrom@linux.intel.com
(cherry picked from commit 01e0cfc994be484ddcb9e121e353e51d8bb837c0)
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 include/uapi/drm/xe_drm.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 1446c3bae515..d425b83181df 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -776,7 +776,13 @@ struct drm_xe_gem_create {
 #define DRM_XE_GEM_CPU_CACHING_WC                      2
 	/**
 	 * @cpu_caching: The CPU caching mode to select for this object. If
-	 * mmaping the object the mode selected here will also be used.
+	 * mmaping the object the mode selected here will also be used. The
+	 * exception is when mapping system memory (including data evicted
+	 * to system) on discrete GPUs. The caching mode selected will
+	 * then be overridden to DRM_XE_GEM_CPU_CACHING_WB, and coherency
+	 * between GPU- and CPU is guaranteed. The caching mode of
+	 * existing CPU-mappings will be updated transparently to
+	 * user-space clients.
 	 */
 	__u16 cpu_caching;
 	/** @pad: MBZ */
-- 
cgit v1.2.3


From b4e891901ed5409f4c33d12fd92011be925ef5b3 Mon Sep 17 00:00:00 2001
From: Steven Price <steven.price@arm.com>
Date: Wed, 12 Jun 2024 17:00:38 +0100
Subject: fixmap: Remove unused set_fixmap_offset_io()

The macro set_fixmap_offset_io() was added in commit f774b7d10e21
("arm64: fixmap: fix missing sub-page offset for earlyprintk") but then
commit 8ef0ed95ee04 ("arm64: remove arch specific earlyprintk") removed
the file causing the only user to be removed when the two commits were
merged. Since this has never been used again since the v3.15 release
remove it.

Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/fixmap.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h
index 8cc7b09c1bc7..29cab7947980 100644
--- a/include/asm-generic/fixmap.h
+++ b/include/asm-generic/fixmap.h
@@ -97,8 +97,5 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr)
 #define set_fixmap_io(idx, phys) \
 	__set_fixmap(idx, phys, FIXMAP_PAGE_IO)
 
-#define set_fixmap_offset_io(idx, phys) \
-	__set_fixmap_offset(idx, phys, FIXMAP_PAGE_IO)
-
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_GENERIC_FIXMAP_H */
-- 
cgit v1.2.3


From a808878308a8041ae10a151d69e2d22f94cae9f4 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Sun, 14 Apr 2024 11:05:25 +0300
Subject: driver core: auxiliary bus: show auxiliary device IRQs

PCI subfunctions (SF) are anchored on the auxiliary bus. PCI physical
and virtual functions are anchored on the PCI bus. The irq information
of each such function is visible to users via sysfs directory "msi_irqs"
containing files for each irq entry. However, for PCI SFs such
information is unavailable. Due to this users have no visibility on IRQs
used by the SFs.
Secondly, an SF can be multi function device supporting rdma, netdevice
and more. Without irq information at the bus level, the user is unable
to view or use the affinity of the SF IRQs.

Hence to match to the equivalent PCI PFs and VFs, add "irqs" directory,
for supporting auxiliary devices, containing file for each irq entry.

For example:
$ ls /sys/bus/auxiliary/devices/mlx5_core.sf.1/irqs/
50  51  52  53  54  55  56  57  58

Cc: Simon Horman <horms@kernel.org>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>

---
v9-v10:
- remove Przemek RB
- add name field to auxiliary_irq_info (Greg and Przemek)
- handle bogus IRQ in auxiliary_device_sysfs_irq_remove (Greg)
v8-v9:
- add Przemek RB
- use guard() in auxiliary_irq_dir_prepare (Paolo)
v7-v8:
- use cleanup.h for info and name fields (Greg)
- correct error flow in auxiliary_irq_dir_prepare (Przemek)
- add documentation for new fields of auxiliary_device (Simon)
v6-v7:
- dynamically creating irqs directory when first irq file created (Greg)
- removed irqs flag and simplified the dev_add() API (Greg)
- move sysfs related new code to a new auxiliary_sysfs.c file (Greg)
v5-v6:
- removed concept of shared and exclusive and hence global xarray (Greg)
v4-v5:
- restore global mutex and replace refcount_t with simple integer (Greg)
v3->4:
- remove global mutex (Przemek)
v2->v3:
- fix function declaration in case SYSFS isn't defined
v1->v2:
- move #ifdefs from drivers/base/auxiliary.c to
  include/linux/auxiliary_bus.h (Greg)
- use EXPORT_SYMBOL_GPL instead of EXPORT_SYMBOL (Greg)
- Fix kzalloc(ref) to kzalloc(*ref) (Simon)
- Add return description in auxiliary_device_sysfs_irq_add() kdoc (Simon)
- Fix auxiliary_irq_mode_show doc (kernel test boot)
---
 include/linux/auxiliary_bus.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h
index de21d9d24a95..3ba4487c9cd9 100644
--- a/include/linux/auxiliary_bus.h
+++ b/include/linux/auxiliary_bus.h
@@ -58,6 +58,9 @@
  *       in
  * @name: Match name found by the auxiliary device driver,
  * @id: unique identitier if multiple devices of the same name are exported,
+ * @irqs: irqs xarray contains irq indices which are used by the device,
+ * @lock: Synchronize irq sysfs creation,
+ * @irq_dir_exists: whether "irqs" directory exists,
  *
  * An auxiliary_device represents a part of its parent device's functionality.
  * It is given a name that, combined with the registering drivers
@@ -139,6 +142,11 @@ struct auxiliary_device {
 	struct device dev;
 	const char *name;
 	u32 id;
+	struct {
+		struct xarray irqs;
+		struct mutex lock; /* Synchronize irq sysfs creation */
+		bool irq_dir_exists;
+	} sysfs;
 };
 
 /**
@@ -212,8 +220,24 @@ int auxiliary_device_init(struct auxiliary_device *auxdev);
 int __auxiliary_device_add(struct auxiliary_device *auxdev, const char *modname);
 #define auxiliary_device_add(auxdev) __auxiliary_device_add(auxdev, KBUILD_MODNAME)
 
+#ifdef CONFIG_SYSFS
+int auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq);
+void auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev,
+				       int irq);
+#else /* CONFIG_SYSFS */
+static inline int
+auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq)
+{
+	return 0;
+}
+
+static inline void
+auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev, int irq) {}
+#endif
+
 static inline void auxiliary_device_uninit(struct auxiliary_device *auxdev)
 {
+	mutex_destroy(&auxdev->sysfs.lock);
 	put_device(&auxdev->dev);
 }
 
-- 
cgit v1.2.3


From ad78e05d654567e0a96f91d5db198469ddc2d4fb Mon Sep 17 00:00:00 2001
From: Philipp Stanner <pstanner@redhat.com>
Date: Thu, 13 Jun 2024 13:50:25 +0200
Subject: PCI: Add managed pcim_iomap_range()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The only managed mapping function currently is pcim_iomap() which doesn't
allow for mapping an area starting at a certain offset, which many drivers
want.

Add pcim_iomap_range() as an exported function.

Link: https://lore.kernel.org/r/20240613115032.29098-13-pstanner@redhat.com
Signed-off-by: Philipp Stanner <pstanner@redhat.com>
Signed-off-by: Krzysztof Wilczyński <kwilczynski@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0c19f0717899..98893a89bb5b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2303,6 +2303,8 @@ int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name);
 int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask,
 				   const char *name);
 void pcim_iounmap_regions(struct pci_dev *pdev, int mask);
+void __iomem *pcim_iomap_range(struct pci_dev *pdev, int bar,
+				unsigned long offset, unsigned long len);
 
 extern int pci_pci_problems;
 #define PCIPCI_FAIL		1	/* No PCI PCI DMA */
-- 
cgit v1.2.3


From 861f96a785149a0062cce6578e0fa7cb95435a7e Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Wed, 10 Jul 2024 16:33:39 +0800
Subject: iommufd: Remove IOMMUFD_PAGE_RESP_FAILURE

The response code of IOMMUFD_PAGE_RESP_FAILURE was defined to be
equivalent to the "Response Failure" in PCI spec, section 10.4.2.1.
This response code indicates that one or more pages within the
associated request group have encountered or caused an unrecoverable
error. Therefore, this response disables the PRI at the function.

Modern I/O virtualization technologies, like SR-IOV, share PRI among
the assignable device units. Therefore, a response failure on one unit
might cause I/O failure on other units.

Remove this response code so that user space can only respond with
SUCCESS or INVALID. The VMM is recommended to emulate a failure response
as a PRI reset, or PRI disable and changing to a non-PRI domain.

Fixes: c714f15860fc ("iommufd: Add fault and response message definitions")
Link: https://lore.kernel.org/r/20240710083341.44617-2-baolu.lu@linux.intel.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/linux/iommufd.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index ede2b464a761..e31385b75d0b 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -765,14 +765,10 @@ struct iommu_hwpt_pgfault {
  * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
  *                             access. This is the "Invalid Request" in PCI
  *                             10.4.2.1.
- * @IOMMUFD_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from
- *                             this device if possible. This is the "Response
- *                             Failure" in PCI 10.4.2.1.
  */
 enum iommufd_page_response_code {
 	IOMMUFD_PAGE_RESP_SUCCESS = 0,
 	IOMMUFD_PAGE_RESP_INVALID,
-	IOMMUFD_PAGE_RESP_FAILURE,
 };
 
 /**
-- 
cgit v1.2.3


From 8341eee81c794db0d8dd503c2b0ea2f55eba7334 Mon Sep 17 00:00:00 2001
From: Adrian Moreno <amorenoz@redhat.com>
Date: Wed, 10 Jul 2024 19:10:04 +0200
Subject: net: psample: fix flag being set in wrong skb

A typo makes PSAMPLE_ATTR_SAMPLE_RATE netlink flag be added to the wrong
sk_buff.

Fix the error and make the input sk_buff pointer "const" so that it
doesn't happen again.

Acked-by: Eelco Chaudron <echaudro@redhat.com>
Fixes: 7b1b2b60c63f ("net: psample: allow using rate as probability")
Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Antoine Tenart <atenart@kernel.org>
Link: https://patch.msgid.link/20240710171004.2164034-1-amorenoz@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/psample.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/psample.h b/include/net/psample.h
index c52e9ebd88dd..5071b5fc2b59 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -38,13 +38,15 @@ struct sk_buff;
 
 #if IS_ENABLED(CONFIG_PSAMPLE)
 
-void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
-			   u32 sample_rate, const struct psample_metadata *md);
+void psample_sample_packet(struct psample_group *group,
+			   const struct sk_buff *skb, u32 sample_rate,
+			   const struct psample_metadata *md);
 
 #else
 
 static inline void psample_sample_packet(struct psample_group *group,
-					 struct sk_buff *skb, u32 sample_rate,
+					 const struct sk_buff *skb,
+					 u32 sample_rate,
 					 const struct psample_metadata *md)
 {
 }
-- 
cgit v1.2.3


From 13cabc47f8ae69d24653f32c28399d493fde0a56 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <aleksander.lobakin@intel.com>
Date: Wed, 10 Jul 2024 04:30:28 -0700
Subject: netdevice: define and allocate &net_device _properly_

In fact, this structure contains a flexible array at the end, but
historically its size, alignment etc., is calculated manually.
There are several instances of the structure embedded into other
structures, but also there's ongoing effort to remove them and we
could in the meantime declare &net_device properly.
Declare the array explicitly, use struct_size() and store the array
size inside the structure, so that __counted_by() can be applied.
Don't use PTR_ALIGN(), as SLUB itself tries its best to ensure the
allocated buffer is aligned to what the user expects.
Also, change its alignment from %NETDEV_ALIGN to the cacheline size
as per several suggestions on the netdev ML.

bloat-o-meter for vmlinux:

free_netdev                                  445     440      -5
netdev_freemem                                24       -     -24
alloc_netdev_mqs                            1481    1450     -31

On x86_64 with several NICs of different vendors, I was never able to
get a &net_device pointer not aligned to the cacheline size after the
change.

Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kees Cook <kees@kernel.org>
Link: https://patch.msgid.link/20240710113036.2125584-1-leitao@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 93558645c6d0..607009150b5f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1819,7 +1819,8 @@ enum netdev_reg_state {
  *	@priv_flags:	Like 'flags' but invisible to userspace,
  *			see if.h for the definitions
  *	@gflags:	Global flags ( kept as legacy )
- *	@padded:	How much padding added by alloc_netdev()
+ *	@priv_len:	Size of the ->priv flexible array
+ *	@priv:		Flexible array containing private data
  *	@operstate:	RFC2863 operstate
  *	@link_mode:	Mapping policy to operstate
  *	@if_port:	Selectable AUI, TP, ...
@@ -2199,10 +2200,10 @@ struct net_device {
 	unsigned short		neigh_priv_len;
 	unsigned short          dev_id;
 	unsigned short          dev_port;
-	unsigned short		padded;
+	int			irq;
+	u32			priv_len;
 
 	spinlock_t		addr_list_lock;
-	int			irq;
 
 	struct netdev_hw_addr_list	uc;
 	struct netdev_hw_addr_list	mc;
@@ -2406,7 +2407,10 @@ struct net_device {
 
 	/** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */
 	struct dim_irq_moder	*irq_moder;
-};
+
+	u8			priv[] ____cacheline_aligned
+				       __counted_by(priv_len);
+} ____cacheline_aligned;
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
 /*
@@ -2596,7 +2600,7 @@ void dev_net_set(struct net_device *dev, struct net *net)
  */
 static inline void *netdev_priv(const struct net_device *dev)
 {
-	return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN);
+	return (void *)dev->priv;
 }
 
 /* Set the sysfs physical device reference for the network logical device
@@ -3127,7 +3131,6 @@ static inline void unregister_netdevice(struct net_device *dev)
 
 int netdev_refcnt_read(const struct net_device *dev);
 void free_netdev(struct net_device *dev);
-void netdev_freemem(struct net_device *dev);
 void init_dummy_netdev(struct net_device *dev);
 
 struct net_device *netdev_get_xmit_slave(struct net_device *dev,
-- 
cgit v1.2.3


From 06dcc4c9baa9e92896f00d02ffa760c0988b4371 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Fri, 12 Jul 2024 09:30:10 +0900
Subject: firewire: core: move copy_port_status() helper function to
 TP_fast_assign() block

It would be possible to put any statement in TP_fast_assign().

This commit obsoletes the helper function and put its statements to
TP_fast_assign() for the code simplicity.

Link: https://lore.kernel.org/r/20240712003010.87341-1-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/trace/events/firewire.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h
index bac9d98e88e5..b108176deb22 100644
--- a/include/trace/events/firewire.h
+++ b/include/trace/events/firewire.h
@@ -392,9 +392,6 @@ TRACE_EVENT(bus_reset_handle,
 #define PHY_PACKET_SELF_ID_GET_INITIATED_RESET(quads)	\
 	((((const u32 *)quads)[0] & SELF_ID_ZERO_INITIATED_RESET_MASK) >> SELF_ID_ZERO_INITIATED_RESET_SHIFT)
 
-void copy_port_status(u8 *port_status, unsigned int port_capacity, const u32 *self_id_sequence,
-		      unsigned int quadlet_count);
-
 TRACE_EVENT(self_id_sequence,
 	TP_PROTO(unsigned int card_index, const u32 *self_id_sequence, unsigned int quadlet_count, unsigned int generation),
 	TP_ARGS(card_index, self_id_sequence, quadlet_count, generation),
@@ -407,8 +404,16 @@ TRACE_EVENT(self_id_sequence,
 	TP_fast_assign(
 		__entry->card_index = card_index;
 		__entry->generation = generation;
-		copy_port_status(__get_dynamic_array(port_status), __get_dynamic_array_len(port_status),
-				 self_id_sequence, quadlet_count);
+		{
+			u8 *port_status = __get_dynamic_array(port_status);
+			unsigned int port_index;
+
+			for (port_index = 0; port_index < __get_dynamic_array_len(port_status); ++port_index) {
+				port_status[port_index] =
+					self_id_sequence_get_port_status(self_id_sequence,
+									 quadlet_count, port_index);
+			}
+		}
 		memcpy(__get_dynamic_array(self_id_sequence), self_id_sequence,
 					   __get_dynamic_array_len(self_id_sequence));
 	),
-- 
cgit v1.2.3


From 1a7b7326d587c9a5e8ff067e70d6aaf0333f4bb3 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 12 Jul 2024 07:51:58 +0200
Subject: vmlinux.lds.h: catch .bss..L* sections into BSS")

Commit 9a427556fb8e ("vmlinux.lds.h: catch compound literals into
data and BSS") added catches for .data..L* and .rodata..L* but missed
.bss..L*

Since commit 5431fdd2c181 ("ptrace: Convert ptrace_attach() to use
lock guards") the following appears at build:

  LD      .tmp_vmlinux.kallsyms1
powerpc64-linux-ld: warning: orphan section `.bss..Lubsan_data33' from `kernel/ptrace.o' being placed in section `.bss..Lubsan_data33'
  NM      .tmp_vmlinux.kallsyms1.syms
  KSYMS   .tmp_vmlinux.kallsyms1.S
  AS      .tmp_vmlinux.kallsyms1.S
  LD      .tmp_vmlinux.kallsyms2
powerpc64-linux-ld: warning: orphan section `.bss..Lubsan_data33' from `kernel/ptrace.o' being placed in section `.bss..Lubsan_data33'
  NM      .tmp_vmlinux.kallsyms2.syms
  KSYMS   .tmp_vmlinux.kallsyms2.S
  AS      .tmp_vmlinux.kallsyms2.S
  LD      vmlinux
powerpc64-linux-ld: warning: orphan section `.bss..Lubsan_data33' from `kernel/ptrace.o' being placed in section `.bss..Lubsan_data33'

Lets add .bss..L* to BSS_MAIN macro to catch those sections into BSS.

Fixes: 9a427556fb8e ("vmlinux.lds.h: catch compound literals into data and BSS")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202404031349.nmKhyuUG-lkp@intel.com/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/vmlinux.lds.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 5703526d6ebf..70bf1004076b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -103,7 +103,7 @@
 #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L*
 #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
 #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L*
-#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..compoundliteral*
+#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral*
 #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]*
 #else
 #define TEXT_MAIN .text
-- 
cgit v1.2.3


From 887c4cf5594a073fd60c0df84150eb06d78c6406 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Thu, 11 Jul 2024 10:11:12 -0700
Subject: efi: Rename efi_early_memdesc_ptr() to efi_memdesc_ptr()

The "early" part of the helper's name isn't accurate[1]. Drop it in
preparation for adding a new (not early) usage.

Suggested-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/lkml/CAMj1kXEyDjH0uu3Z4eBesV3PEnKGi5ArXXMp7R-hn8HdRytiPg@mail.gmail.com [1]
Signed-off-by: Kees Cook <kees@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/efi.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 3a6c04a9f9aa..67b2fa80c8b9 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -784,7 +784,7 @@ extern int efi_memattr_apply_permissions(struct mm_struct *mm,
 					 efi_memattr_perm_setter fn);
 
 /*
- * efi_early_memdesc_ptr - get the n-th EFI memmap descriptor
+ * efi_memdesc_ptr - get the n-th EFI memmap descriptor
  * @map: the start of efi memmap
  * @desc_size: the size of space for each EFI memmap descriptor
  * @n: the index of efi memmap descriptor
@@ -802,7 +802,7 @@ extern int efi_memattr_apply_permissions(struct mm_struct *mm,
  * during bootup since for_each_efi_memory_desc_xxx() is available after the
  * kernel initializes the EFI subsystem to set up struct efi_memory_map.
  */
-#define efi_early_memdesc_ptr(map, desc_size, n)			\
+#define efi_memdesc_ptr(map, desc_size, n)			\
 	(efi_memory_desc_t *)((void *)(map) + ((n) * (desc_size)))
 
 /* Iterate through an efi_memory_map */
-- 
cgit v1.2.3


From 4a2ebb082297f41803742729642961532e54079e Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Thu, 11 Jul 2024 10:11:13 -0700
Subject: efi: Replace efi_memory_attributes_table_t 0-sized array with
 flexible array

While efi_memory_attributes_table_t::entry isn't used directly as an
array, it is used as a base for pointer arithmetic. The type is wrong
as it's not technically an array of efi_memory_desc_t's; they could be
larger. Regardless, leave the type unchanged and remove the old style
"0" array size. Additionally replace the open-coded entry offset code
with the existing efi_memdesc_ptr() helper.

Signed-off-by: Kees Cook <kees@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/efi.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 67b2fa80c8b9..6bf3c4fe8511 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -608,7 +608,11 @@ typedef struct {
 	u32 num_entries;
 	u32 desc_size;
 	u32 flags;
-	efi_memory_desc_t entry[0];
+	/*
+	 * There are @num_entries following, each of size @desc_size bytes,
+	 * including an efi_memory_desc_t header. See efi_memdesc_ptr().
+	 */
+	efi_memory_desc_t entry[];
 } efi_memory_attributes_table_t;
 
 typedef struct {
-- 
cgit v1.2.3


From 79cf9c6ee44e0af7e1383365d29c64eece6665bb Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Date: Fri, 28 Jun 2024 15:29:42 +0200
Subject: media: uapi: pisp_be_config: Drop BIT() from uAPI

The pisp_be_config.h uAPI header file contains a bit-field definition
that uses the BIT() helper macro.

As the BIT() identifier is not defined in userspace, drop it from the
uAPI header.

Fixes: c6c49bac8770 ("media: uapi: Add Raspberry Pi PiSP Back End uAPI")
Signed-off-by: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/uapi/linux/media/raspberrypi/pisp_be_config.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/media/raspberrypi/pisp_be_config.h b/include/uapi/linux/media/raspberrypi/pisp_be_config.h
index 1684ae068d4f..27d0cc417d6b 100644
--- a/include/uapi/linux/media/raspberrypi/pisp_be_config.h
+++ b/include/uapi/linux/media/raspberrypi/pisp_be_config.h
@@ -146,7 +146,7 @@ struct pisp_be_dpc_config {
  */
 struct pisp_be_geq_config {
 	__u16 offset;
-#define PISP_BE_GEQ_SHARPER BIT(15)
+#define PISP_BE_GEQ_SHARPER (1U << 15)
 #define PISP_BE_GEQ_SLOPE ((1 << 10) - 1)
 	/* top bit is the "sharper" flag, slope value is bottom 10 bits */
 	__u16 slope_sharper;
-- 
cgit v1.2.3


From 1991a09e6d7c1dff5efea65b5b31082332f2e64e Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Date: Fri, 28 Jun 2024 15:29:43 +0200
Subject: media: uapi: pisp_common: Add 32 bpp format test

Add definition and test for 32-bits image formats to the pisp_common.h
uAPI header.

Fixes: c6c49bac8770 ("media: uapi: Add Raspberry Pi PiSP Back End uAPI")
Signed-off-by: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Acked-by: David Plowman <david.plowman@raspberrypi.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/uapi/linux/media/raspberrypi/pisp_common.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/media/raspberrypi/pisp_common.h b/include/uapi/linux/media/raspberrypi/pisp_common.h
index b2522e29c976..74d096188233 100644
--- a/include/uapi/linux/media/raspberrypi/pisp_common.h
+++ b/include/uapi/linux/media/raspberrypi/pisp_common.h
@@ -72,6 +72,8 @@ enum pisp_image_format {
 	PISP_IMAGE_FORMAT_SHIFT_8 = 0x00080000,
 	PISP_IMAGE_FORMAT_SHIFT_MASK = 0x000f0000,
 
+	PISP_IMAGE_FORMAT_BPP_32 = 0x00100000,
+
 	PISP_IMAGE_FORMAT_UNCOMPRESSED = 0x00000000,
 	PISP_IMAGE_FORMAT_COMPRESSION_MODE_1 = 0x01000000,
 	PISP_IMAGE_FORMAT_COMPRESSION_MODE_2 = 0x02000000,
@@ -134,6 +136,7 @@ enum pisp_image_format {
 	 PISP_IMAGE_FORMAT_PLANARITY_PLANAR)
 #define PISP_IMAGE_FORMAT_wallpaper(fmt)                                       \
 	((fmt) & PISP_IMAGE_FORMAT_WALLPAPER_ROLL)
+#define PISP_IMAGE_FORMAT_bpp_32(fmt) ((fmt) & PISP_IMAGE_FORMAT_BPP_32)
 #define PISP_IMAGE_FORMAT_HOG(fmt)                                             \
 	((fmt) &                                                               \
 	 (PISP_IMAGE_FORMAT_HOG_SIGNED | PISP_IMAGE_FORMAT_HOG_UNSIGNED))
-- 
cgit v1.2.3


From f5cee94f2dfe5b7625452b831f7629369ce68a7b Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Date: Fri, 28 Jun 2024 15:29:44 +0200
Subject: media: uapi: pisp_common: Capitalize all macros

The macro used to inspect an image format characteristic use a mixture
of capitalized and non-capitalized letters, which is rather unusual for
the Linux kernel style.

Capitalize all identifiers.

Fixes: c6c49bac8770 ("media: uapi: Add Raspberry Pi PiSP Back End uAPI")
Signed-off-by: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/uapi/linux/media/raspberrypi/pisp_common.h | 38 +++++++++++-----------
 1 file changed, 19 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/media/raspberrypi/pisp_common.h b/include/uapi/linux/media/raspberrypi/pisp_common.h
index 74d096188233..cbdccfed1261 100644
--- a/include/uapi/linux/media/raspberrypi/pisp_common.h
+++ b/include/uapi/linux/media/raspberrypi/pisp_common.h
@@ -92,51 +92,51 @@ enum pisp_image_format {
 				     PISP_IMAGE_FORMAT_THREE_CHANNEL
 };
 
-#define PISP_IMAGE_FORMAT_bps_8(fmt)                                           \
+#define PISP_IMAGE_FORMAT_BPS_8(fmt)                                           \
 	(((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_8)
-#define PISP_IMAGE_FORMAT_bps_10(fmt)                                          \
+#define PISP_IMAGE_FORMAT_BPS_10(fmt)                                          \
 	(((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_10)
-#define PISP_IMAGE_FORMAT_bps_12(fmt)                                          \
+#define PISP_IMAGE_FORMAT_BPS_12(fmt)                                          \
 	(((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_12)
-#define PISP_IMAGE_FORMAT_bps_16(fmt)                                          \
+#define PISP_IMAGE_FORMAT_BPS_16(fmt)                                          \
 	(((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_16)
-#define PISP_IMAGE_FORMAT_bps(fmt)                                             \
+#define PISP_IMAGE_FORMAT_BPS(fmt)                                             \
 	(((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) ?                                \
 	       8 + (2 << (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) - 1)) : 8)
-#define PISP_IMAGE_FORMAT_shift(fmt)                                           \
+#define PISP_IMAGE_FORMAT_SHIFT(fmt)                                           \
 	(((fmt) & PISP_IMAGE_FORMAT_SHIFT_MASK) / PISP_IMAGE_FORMAT_SHIFT_1)
-#define PISP_IMAGE_FORMAT_three_channel(fmt)                                   \
+#define PISP_IMAGE_FORMAT_THREE_CHANNEL(fmt)                                   \
 	((fmt) & PISP_IMAGE_FORMAT_THREE_CHANNEL)
-#define PISP_IMAGE_FORMAT_single_channel(fmt)                                  \
+#define PISP_IMAGE_FORMAT_SINGLE_CHANNEL(fmt)                                  \
 	(!((fmt) & PISP_IMAGE_FORMAT_THREE_CHANNEL))
-#define PISP_IMAGE_FORMAT_compressed(fmt)                                      \
+#define PISP_IMAGE_FORMAT_COMPRESSED(fmt)                                      \
 	(((fmt) & PISP_IMAGE_FORMAT_COMPRESSION_MASK) !=                       \
 	 PISP_IMAGE_FORMAT_UNCOMPRESSED)
-#define PISP_IMAGE_FORMAT_sampling_444(fmt)                                    \
+#define PISP_IMAGE_FORMAT_SAMPLING_444(fmt)                                    \
 	(((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) ==                          \
 	 PISP_IMAGE_FORMAT_SAMPLING_444)
-#define PISP_IMAGE_FORMAT_sampling_422(fmt)                                    \
+#define PISP_IMAGE_FORMAT_SAMPLING_422(fmt)                                    \
 	(((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) ==                          \
 	 PISP_IMAGE_FORMAT_SAMPLING_422)
-#define PISP_IMAGE_FORMAT_sampling_420(fmt)                                    \
+#define PISP_IMAGE_FORMAT_SAMPLING_420(fmt)                                    \
 	(((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) ==                          \
 	 PISP_IMAGE_FORMAT_SAMPLING_420)
-#define PISP_IMAGE_FORMAT_order_normal(fmt)                                    \
+#define PISP_IMAGE_FORMAT_ORDER_NORMAL(fmt)                                    \
 	(!((fmt) & PISP_IMAGE_FORMAT_ORDER_SWAPPED))
-#define PISP_IMAGE_FORMAT_order_swapped(fmt)                                   \
+#define PISP_IMAGE_FORMAT_ORDER_SWAPPED(fmt)                                   \
 	((fmt) & PISP_IMAGE_FORMAT_ORDER_SWAPPED)
-#define PISP_IMAGE_FORMAT_interleaved(fmt)                                     \
+#define PISP_IMAGE_FORMAT_INTERLEAVED(fmt)                                     \
 	(((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) ==                         \
 	 PISP_IMAGE_FORMAT_PLANARITY_INTERLEAVED)
-#define PISP_IMAGE_FORMAT_semiplanar(fmt)                                      \
+#define PISP_IMAGE_FORMAT_SEMIPLANAR(fmt)                                      \
 	(((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) ==                         \
 	 PISP_IMAGE_FORMAT_PLANARITY_SEMI_PLANAR)
-#define PISP_IMAGE_FORMAT_planar(fmt)                                          \
+#define PISP_IMAGE_FORMAT_PLANAR(fmt)                                          \
 	(((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) ==                         \
 	 PISP_IMAGE_FORMAT_PLANARITY_PLANAR)
-#define PISP_IMAGE_FORMAT_wallpaper(fmt)                                       \
+#define PISP_IMAGE_FORMAT_WALLPAPER(fmt)                                       \
 	((fmt) & PISP_IMAGE_FORMAT_WALLPAPER_ROLL)
-#define PISP_IMAGE_FORMAT_bpp_32(fmt) ((fmt) & PISP_IMAGE_FORMAT_BPP_32)
+#define PISP_IMAGE_FORMAT_BPP_32(fmt) ((fmt) & PISP_IMAGE_FORMAT_BPP_32)
 #define PISP_IMAGE_FORMAT_HOG(fmt)                                             \
 	((fmt) &                                                               \
 	 (PISP_IMAGE_FORMAT_HOG_SIGNED | PISP_IMAGE_FORMAT_HOG_UNSIGNED))
-- 
cgit v1.2.3


From 639065c621df9bad9d94373084e1c568f81d34e0 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Date: Fri, 28 Jun 2024 15:29:45 +0200
Subject: media: uapi: pisp_be_config: Re-sort pisp_be_tiles_config

The order of the members of pisp_be_tiles_config is relevant
as the driver logic assumes 'config' to be at offset 0.

Re-sort the member to match the driver's expectations.

Fixes: c6c49bac8770 ("media: uapi: Add Raspberry Pi PiSP Back End uAPI")
Signed-off-by: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Acked-by: Naushir Patuck <naush@raspberrypi.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 include/uapi/linux/media/raspberrypi/pisp_be_config.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/media/raspberrypi/pisp_be_config.h b/include/uapi/linux/media/raspberrypi/pisp_be_config.h
index 27d0cc417d6b..f8650ca92bf8 100644
--- a/include/uapi/linux/media/raspberrypi/pisp_be_config.h
+++ b/include/uapi/linux/media/raspberrypi/pisp_be_config.h
@@ -919,9 +919,9 @@ struct pisp_tile {
  * @config:	PiSP Back End configuration
  */
 struct pisp_be_tiles_config {
+	struct pisp_be_config config;
 	struct pisp_tile tiles[PISP_BACK_END_NUM_TILES];
 	__u32 num_tiles;
-	struct pisp_be_config config;
 } __attribute__((packed));
 
 #endif /* _UAPI_PISP_BE_CONFIG_H_ */
-- 
cgit v1.2.3


From 1c2c57bd439ecaffc728139a0a00701bee886d0a Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Date: Fri, 28 Jun 2024 15:29:46 +0200
Subject: media: uapi: pisp_be_config: Add extra config fields

Complete the pisp_be_config strcture by adding fields that even if not
written to the HW are relevant to complete the uAPI and put it in par
with the BSP driver.

Fixes: c6c49bac8770 ("media: uapi: Add Raspberry Pi PiSP Back End uAPI")
Signed-off-by: Jacopo Mondi <jacopo.mondi@ideasonboard.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 .../uapi/linux/media/raspberrypi/pisp_be_config.h  | 41 ++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/media/raspberrypi/pisp_be_config.h b/include/uapi/linux/media/raspberrypi/pisp_be_config.h
index f8650ca92bf8..cbeb714f4d61 100644
--- a/include/uapi/linux/media/raspberrypi/pisp_be_config.h
+++ b/include/uapi/linux/media/raspberrypi/pisp_be_config.h
@@ -716,6 +716,13 @@ struct pisp_be_hog_buffer_config {
 /**
  * struct pisp_be_config - RaspberryPi PiSP Back End Processing configuration
  *
+ * @input_buffer:		Input buffer addresses
+ * @tdn_input_buffer:		TDN input buffer addresses
+ * @stitch_input_buffer:	Stitch input buffer addresses
+ * @tdn_output_buffer:		TDN output buffer addresses
+ * @stitch_output_buffer:	Stitch output buffer addresses
+ * @output_buffer:		Output buffers addresses
+ * @hog_buffer:			HOG buffer addresses
  * @global:			Global PiSP configuration
  * @input_format:		Input image format
  * @decompress:			Decompress configuration
@@ -753,8 +760,30 @@ struct pisp_be_hog_buffer_config {
  * @resample:			Resampling configuration
  * @output_format:		Output format configuration
  * @hog:			HOG configuration
+ * @axi:			AXI bus configuration
+ * @lsc_extra:			LSC extra info
+ * @cac_extra:			CAC extra info
+ * @downscale_extra:		Downscaler extra info
+ * @resample_extra:		Resample extra info
+ * @crop:			Crop configuration
+ * @hog_format:			HOG format info
+ * @dirty_flags_bayer:		Bayer enable dirty flags
+ *				(:c:type:`pisp_be_bayer_enable`)
+ * @dirty_flags_rgb:		RGB enable dirty flags
+ *				(:c:type:`pisp_be_rgb_enable`)
+ * @dirty_flags_extra:		Extra dirty flags
  */
 struct pisp_be_config {
+	/* I/O configuration: */
+	struct pisp_be_input_buffer_config input_buffer;
+	struct pisp_be_tdn_input_buffer_config tdn_input_buffer;
+	struct pisp_be_stitch_input_buffer_config stitch_input_buffer;
+	struct pisp_be_tdn_output_buffer_config tdn_output_buffer;
+	struct pisp_be_stitch_output_buffer_config stitch_output_buffer;
+	struct pisp_be_output_buffer_config
+				output_buffer[PISP_BACK_END_NUM_OUTPUTS];
+	struct pisp_be_hog_buffer_config hog_buffer;
+	/* Processing configuration: */
 	struct pisp_be_global_config global;
 	struct pisp_image_format_config input_format;
 	struct pisp_decompress_config decompress;
@@ -793,6 +822,18 @@ struct pisp_be_config {
 	struct pisp_be_output_format_config
 				output_format[PISP_BACK_END_NUM_OUTPUTS];
 	struct pisp_be_hog_config hog;
+	struct pisp_be_axi_config axi;
+	/* Non-register fields: */
+	struct pisp_be_lsc_extra lsc_extra;
+	struct pisp_be_cac_extra cac_extra;
+	struct pisp_be_downscale_extra
+				downscale_extra[PISP_BACK_END_NUM_OUTPUTS];
+	struct pisp_be_resample_extra resample_extra[PISP_BACK_END_NUM_OUTPUTS];
+	struct pisp_be_crop_config crop;
+	struct pisp_image_format_config hog_format;
+	__u32 dirty_flags_bayer; /* these use pisp_be_bayer_enable */
+	__u32 dirty_flags_rgb; /* use pisp_be_rgb_enable */
+	__u32 dirty_flags_extra; /* these use pisp_be_dirty_t */
 } __attribute__((packed));
 
 /**
-- 
cgit v1.2.3


From 0728c810873e1a94e8b767f9809af940c9307d60 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 2 Jul 2024 16:42:33 +0200
Subject: thermal: trip: Pass trip pointer to .set_trip_temp() thermal zone
 callback

Out of several drivers implementing the .set_trip_temp() thermal zone
operation, three don't actually use the trip ID argument passed to it,
two call __thermal_zone_get_trip() to get a struct thermal_trip
corresponding to the given trip ID, and the other use the trip ID as an
index into their own data structures with the assumption that it will
always match the ordering of entries in the trips table passed to the
core during thermal zone registration, which is fragile and not really
guaranteed.

Even though the trip IDs used by the core are in fact their indices in the
trips table passed to it by the thermal zone creator, that is purely a
matter of convenience and should not be relied on for correctness.

For this reason, modify trip_point_temp_store() to pass a (const) trip
pointer to .set_trip_temp() and adjust the drivers implementing it
accordingly.

This helps to simplify the drivers invoking __thermal_zone_get_trip()
from their .set_trip_temp() callback functions because they will not
need to do it now and the other drivers can store their internal
trip indices in the priv field in struct thermal_trip and their
.set_trip_temp() callback functions can get those indices from there.

The intel_quark_dts thermal driver can instead use the trip type to
determine the requisite trip index.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://patch.msgid.link/8392906.T7Z3S40VBb@rjwysocki.net
[ rjw: Add missing colon and 2 empty code lines ]
[ rjw: Add missing change in imx_thermal.c and adjust the changelog ]
[ rjw: Drop an unused local variable ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/thermal.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 3bd1b361ec34..cdd9c722cda2 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -93,7 +93,8 @@ struct thermal_zone_device_ops {
 	int (*set_trips) (struct thermal_zone_device *, int, int);
 	int (*change_mode) (struct thermal_zone_device *,
 		enum thermal_device_mode);
-	int (*set_trip_temp) (struct thermal_zone_device *, int, int);
+	int (*set_trip_temp) (struct thermal_zone_device *,
+			      const struct thermal_trip *, int);
 	int (*get_crit_temp) (struct thermal_zone_device *, int *);
 	int (*set_emul_temp) (struct thermal_zone_device *, int);
 	int (*get_trend) (struct thermal_zone_device *,
-- 
cgit v1.2.3


From 5b674baa596e624fde8bf62b9a3d8a26eef399b2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 2 Jul 2024 16:44:27 +0200
Subject: thermal: trip: Fold __thermal_zone_get_trip() into its caller

Because __thermal_zone_get_trip() is only called by thermal_zone_get_trip()
now, fold the former into the latter.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://patch.msgid.link/22339769.EfDdHjke4D@rjwysocki.net
---
 include/linux/thermal.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index cdd9c722cda2..25fbf960b474 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -202,8 +202,6 @@ static inline void devm_thermal_of_zone_unregister(struct device *dev,
 }
 #endif
 
-int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
-			    struct thermal_trip *trip);
 int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
 			  struct thermal_trip *trip);
 int for_each_thermal_trip(struct thermal_zone_device *tz,
-- 
cgit v1.2.3


From 4bdc3eaa102b6bedb0800f76f53eca516d5cf20c Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Wed, 10 Jul 2024 16:35:21 +1200
Subject: clocksource/drivers/realtek: Add timer driver for rtl-otto platforms

The timer/counter block on the Realtek SoCs provides up to 5 timers. It
also includes a watchdog timer which is handled by the
realtek_otto_wdt.c driver.

One timer will be used per CPU as a local clock event generator. An
additional timer will be used as an overal stable clocksource.

Signed-off-by: Markus Stockhausen <markus.stockhausen@gmx.de>
Signed-off-by: Sander Vanheule <sander@svanheule.net>
Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Link: https://lore.kernel.org/r/20240710043524.1535151-8-chris.packham@alliedtelesis.co.nz
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 include/linux/cpuhotplug.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 7a5785f405b6..56b744dc1317 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -171,6 +171,7 @@ enum cpuhp_state {
 	CPUHP_AP_ARMADA_TIMER_STARTING,
 	CPUHP_AP_MIPS_GIC_TIMER_STARTING,
 	CPUHP_AP_ARC_TIMER_STARTING,
+	CPUHP_AP_REALTEK_TIMER_STARTING,
 	CPUHP_AP_RISCV_TIMER_STARTING,
 	CPUHP_AP_CLINT_TIMER_STARTING,
 	CPUHP_AP_CSKY_TIMER_STARTING,
-- 
cgit v1.2.3


From 75ed63a5ab5d1d2872c735bc7edf8fef0e2fa2ea Mon Sep 17 00:00:00 2001
From: Shenghao Ding <shenghao-ding@ti.com>
Date: Wed, 10 Jul 2024 14:42:37 +0800
Subject: ASoC: tas2781: Add new Kontrol to set tas2563 digital Volume

Requriment from customer to add new kcontrol to set tas2563 digital
Volume

Signed-off-by: Shenghao Ding <shenghao-ding@ti.com>
Link: https://patch.msgid.link/20240710064238.1480-1-shenghao-ding@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/tas2781-tlv.h | 262 +++++++++++++++++++++++++++++++++++++++++++-
 include/sound/tas2781.h     |   1 +
 2 files changed, 262 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h
index 1dc59005d241..99c41bfc7827 100644
--- a/include/sound/tas2781-tlv.h
+++ b/include/sound/tas2781-tlv.h
@@ -2,7 +2,7 @@
 //
 // ALSA SoC Texas Instruments TAS2781 Audio Smart Amplifier
 //
-// Copyright (C) 2022 - 2023 Texas Instruments Incorporated
+// Copyright (C) 2022 - 2024 Texas Instruments Incorporated
 // https://www.ti.com
 //
 // The TAS2781 driver implements a flexible and configurable
@@ -17,5 +17,265 @@
 
 static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0);
 static const DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0);
+static const DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1);
 
+/* pow(10, db/20) * pow(2,30) */
+static const unsigned char tas2563_dvc_table[][4] = {
+	{ 0X00, 0X00, 0X00, 0X00 }, /* -121.5db */
+	{ 0X00, 0X00, 0X03, 0XBC }, /* -121.0db */
+	{ 0X00, 0X00, 0X03, 0XF5 }, /* -120.5db */
+	{ 0X00, 0X00, 0X04, 0X31 }, /* -120.0db */
+	{ 0X00, 0X00, 0X04, 0X71 }, /* -119.5db */
+	{ 0X00, 0X00, 0X04, 0XB4 }, /* -119.0db */
+	{ 0X00, 0X00, 0X04, 0XFC }, /* -118.5db */
+	{ 0X00, 0X00, 0X05, 0X47 }, /* -118.0db */
+	{ 0X00, 0X00, 0X05, 0X97 }, /* -117.5db */
+	{ 0X00, 0X00, 0X05, 0XEC }, /* -117.0db */
+	{ 0X00, 0X00, 0X06, 0X46 }, /* -116.5db */
+	{ 0X00, 0X00, 0X06, 0XA5 }, /* -116.0db */
+	{ 0X00, 0X00, 0X07, 0X0A }, /* -115.5db */
+	{ 0X00, 0X00, 0X07, 0X75 }, /* -115.0db */
+	{ 0X00, 0X00, 0X07, 0XE6 }, /* -114.5db */
+	{ 0X00, 0X00, 0X08, 0X5E }, /* -114.0db */
+	{ 0X00, 0X00, 0X08, 0XDD }, /* -113.5db */
+	{ 0X00, 0X00, 0X09, 0X63 }, /* -113.0db */
+	{ 0X00, 0X00, 0X09, 0XF2 }, /* -112.5db */
+	{ 0X00, 0X00, 0X0A, 0X89 }, /* -112.0db */
+	{ 0X00, 0X00, 0X0B, 0X28 }, /* -111.5db */
+	{ 0X00, 0X00, 0X0B, 0XD2 }, /* -111.0db */
+	{ 0X00, 0X00, 0X0C, 0X85 }, /* -110.5db */
+	{ 0X00, 0X00, 0X0D, 0X43 }, /* -110.0db */
+	{ 0X00, 0X00, 0X0E, 0X0C }, /* -109.5db */
+	{ 0X00, 0X00, 0X0E, 0XE1 }, /* -109.0db */
+	{ 0X00, 0X00, 0X0F, 0XC3 }, /* -108.5db */
+	{ 0X00, 0X00, 0X10, 0XB2 }, /* -108.0db */
+	{ 0X00, 0X00, 0X11, 0XAF }, /* -107.5db */
+	{ 0X00, 0X00, 0X12, 0XBC }, /* -107.0db */
+	{ 0X00, 0X00, 0X13, 0XD8 }, /* -106.5db */
+	{ 0X00, 0X00, 0X15, 0X05 }, /* -106.0db */
+	{ 0X00, 0X00, 0X16, 0X44 }, /* -105.5db */
+	{ 0X00, 0X00, 0X17, 0X96 }, /* -105.0db */
+	{ 0X00, 0X00, 0X18, 0XFB }, /* -104.5db */
+	{ 0X00, 0X00, 0X1A, 0X76 }, /* -104.0db */
+	{ 0X00, 0X00, 0X1C, 0X08 }, /* -103.5db */
+	{ 0X00, 0X00, 0X1D, 0XB1 }, /* -103.0db */
+	{ 0X00, 0X00, 0X1F, 0X73 }, /* -102.5db */
+	{ 0X00, 0X00, 0X21, 0X51 }, /* -102.0db */
+	{ 0X00, 0X00, 0X23, 0X4A }, /* -101.5db */
+	{ 0X00, 0X00, 0X25, 0X61 }, /* -101.0db */
+	{ 0X00, 0X00, 0X27, 0X98 }, /* -100.5db */
+	{ 0X00, 0X00, 0X29, 0XF1 }, /* -100.0db */
+	{ 0X00, 0X00, 0X2C, 0X6D }, /* -99.5db */
+	{ 0X00, 0X00, 0X2F, 0X0F }, /* -99.0db */
+	{ 0X00, 0X00, 0X31, 0XD9 }, /* -98.5db */
+	{ 0X00, 0X00, 0X34, 0XCD }, /* -98.0db */
+	{ 0X00, 0X00, 0X37, 0XEE }, /* -97.5db */
+	{ 0X00, 0X00, 0X3B, 0X3F }, /* -97.0db */
+	{ 0X00, 0X00, 0X3E, 0XC1 }, /* -96.5db */
+	{ 0X00, 0X00, 0X42, 0X79 }, /* -96.0db */
+	{ 0X00, 0X00, 0X46, 0X6A }, /* -95.5db */
+	{ 0X00, 0X00, 0X4A, 0X96 }, /* -95.0db */
+	{ 0X00, 0X00, 0X4F, 0X01 }, /* -94.5db */
+	{ 0X00, 0X00, 0X53, 0XAF }, /* -94.0db */
+	{ 0X00, 0X00, 0X58, 0XA5 }, /* -93.5db */
+	{ 0X00, 0X00, 0X5D, 0XE6 }, /* -93.0db */
+	{ 0X00, 0X00, 0X63, 0X76 }, /* -92.5db */
+	{ 0X00, 0X00, 0X69, 0X5B }, /* -92.0db */
+	{ 0X00, 0X00, 0X6F, 0X99 }, /* -91.5db */
+	{ 0X00, 0X00, 0X76, 0X36 }, /* -91.0db */
+	{ 0X00, 0X00, 0X7D, 0X37 }, /* -90.5db */
+	{ 0X00, 0X00, 0X84, 0XA2 }, /* -90.0db */
+	{ 0X00, 0X00, 0X8C, 0X7E }, /* -89.5db */
+	{ 0X00, 0X00, 0X94, 0XD1 }, /* -89.0db */
+	{ 0X00, 0X00, 0X9D, 0XA3 }, /* -88.5db */
+	{ 0X00, 0X00, 0XA6, 0XFA }, /* -88.0db */
+	{ 0X00, 0X00, 0XB0, 0XDF }, /* -87.5db */
+	{ 0X00, 0X00, 0XBB, 0X5A }, /* -87.0db */
+	{ 0X00, 0X00, 0XC6, 0X74 }, /* -86.5db */
+	{ 0X00, 0X00, 0XD2, 0X36 }, /* -86.0db */
+	{ 0X00, 0X00, 0XDE, 0XAB }, /* -85.5db */
+	{ 0X00, 0X00, 0XEB, 0XDC }, /* -85.0db */
+	{ 0X00, 0X00, 0XF9, 0XD6 }, /* -84.5db */
+	{ 0X00, 0X01, 0X08, 0XA4 }, /* -84.0db */
+	{ 0X00, 0X01, 0X18, 0X52 }, /* -83.5db */
+	{ 0X00, 0X01, 0X28, 0XEF }, /* -83.0db */
+	{ 0X00, 0X01, 0X3A, 0X87 }, /* -82.5db */
+	{ 0X00, 0X01, 0X4D, 0X2A }, /* -82.0db */
+	{ 0X00, 0X01, 0X60, 0XE8 }, /* -81.5db */
+	{ 0X00, 0X01, 0X75, 0XD1 }, /* -81.0db */
+	{ 0X00, 0X01, 0X8B, 0XF7 }, /* -80.5db */
+	{ 0X00, 0X01, 0XA3, 0X6E }, /* -80.0db */
+	{ 0X00, 0X01, 0XBC, 0X48 }, /* -79.5db */
+	{ 0X00, 0X01, 0XD6, 0X9B }, /* -79.0db */
+	{ 0X00, 0X01, 0XF2, 0X7E }, /* -78.5db */
+	{ 0X00, 0X02, 0X10, 0X08 }, /* -78.0db */
+	{ 0X00, 0X02, 0X2F, 0X51 }, /* -77.5db */
+	{ 0X00, 0X02, 0X50, 0X76 }, /* -77.0db */
+	{ 0X00, 0X02, 0X73, 0X91 }, /* -76.5db */
+	{ 0X00, 0X02, 0X98, 0XC0 }, /* -76.0db */
+	{ 0X00, 0X02, 0XC0, 0X24 }, /* -75.5db */
+	{ 0X00, 0X02, 0XE9, 0XDD }, /* -75.0db */
+	{ 0X00, 0X03, 0X16, 0X0F }, /* -74.5db */
+	{ 0X00, 0X03, 0X44, 0XDF }, /* -74.0db */
+	{ 0X00, 0X03, 0X76, 0X76 }, /* -73.5db */
+	{ 0X00, 0X03, 0XAA, 0XFC }, /* -73.0db */
+	{ 0X00, 0X03, 0XE2, 0XA0 }, /* -72.5db */
+	{ 0X00, 0X04, 0X1D, 0X8F }, /* -72.0db */
+	{ 0X00, 0X04, 0X5B, 0XFD }, /* -71.5db */
+	{ 0X00, 0X04, 0X9E, 0X1D }, /* -71.0db */
+	{ 0X00, 0X04, 0XE4, 0X29 }, /* -70.5db */
+	{ 0X00, 0X05, 0X2E, 0X5A }, /* -70.0db */
+	{ 0X00, 0X05, 0X7C, 0XF2 }, /* -69.5db */
+	{ 0X00, 0X05, 0XD0, 0X31 }, /* -69.0db */
+	{ 0X00, 0X06, 0X28, 0X60 }, /* -68.5db */
+	{ 0X00, 0X06, 0X85, 0XC8 }, /* -68.0db */
+	{ 0X00, 0X06, 0XE8, 0XB9 }, /* -67.5db */
+	{ 0X00, 0X07, 0X51, 0X86 }, /* -67.0db */
+	{ 0X00, 0X07, 0XC0, 0X8A }, /* -66.5db */
+	{ 0X00, 0X08, 0X36, 0X21 }, /* -66.0db */
+	{ 0X00, 0X08, 0XB2, 0XB0 }, /* -65.5db */
+	{ 0X00, 0X09, 0X36, 0XA1 }, /* -65.0db */
+	{ 0X00, 0X09, 0XC2, 0X63 }, /* -64.5db */
+	{ 0X00, 0X0A, 0X56, 0X6D }, /* -64.0db */
+	{ 0X00, 0X0A, 0XF3, 0X3C }, /* -63.5db */
+	{ 0X00, 0X0B, 0X99, 0X56 }, /* -63.0db */
+	{ 0X00, 0X0C, 0X49, 0X48 }, /* -62.5db */
+	{ 0X00, 0X0D, 0X03, 0XA7 }, /* -62.0db */
+	{ 0X00, 0X0D, 0XC9, 0X11 }, /* -61.5db */
+	{ 0X00, 0X0E, 0X9A, 0X2D }, /* -61.0db */
+	{ 0X00, 0X0F, 0X77, 0XAD }, /* -60.5db */
+	{ 0X00, 0X10, 0X62, 0X4D }, /* -60.0db */
+	{ 0X00, 0X11, 0X5A, 0XD5 }, /* -59.5db */
+	{ 0X00, 0X12, 0X62, 0X16 }, /* -59.0db */
+	{ 0X00, 0X13, 0X78, 0XF0 }, /* -58.5db */
+	{ 0X00, 0X14, 0XA0, 0X50 }, /* -58.0db */
+	{ 0X00, 0X15, 0XD9, 0X31 }, /* -57.5db */
+	{ 0X00, 0X17, 0X24, 0X9C }, /* -57.0db */
+	{ 0X00, 0X18, 0X83, 0XAA }, /* -56.5db */
+	{ 0X00, 0X19, 0XF7, 0X86 }, /* -56.0db */
+	{ 0X00, 0X1B, 0X81, 0X6A }, /* -55.5db */
+	{ 0X00, 0X1D, 0X22, 0XA4 }, /* -55.0db */
+	{ 0X00, 0X1E, 0XDC, 0X98 }, /* -54.5db */
+	{ 0X00, 0X20, 0XB0, 0XBC }, /* -54.0db */
+	{ 0X00, 0X22, 0XA0, 0X9D }, /* -53.5db */
+	{ 0X00, 0X24, 0XAD, 0XE0 }, /* -53.0db */
+	{ 0X00, 0X26, 0XDA, 0X43 }, /* -52.5db */
+	{ 0X00, 0X29, 0X27, 0X9D }, /* -52.0db */
+	{ 0X00, 0X2B, 0X97, 0XE3 }, /* -51.5db */
+	{ 0X00, 0X2E, 0X2D, 0X27 }, /* -51.0db */
+	{ 0X00, 0X30, 0XE9, 0X9A }, /* -50.5db */
+	{ 0X00, 0X33, 0XCF, 0X8D }, /* -50.0db */
+	{ 0X00, 0X36, 0XE1, 0X78 }, /* -49.5db */
+	{ 0X00, 0X3A, 0X21, 0XF3 }, /* -49.0db */
+	{ 0X00, 0X3D, 0X93, 0XC3 }, /* -48.5db */
+	{ 0X00, 0X41, 0X39, 0XD3 }, /* -48.0db */
+	{ 0X00, 0X45, 0X17, 0X3B }, /* -47.5db */
+	{ 0X00, 0X49, 0X2F, 0X44 }, /* -47.0db */
+	{ 0X00, 0X4D, 0X85, 0X66 }, /* -46.5db */
+	{ 0X00, 0X52, 0X1D, 0X50 }, /* -46.0db */
+	{ 0X00, 0X56, 0XFA, 0XE8 }, /* -45.5db */
+	{ 0X00, 0X5C, 0X22, 0X4E }, /* -45.0db */
+	{ 0X00, 0X61, 0X97, 0XE1 }, /* -44.5db */
+	{ 0X00, 0X67, 0X60, 0X44 }, /* -44.0db */
+	{ 0X00, 0X6D, 0X80, 0X60 }, /* -43.5db */
+	{ 0X00, 0X73, 0XFD, 0X65 }, /* -43.0db */
+	{ 0X00, 0X7A, 0XDC, 0XD7 }, /* -42.5db */
+	{ 0X00, 0X82, 0X24, 0X8A }, /* -42.0db */
+	{ 0X00, 0X89, 0XDA, 0XAB }, /* -41.5db */
+	{ 0X00, 0X92, 0X05, 0XC6 }, /* -41.0db */
+	{ 0X00, 0X9A, 0XAC, 0XC8 }, /* -40.5db */
+	{ 0X00, 0XA3, 0XD7, 0X0A }, /* -40.0db */
+	{ 0X00, 0XAD, 0X8C, 0X52 }, /* -39.5db */
+	{ 0X00, 0XB7, 0XD4, 0XDD }, /* -39.0db */
+	{ 0X00, 0XC2, 0XB9, 0X65 }, /* -38.5db */
+	{ 0X00, 0XCE, 0X43, 0X28 }, /* -38.0db */
+	{ 0X00, 0XDA, 0X7B, 0XF1 }, /* -37.5db */
+	{ 0X00, 0XE7, 0X6E, 0X1E }, /* -37.0db */
+	{ 0X00, 0XF5, 0X24, 0XAC }, /* -36.5db */
+	{ 0X01, 0X03, 0XAB, 0X3D }, /* -36.0db */
+	{ 0X01, 0X13, 0X0E, 0X24 }, /* -35.5db */
+	{ 0X01, 0X23, 0X5A, 0X71 }, /* -35.0db */
+	{ 0X01, 0X34, 0X9D, 0XF8 }, /* -34.5db */
+	{ 0X01, 0X46, 0XE7, 0X5D }, /* -34.0db */
+	{ 0X01, 0X5A, 0X46, 0X27 }, /* -33.5db */
+	{ 0X01, 0X6E, 0XCA, 0XC5 }, /* -33.0db */
+	{ 0X01, 0X84, 0X86, 0X9F }, /* -32.5db */
+	{ 0X01, 0X9B, 0X8C, 0X27 }, /* -32.0db */
+	{ 0X01, 0XB3, 0XEE, 0XE5 }, /* -31.5db */
+	{ 0X01, 0XCD, 0XC3, 0X8C }, /* -31.0db */
+	{ 0X01, 0XE9, 0X20, 0X05 }, /* -30.5db */
+	{ 0X02, 0X06, 0X1B, 0X89 }, /* -30.0db */
+	{ 0X02, 0X24, 0XCE, 0XB0 }, /* -29.5db */
+	{ 0X02, 0X45, 0X53, 0X85 }, /* -29.0db */
+	{ 0X02, 0X67, 0XC5, 0XA2 }, /* -28.5db */
+	{ 0X02, 0X8C, 0X42, 0X3F }, /* -28.0db */
+	{ 0X02, 0XB2, 0XE8, 0X55 }, /* -27.5db */
+	{ 0X02, 0XDB, 0XD8, 0XAD }, /* -27.0db */
+	{ 0X03, 0X07, 0X36, 0X05 }, /* -26.5db */
+	{ 0X03, 0X35, 0X25, 0X29 }, /* -26.0db */
+	{ 0X03, 0X65, 0XCD, 0X13 }, /* -25.5db */
+	{ 0X03, 0X99, 0X57, 0X0C }, /* -25.0db */
+	{ 0X03, 0XCF, 0XEE, 0XCF }, /* -24.5db */
+	{ 0X04, 0X09, 0XC2, 0XB0 }, /* -24.0db */
+	{ 0X04, 0X47, 0X03, 0XC1 }, /* -23.5db */
+	{ 0X04, 0X87, 0XE5, 0XFB }, /* -23.0db */
+	{ 0X04, 0XCC, 0XA0, 0X6D }, /* -22.5db */
+	{ 0X05, 0X15, 0X6D, 0X68 }, /* -22.0db */
+	{ 0X05, 0X62, 0X8A, 0XB3 }, /* -21.5db */
+	{ 0X05, 0XB4, 0X39, 0XBC }, /* -21.0db */
+	{ 0X06, 0X0A, 0XBF, 0XD4 }, /* -20.5db */
+	{ 0X06, 0X66, 0X66, 0X66 }, /* -20.0db */
+	{ 0X06, 0XC7, 0X7B, 0X36 }, /* -19.5db */
+	{ 0X07, 0X2E, 0X50, 0XA6 }, /* -19.0db */
+	{ 0X07, 0X9B, 0X3D, 0XF6 }, /* -18.5db */
+	{ 0X08, 0X0E, 0X9F, 0X96 }, /* -18.0db */
+	{ 0X08, 0X88, 0XD7, 0X6D }, /* -17.5db */
+	{ 0X09, 0X0A, 0X4D, 0X2F }, /* -17.0db */
+	{ 0X09, 0X93, 0X6E, 0XB8 }, /* -16.5db */
+	{ 0X0A, 0X24, 0XB0, 0X62 }, /* -16.0db */
+	{ 0X0A, 0XBE, 0X8D, 0X70 }, /* -15.5db */
+	{ 0X0B, 0X61, 0X88, 0X71 }, /* -15.0db */
+	{ 0X0C, 0X0E, 0X2B, 0XB0 }, /* -14.5db */
+	{ 0X0C, 0XC5, 0X09, 0XAB }, /* -14.0db */
+	{ 0X0D, 0X86, 0XBD, 0X8D }, /* -13.5db */
+	{ 0X0E, 0X53, 0XEB, 0XB3 }, /* -13.0db */
+	{ 0X0F, 0X2D, 0X42, 0X38 }, /* -12.5db */
+	{ 0X10, 0X13, 0X79, 0X87 }, /* -12.0db */
+	{ 0X11, 0X07, 0X54, 0XF9 }, /* -11.5db */
+	{ 0X12, 0X09, 0XA3, 0X7A }, /* -11.0db */
+	{ 0X13, 0X1B, 0X40, 0X39 }, /* -10.5db */
+	{ 0X14, 0X3D, 0X13, 0X62 }, /* -10.0db */
+	{ 0X15, 0X70, 0X12, 0XE1 }, /* -9.5db */
+	{ 0X16, 0XB5, 0X43, 0X37 }, /* -9.0db */
+	{ 0X18, 0X0D, 0XB8, 0X54 }, /* -8.5db */
+	{ 0X19, 0X7A, 0X96, 0X7F }, /* -8.0db */
+	{ 0X1A, 0XFD, 0X13, 0X54 }, /* -7.5db */
+	{ 0X1C, 0X96, 0X76, 0XC6 }, /* -7.0db */
+	{ 0X1E, 0X48, 0X1C, 0X37 }, /* -6.5db */
+	{ 0X20, 0X13, 0X73, 0X9E }, /* -6.0db */
+	{ 0X21, 0XFA, 0X02, 0XBF }, /* -5.5db */
+	{ 0X23, 0XFD, 0X66, 0X78 }, /* -5.0db */
+	{ 0X26, 0X1F, 0X54, 0X1C }, /* -4.5db */
+	{ 0X28, 0X61, 0X9A, 0XE9 }, /* -4.0db */
+	{ 0X2A, 0XC6, 0X25, 0X91 }, /* -3.5db */
+	{ 0X2D, 0X4E, 0XFB, 0XD5 }, /* -3.0db */
+	{ 0X2F, 0XFE, 0X44, 0X48 }, /* -2.5db */
+	{ 0X32, 0XD6, 0X46, 0X17 }, /* -2.0db */
+	{ 0X35, 0XD9, 0X6B, 0X02 }, /* -1.5db */
+	{ 0X39, 0X0A, 0X41, 0X5F }, /* -1.0db */
+	{ 0X3C, 0X6B, 0X7E, 0X4F }, /* -0.5db */
+	{ 0X40, 0X00, 0X00, 0X00 }, /* 0.0db */
+	{ 0X43, 0XCA, 0XD0, 0X22 }, /* 0.5db */
+	{ 0X47, 0XCF, 0X26, 0X7D }, /* 1.0db */
+	{ 0X4C, 0X10, 0X6B, 0XA5 }, /* 1.5db */
+	{ 0X50, 0X92, 0X3B, 0XE3 }, /* 2.0db */
+	{ 0X55, 0X58, 0X6A, 0X46 }, /* 2.5db */
+	{ 0X5A, 0X67, 0X03, 0XDF }, /* 3.0db */
+	{ 0X5F, 0XC2, 0X53, 0X32 }, /* 3.5db */
+	{ 0X65, 0X6E, 0XE3, 0XDB }, /* 4.0db */
+	{ 0X6B, 0X71, 0X86, 0X68 }, /* 4.5db */
+	{ 0X71, 0XCF, 0X54, 0X71 }, /* 5.0db */
+	{ 0X78, 0X8D, 0XB4, 0XE9 }, /* 5.5db */
+	{ 0XFF, 0XFF, 0XFF, 0XFF }, /* 6.0db */
+};
 #endif
diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h
index a43ad6dcb7c7..18161d02a96f 100644
--- a/include/sound/tas2781.h
+++ b/include/sound/tas2781.h
@@ -50,6 +50,7 @@
 #define TASDEVICE_I2CChecksum		TASDEVICE_REG(0x0, 0x0, 0x7E)
 
 /* Volume control */
+#define TAS2563_DVC_LVL			TASDEVICE_REG(0x00, 0x02, 0x0C)
 #define TAS2781_DVC_LVL			TASDEVICE_REG(0x0, 0x0, 0x1A)
 #define TAS2781_AMP_LEVEL		TASDEVICE_REG(0x0, 0x0, 0x03)
 #define TAS2781_AMP_LEVEL_MASK		GENMASK(5, 1)
-- 
cgit v1.2.3


From 27e6a24a4cf3d25421c0f6ebb7c39f45fc14d20f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 11 Jul 2024 13:56:54 -0400
Subject: mm, virt: merge AS_UNMOVABLE and AS_INACCESSIBLE

The flags AS_UNMOVABLE and AS_INACCESSIBLE were both added just for guest_memfd;
AS_UNMOVABLE is already in existing versions of Linux, while AS_INACCESSIBLE was
acked for inclusion in 6.11.

But really, they are the same thing: only guest_memfd uses them, at least for
now, and guest_memfd pages are unmovable because they should not be
accessed by the CPU.

So merge them into one; use the AS_INACCESSIBLE name which is more comprehensive.
At the same time, this fixes an embarrassing bug where AS_INACCESSIBLE was used
as a bit mask, despite it being just a bit index.

The bug was mostly benign, because AS_INACCESSIBLE's bit representation (1010)
corresponded to setting AS_UNEVICTABLE (which is already set) and AS_ENOSPC
(except no async writes can happen on the guest_memfd).  So the AS_INACCESSIBLE
flag simply had no effect.

Fixes: 1d23040caa8b ("KVM: guest_memfd: Use AS_INACCESSIBLE when creating guest_memfd inode")
Fixes: c72ceafbd12c ("mm: Introduce AS_INACCESSIBLE for encrypted/confidential memory")
Cc: linux-mm@kvack.org
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Hildenbrand <david@redhat.com>
Tested-by: Michael Roth <michael.roth@amd.com>
Reviewed-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/pagemap.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ce7bac8f81da..e05585eda771 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -208,8 +208,8 @@ enum mapping_flags {
 	AS_RELEASE_ALWAYS,	/* Call ->release_folio(), even if no private data */
 	AS_STABLE_WRITES,	/* must wait for writeback before modifying
 				   folio contents */
-	AS_UNMOVABLE,		/* The mapping cannot be moved, ever */
-	AS_INACCESSIBLE,	/* Do not attempt direct R/W access to the mapping */
+	AS_INACCESSIBLE,	/* Do not attempt direct R/W access to the mapping,
+				   including to move the mapping */
 };
 
 /**
@@ -310,20 +310,20 @@ static inline void mapping_clear_stable_writes(struct address_space *mapping)
 	clear_bit(AS_STABLE_WRITES, &mapping->flags);
 }
 
-static inline void mapping_set_unmovable(struct address_space *mapping)
+static inline void mapping_set_inaccessible(struct address_space *mapping)
 {
 	/*
-	 * It's expected unmovable mappings are also unevictable. Compaction
+	 * It's expected inaccessible mappings are also unevictable. Compaction
 	 * migrate scanner (isolate_migratepages_block()) relies on this to
 	 * reduce page locking.
 	 */
 	set_bit(AS_UNEVICTABLE, &mapping->flags);
-	set_bit(AS_UNMOVABLE, &mapping->flags);
+	set_bit(AS_INACCESSIBLE, &mapping->flags);
 }
 
-static inline bool mapping_unmovable(struct address_space *mapping)
+static inline bool mapping_inaccessible(struct address_space *mapping)
 {
-	return test_bit(AS_UNMOVABLE, &mapping->flags);
+	return test_bit(AS_INACCESSIBLE, &mapping->flags);
 }
 
 static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
-- 
cgit v1.2.3


From bc1a5cd002116552db4c3541e91f8a5b1b0cf65d Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <isaku.yamahata@intel.com>
Date: Wed, 10 Apr 2024 15:07:28 -0700
Subject: KVM: Add KVM_PRE_FAULT_MEMORY vcpu ioctl to pre-populate guest memory

Add a new ioctl KVM_PRE_FAULT_MEMORY in the KVM common code. It iterates on the
memory range and calls the arch-specific function.  The implementation is
optional and enabled by a Kconfig symbol.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Reviewed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Message-ID: <819322b8f25971f2b9933bfa4506e618508ad782.1712785629.git.isaku.yamahata@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |  5 +++++
 include/uapi/linux/kvm.h | 10 ++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7b57878c8c18..c3c922bf077f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2477,4 +2477,9 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages
 void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
 #endif
 
+#ifdef CONFIG_KVM_GENERIC_PRE_FAULT_MEMORY
+long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
+				    struct kvm_pre_fault_memory *range);
+#endif
+
 #endif
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d03842abae57..e5af8c692dc0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -917,6 +917,7 @@ struct kvm_enable_cap {
 #define KVM_CAP_MEMORY_ATTRIBUTES 233
 #define KVM_CAP_GUEST_MEMFD 234
 #define KVM_CAP_VM_TYPES 235
+#define KVM_CAP_PRE_FAULT_MEMORY 236
 
 struct kvm_irq_routing_irqchip {
 	__u32 irqchip;
@@ -1548,4 +1549,13 @@ struct kvm_create_guest_memfd {
 	__u64 reserved[6];
 };
 
+#define KVM_PRE_FAULT_MEMORY	_IOWR(KVMIO, 0xd5, struct kvm_pre_fault_memory)
+
+struct kvm_pre_fault_memory {
+	__u64 gpa;
+	__u64 size;
+	__u64 flags;
+	__u64 padding[5];
+};
+
 #endif /* __LINUX_KVM_H */
-- 
cgit v1.2.3


From 9b2bc6b9a264b863a2273c02db5ee9e214e0a526 Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Fri, 12 Jul 2024 12:31:32 +0100
Subject: iommu: Move IOMMU_DIRTY_NO_CLEAR define

Fixes the compile issue when CONFIG_IOMMU_API is not set.

Fixes: 4fe88fd8b4ae ("iommu/io-pgtable-arm: Add read_and_clear_dirty() support")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202407121602.HL9ih1it-lkp@intel.com/
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Reviewed-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20240712113132.45100-1-shameerali.kolothum.thodi@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/iommu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7bc8dff7cf6d..104ce84647d4 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -317,6 +317,9 @@ enum iommu_dev_features {
 #define IOMMU_PASID_INVALID	(-1U)
 typedef unsigned int ioasid_t;
 
+/* Read but do not clear any dirty bits */
+#define IOMMU_DIRTY_NO_CLEAR (1 << 0)
+
 #ifdef CONFIG_IOMMU_API
 
 /**
@@ -353,9 +356,6 @@ struct iommu_dirty_bitmap {
 	struct iommu_iotlb_gather *gather;
 };
 
-/* Read but do not clear any dirty bits */
-#define IOMMU_DIRTY_NO_CLEAR (1 << 0)
-
 /**
  * struct iommu_dirty_ops - domain specific dirty tracking operations
  * @set_dirty_tracking: Enable or Disable dirty tracking on the iommu domain
-- 
cgit v1.2.3


From 2be32bbe6989e071aea1767cb54c7528a0dac07f Mon Sep 17 00:00:00 2001
From: Eric Chan <ericchancf@google.com>
Date: Thu, 11 Jul 2024 19:39:17 +0000
Subject: kunit: Fix the comment of KUNIT_ASSERT_STRNEQ as assertion

The current comment for KUNIT_ASSERT_STRNEQ incorrectly describes it as
an expectation. Since KUNIT_ASSERT_STRNEQ is an assertion, updates the
comment to correctly refer to it as such.

Signed-off-by: Eric Chan <ericchancf@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/test.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/kunit/test.h b/include/kunit/test.h
index ec61cad6b71d..e37df1df5a92 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -1455,12 +1455,12 @@ do {									       \
 				   ##__VA_ARGS__)
 
 /**
- * KUNIT_ASSERT_STRNEQ() - Expects that strings @left and @right are not equal.
+ * KUNIT_ASSERT_STRNEQ() - An assertion that strings @left and @right are not equal.
  * @test: The test context object.
  * @left: an arbitrary expression that evaluates to a null terminated string.
  * @right: an arbitrary expression that evaluates to a null terminated string.
  *
- * Sets an expectation that the values that @left and @right evaluate to are
+ * Sets an assertion that the values that @left and @right evaluate to are
  * not equal. This is semantically equivalent to
  * KUNIT_ASSERT_TRUE(@test, strcmp((@left), (@right))). See KUNIT_ASSERT_TRUE()
  * for more information.
-- 
cgit v1.2.3


From 7d4087b013895524438f2522367c984f776e09e3 Mon Sep 17 00:00:00 2001
From: Eric Chan <ericchancf@google.com>
Date: Thu, 11 Jul 2024 19:39:31 +0000
Subject: kunit: Rename KUNIT_ASSERT_FAILURE to KUNIT_FAIL_AND_ABORT for
 readability

Both KUNIT_FAIL and KUNIT_ASSERT_FAILURE defined to KUNIT_FAIL_ASSERTION
with different tpye of kunit_assert_type. The current naming of
KUNIT_ASSERT_FAILURE and KUNIT_FAIL_ASSERTION is confusing due to their
similarities. To improve readability and symmetry, renames
KUNIT_ASSERT_FAILURE to KUNIT_FAIL_AND_ABORT. Makes the naming
consistent, with KUNIT_FAIL and KUNIT_FAIL_AND_ABORT being symmetrical.
Additionally, an explanation for KUNIT_FAIL_AND_ABORT has been added to
clarify its usage.

Signed-off-by: Eric Chan <ericchancf@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/assert.h |  2 +-
 include/kunit/test.h   | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/kunit/assert.h b/include/kunit/assert.h
index 7e7490a74b13..bb879389f11d 100644
--- a/include/kunit/assert.h
+++ b/include/kunit/assert.h
@@ -60,7 +60,7 @@ void kunit_assert_prologue(const struct kunit_loc *loc,
  * struct kunit_fail_assert - Represents a plain fail expectation/assertion.
  * @assert: The parent of this type.
  *
- * Represents a simple KUNIT_FAIL/KUNIT_ASSERT_FAILURE that always fails.
+ * Represents a simple KUNIT_FAIL/KUNIT_FAIL_AND_ABORT that always fails.
  */
 struct kunit_fail_assert {
 	struct kunit_assert assert;
diff --git a/include/kunit/test.h b/include/kunit/test.h
index e37df1df5a92..fb58fa530c5e 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -1228,7 +1228,18 @@ do {									       \
 						fmt,			       \
 						##__VA_ARGS__)
 
-#define KUNIT_ASSERT_FAILURE(test, fmt, ...) \
+/**
+ * KUNIT_FAIL_AND_ABORT() - Always causes a test to fail and abort when evaluated.
+ * @test: The test context object.
+ * @fmt: an informational message to be printed when the assertion is made.
+ * @...: string format arguments.
+ *
+ * The opposite of KUNIT_SUCCEED(), it is an assertion that always fails. In
+ * other words, it always results in a failed assertion, and consequently
+ * always causes the test case to fail and abort when evaluated.
+ * See KUNIT_ASSERT_TRUE() for more information.
+ */
+#define KUNIT_FAIL_AND_ABORT(test, fmt, ...) \
 	KUNIT_FAIL_ASSERTION(test, KUNIT_ASSERTION, fmt, ##__VA_ARGS__)
 
 /**
-- 
cgit v1.2.3


From ebf51e460e488511d9ee60b07d00dac68883facf Mon Sep 17 00:00:00 2001
From: Eric Chan <ericchancf@google.com>
Date: Thu, 11 Jul 2024 19:39:45 +0000
Subject: kunit: Introduce KUNIT_ASSERT_MEMEQ and KUNIT_ASSERT_MEMNEQ macros

Introduces KUNIT_ASSERT_MEMEQ and KUNIT_ASSERT_MEMNEQ macros
to provide assert-type equivalents for memory comparison.
While KUNIT_EXPECT_MEMEQ and KUNIT_EXPECT_MEMNEQ are available for
expectations, the addition of these new macros ensures that assertions
can also be used for memory comparisons, enhancing the consistency and
completeness of the kunit framework.

Signed-off-by: Eric Chan <ericchancf@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/test.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'include')

diff --git a/include/kunit/test.h b/include/kunit/test.h
index fb58fa530c5e..e2a1f0928e8b 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -1486,6 +1486,60 @@ do {									       \
 				   fmt,					       \
 				   ##__VA_ARGS__)
 
+/**
+ * KUNIT_ASSERT_MEMEQ() - Asserts that the first @size bytes of @left and @right are equal.
+ * @test: The test context object.
+ * @left: An arbitrary expression that evaluates to the specified size.
+ * @right: An arbitrary expression that evaluates to the specified size.
+ * @size: Number of bytes compared.
+ *
+ * Sets an assertion that the values that @left and @right evaluate to are
+ * equal. This is semantically equivalent to
+ * KUNIT_ASSERT_TRUE(@test, !memcmp((@left), (@right), (@size))). See
+ * KUNIT_ASSERT_TRUE() for more information.
+ *
+ * Although this assertion works for any memory block, it is not recommended
+ * for comparing more structured data, such as structs. This assertion is
+ * recommended for comparing, for example, data arrays.
+ */
+#define KUNIT_ASSERT_MEMEQ(test, left, right, size) \
+	KUNIT_ASSERT_MEMEQ_MSG(test, left, right, size, NULL)
+
+#define KUNIT_ASSERT_MEMEQ_MSG(test, left, right, size, fmt, ...)	       \
+	KUNIT_MEM_ASSERTION(test,					       \
+			    KUNIT_ASSERTION,				       \
+			    left, ==, right,				       \
+			    size,					       \
+			    fmt,					       \
+			    ##__VA_ARGS__)
+
+/**
+ * KUNIT_ASSERT_MEMNEQ() - Asserts that the first @size bytes of @left and @right are not equal.
+ * @test: The test context object.
+ * @left: An arbitrary expression that evaluates to the specified size.
+ * @right: An arbitrary expression that evaluates to the specified size.
+ * @size: Number of bytes compared.
+ *
+ * Sets an assertion that the values that @left and @right evaluate to are
+ * not equal. This is semantically equivalent to
+ * KUNIT_ASSERT_TRUE(@test, memcmp((@left), (@right), (@size))). See
+ * KUNIT_ASSERT_TRUE() for more information.
+ *
+ * Although this assertion works for any memory block, it is not recommended
+ * for comparing more structured data, such as structs. This assertion is
+ * recommended for comparing, for example, data arrays.
+ */
+#define KUNIT_ASSERT_MEMNEQ(test, left, right, size) \
+	KUNIT_ASSERT_MEMNEQ_MSG(test, left, right, size, NULL)
+
+#define KUNIT_ASSERT_MEMNEQ_MSG(test, left, right, size, fmt, ...)	       \
+	KUNIT_MEM_ASSERTION(test,					       \
+			    KUNIT_ASSERTION,				       \
+			    left, !=, right,				       \
+			    size,					       \
+			    fmt,					       \
+			    ##__VA_ARGS__)
+
 /**
  * KUNIT_ASSERT_NULL() - Asserts that pointers @ptr is null.
  * @test: The test context object.
-- 
cgit v1.2.3


From 617069741dfbb141bc4574531a5dbbbad8ddf197 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 10 Jul 2024 20:53:12 +0200
Subject: dm: introduce the target flag mempool_needs_integrity

This commit introduces the dm target flag mempool_needs_integrity. When
the flag is set, device mapper will call bioset_integrity_create on it's
bio sets. The target can then call bio_integrity_alloc on the bios
allocated from the table's mempool.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
 include/linux/device-mapper.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 384649a61bfa..1363f87fbba6 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -405,6 +405,12 @@ struct dm_target {
 	 * support it.
 	 */
 	bool flush_bypasses_map:1;
+
+	/*
+	 * Set if the target calls bio_integrity_alloc on bios received
+	 * in the map method.
+	 */
+	bool mempool_needs_integrity:1;
 };
 
 void *dm_per_bio_data(struct bio *bio, size_t data_size);
-- 
cgit v1.2.3


From 6a26f9c68901797261bc145975a02f85be0c1d8f Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Thu, 11 Jul 2024 10:14:57 +0000
Subject: cgroup/misc: Introduce misc.events.local

Currently the event counting provided by misc.events is hierarchical,
it's not practical if user is only concerned with events of a
specified cgroup. Therefore, introduce misc.events.local collect events
specific to the given cgroup.

This is analogous to memory.events.local and pids.events.local.

Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/misc_cgroup.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h
index 618392d41975..49eef10c8e59 100644
--- a/include/linux/misc_cgroup.h
+++ b/include/linux/misc_cgroup.h
@@ -40,6 +40,7 @@ struct misc_res {
 	atomic64_t watermark;
 	atomic64_t usage;
 	atomic64_t events;
+	atomic64_t events_local;
 };
 
 /**
@@ -53,6 +54,8 @@ struct misc_cg {
 
 	/* misc.events */
 	struct cgroup_file events_file;
+	/* misc.events.local */
+	struct cgroup_file events_local_file;
 
 	struct misc_res res[MISC_CG_RES_TYPES];
 };
-- 
cgit v1.2.3


From f7866c35873377313ff94398f17d425b28b71de1 Mon Sep 17 00:00:00 2001
From: Tengda Wu <wutengda@huaweicloud.com>
Date: Thu, 11 Jul 2024 22:58:18 +0800
Subject: bpf: Fix null pointer dereference in resolve_prog_type() for
 BPF_PROG_TYPE_EXT

When loading a EXT program without specifying `attr->attach_prog_fd`,
the `prog->aux->dst_prog` will be null. At this time, calling
resolve_prog_type() anywhere will result in a null pointer dereference.

Example stack trace:

[    8.107863] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004
[    8.108262] Mem abort info:
[    8.108384]   ESR = 0x0000000096000004
[    8.108547]   EC = 0x25: DABT (current EL), IL = 32 bits
[    8.108722]   SET = 0, FnV = 0
[    8.108827]   EA = 0, S1PTW = 0
[    8.108939]   FSC = 0x04: level 0 translation fault
[    8.109102] Data abort info:
[    8.109203]   ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
[    8.109399]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
[    8.109614]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[    8.109836] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000101354000
[    8.110011] [0000000000000004] pgd=0000000000000000, p4d=0000000000000000
[    8.112624] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP
[    8.112783] Modules linked in:
[    8.113120] CPU: 0 PID: 99 Comm: may_access_dire Not tainted 6.10.0-rc3-next-20240613-dirty #1
[    8.113230] Hardware name: linux,dummy-virt (DT)
[    8.113390] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[    8.113429] pc : may_access_direct_pkt_data+0x24/0xa0
[    8.113746] lr : add_subprog_and_kfunc+0x634/0x8e8
[    8.113798] sp : ffff80008283b9f0
[    8.113813] x29: ffff80008283b9f0 x28: ffff800082795048 x27: 0000000000000001
[    8.113881] x26: ffff0000c0bb2600 x25: 0000000000000000 x24: 0000000000000000
[    8.113897] x23: ffff0000c1134000 x22: 000000000001864f x21: ffff0000c1138000
[    8.113912] x20: 0000000000000001 x19: ffff0000c12b8000 x18: ffffffffffffffff
[    8.113929] x17: 0000000000000000 x16: 0000000000000000 x15: 0720072007200720
[    8.113944] x14: 0720072007200720 x13: 0720072007200720 x12: 0720072007200720
[    8.113958] x11: 0720072007200720 x10: 0000000000f9fca4 x9 : ffff80008021f4e4
[    8.113991] x8 : 0101010101010101 x7 : 746f72705f6d656d x6 : 000000001e0e0f5f
[    8.114006] x5 : 000000000001864f x4 : ffff0000c12b8000 x3 : 000000000000001c
[    8.114020] x2 : 0000000000000002 x1 : 0000000000000000 x0 : 0000000000000000
[    8.114126] Call trace:
[    8.114159]  may_access_direct_pkt_data+0x24/0xa0
[    8.114202]  bpf_check+0x3bc/0x28c0
[    8.114214]  bpf_prog_load+0x658/0xa58
[    8.114227]  __sys_bpf+0xc50/0x2250
[    8.114240]  __arm64_sys_bpf+0x28/0x40
[    8.114254]  invoke_syscall.constprop.0+0x54/0xf0
[    8.114273]  do_el0_svc+0x4c/0xd8
[    8.114289]  el0_svc+0x3c/0x140
[    8.114305]  el0t_64_sync_handler+0x134/0x150
[    8.114331]  el0t_64_sync+0x168/0x170
[    8.114477] Code: 7100707f 54000081 f9401c00 f9403800 (b9400403)
[    8.118672] ---[ end trace 0000000000000000 ]---

One way to fix it is by forcing `attach_prog_fd` non-empty when
bpf_prog_load(). But this will lead to `libbpf_probe_bpf_prog_type`
API broken which use verifier log to probe prog type and will log
nothing if we reject invalid EXT prog before bpf_check().

Another way is by adding null check in resolve_prog_type().

The issue was introduced by commit 4a9c7bbe2ed4 ("bpf: Resolve to
prog->aux->dst_prog->type only for BPF_PROG_TYPE_EXT") which wanted
to correct type resolution for BPF_PROG_TYPE_TRACING programs. Before
that, the type resolution of BPF_PROG_TYPE_EXT prog actually follows
the logic below:

  prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;

It implies that when EXT program is not yet attached to `dst_prog`,
the prog type should be EXT itself. This code worked fine in the past.
So just keep using it.

Fix this by returning `prog->type` for BPF_PROG_TYPE_EXT if `dst_prog`
is not present in resolve_prog_type().

Fixes: 4a9c7bbe2ed4 ("bpf: Resolve to prog->aux->dst_prog->type only for BPF_PROG_TYPE_EXT")
Signed-off-by: Tengda Wu <wutengda@huaweicloud.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20240711145819.254178-2-wutengda@huaweicloud.com
---
 include/linux/bpf_verifier.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index e98ba5a5cf79..6503c85b10a3 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -856,7 +856,7 @@ static inline u32 type_flag(u32 type)
 /* only use after check_attach_btf_id() */
 static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog)
 {
-	return prog->type == BPF_PROG_TYPE_EXT ?
+	return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->dst_prog) ?
 		prog->aux->dst_prog->type : prog->type;
 }
 
-- 
cgit v1.2.3


From 6cc040542ba7b2c60e5119cd04d841fcf048c872 Mon Sep 17 00:00:00 2001
From: Vivek Kasireddy <vivek.kasireddy@intel.com>
Date: Sun, 23 Jun 2024 23:36:09 -0700
Subject: mm/gup: introduce unpin_folio/unpin_folios helpers

Patch series "mm/gup: Introduce memfd_pin_folios() for pinning memfd
folios", v16.

Currently, some drivers (e.g, Udmabuf) that want to longterm-pin the
pages/folios associated with a memfd, do so by simply taking a reference
on them.  This is not desirable because the pages/folios may reside in
Movable zone or CMA block.

Therefore, having drivers use memfd_pin_folios() API ensures that the
folios are appropriately pinned via FOLL_PIN for longterm DMA.

This patchset also introduces a few helpers and converts the Udmabuf
driver to use folios and memfd_pin_folios() API to longterm-pin the folios
for DMA.  Two new Udmabuf selftests are also included to test the driver
and the new API.


This patch (of 9):

These helpers are the folio versions of unpin_user_page/unpin_user_pages.
They are currently only useful for unpinning folios pinned by
memfd_pin_folios() or other associated routines.  However, they could find
new uses in the future, when more and more folio-only helpers are added to
GUP.

We should probably sanity check the folio as part of unpin similar to how
it is done in unpin_user_page/unpin_user_pages but we cannot cleanly do
that at the moment without also checking the subpage.  Therefore, sanity
checking needs to be added to these routines once we have a way to
determine if any given folio is anon-exclusive (via a per folio
AnonExclusive flag).

Link: https://lkml.kernel.org/r/20240624063952.1572359-1-vivek.kasireddy@intel.com
Link: https://lkml.kernel.org/r/20240624063952.1572359-2-vivek.kasireddy@intel.com
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Dave Airlie <airlied@redhat.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Junxiao Chang <junxiao.chang@intel.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a2e3ebead410..7b84379e3a1b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1584,11 +1584,13 @@ static inline void put_page(struct page *page)
 #define GUP_PIN_COUNTING_BIAS (1U << 10)
 
 void unpin_user_page(struct page *page);
+void unpin_folio(struct folio *folio);
 void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
 				 bool make_dirty);
 void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
 				      bool make_dirty);
 void unpin_user_pages(struct page **pages, unsigned long npages);
+void unpin_folios(struct folio **folios, unsigned long nfolios);
 
 static inline bool is_cow_mapping(vm_flags_t flags)
 {
-- 
cgit v1.2.3


From 89c1905d9c140372b7f50ef48f42378cf85d9bc5 Mon Sep 17 00:00:00 2001
From: Vivek Kasireddy <vivek.kasireddy@intel.com>
Date: Sun, 23 Jun 2024 23:36:11 -0700
Subject: mm/gup: introduce memfd_pin_folios() for pinning memfd folios

For drivers that would like to longterm-pin the folios associated with a
memfd, the memfd_pin_folios() API provides an option to not only pin the
folios via FOLL_PIN but also to check and migrate them if they reside in
movable zone or CMA block.  This API currently works with memfds but it
should work with any files that belong to either shmemfs or hugetlbfs.
Files belonging to other filesystems are rejected for now.

The folios need to be located first before pinning them via FOLL_PIN.  If
they are found in the page cache, they can be immediately pinned.
Otherwise, they need to be allocated using the filesystem specific APIs
and then pinned.

[akpm@linux-foundation.org: improve the CONFIG_MMU=n situation, per SeongJae]
[vivek.kasireddy@intel.com: return -EINVAL if the end offset is greater than the size of memfd]
  Link: https://lkml.kernel.org/r/IA0PR11MB71850525CBC7D541CAB45DF1F8DB2@IA0PR11MB7185.namprd11.prod.outlook.com
Link: https://lkml.kernel.org/r/20240624063952.1572359-4-vivek.kasireddy@intel.com
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> (v2)
Reviewed-by: David Hildenbrand <david@redhat.com> (v3)
Reviewed-by: Christoph Hellwig <hch@lst.de> (v6)
Acked-by: Dave Airlie <airlied@redhat.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Hugh Dickins <hughd@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Junxiao Chang <junxiao.chang@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memfd.h | 5 +++++
 include/linux/mm.h    | 3 +++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/memfd.h b/include/linux/memfd.h
index e7abf6fa4c52..3f2cf339ceaf 100644
--- a/include/linux/memfd.h
+++ b/include/linux/memfd.h
@@ -6,11 +6,16 @@
 
 #ifdef CONFIG_MEMFD_CREATE
 extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned int arg);
+struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx);
 #else
 static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned int a)
 {
 	return -EINVAL;
 }
+static inline struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
+{
+	return ERR_PTR(-EINVAL);
+}
 #endif
 
 #endif /* __LINUX_MEMFD_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7b84379e3a1b..5f1075d19600 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2500,6 +2500,9 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 		    struct page **pages, unsigned int gup_flags);
 long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 		    struct page **pages, unsigned int gup_flags);
+long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end,
+		      struct folio **folios, unsigned int max_folios,
+		      pgoff_t *offset);
 
 int get_user_pages_fast(unsigned long start, int nr_pages,
 			unsigned int gup_flags, struct page **pages);
-- 
cgit v1.2.3


From ed5d583a88a9207b866c14ba834984c6f3c51d23 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 27 Jun 2024 10:08:54 -0700
Subject: fs/procfs: implement efficient VMA querying API for /proc/<pid>/maps

/proc/<pid>/maps file is extremely useful in practice for various tasks
involving figuring out process memory layout, what files are backing any
given memory range, etc.  One important class of applications that
absolutely rely on this are profilers/stack symbolizers (perf tool being
one of them).  Patterns of use differ, but they generally would fall into
two categories.

In on-demand pattern, a profiler/symbolizer would normally capture stack
trace containing absolute memory addresses of some functions, and would
then use /proc/<pid>/maps file to find corresponding backing ELF files
(normally, only executable VMAs are of interest), file offsets within
them, and then continue from there to get yet more information (ELF
symbols, DWARF information) to get human-readable symbolic information.
This pattern is used by Meta's fleet-wide profiler, as one example.

In preprocessing pattern, application doesn't know the set of addresses of
interest, so it has to fetch all relevant VMAs (again, probably only
executable ones), store or cache them, then proceed with profiling and
stack trace capture.  Once done, it would do symbolization based on stored
VMA information.  This can happen at much later point in time.  This
patterns is used by perf tool, as an example.

In either case, there are both performance and correctness requirement
involved.  This address to VMA information translation has to be done as
efficiently as possible, but also not miss any VMA (especially in the case
of loading/unloading shared libraries).  In practice, correctness can't be
guaranteed (due to process dying before VMA data can be captured, or
shared library being unloaded, etc), but any effort to maximize the chance
of finding the VMA is appreciated.

Unfortunately, for all the /proc/<pid>/maps file universality and
usefulness, it doesn't fit the above use cases 100%.

First, it's main purpose is to emit all VMAs sequentially, but in practice
captured addresses would fall only into a smaller subset of all process'
VMAs, mainly containing executable text.  Yet, library would need to parse
most or all of the contents to find needed VMAs, as there is no way to
skip VMAs that are of no use.  Efficient library can do the linear pass
and it is still relatively efficient, but it's definitely an overhead that
can be avoided, if there was a way to do more targeted querying of the
relevant VMA information.

Second, it's a text based interface, which makes its programmatic use from
applications and libraries more cumbersome and inefficient due to the need
to handle text parsing to get necessary pieces of information.  The
overhead is actually payed both by kernel, formatting originally binary
VMA data into text, and then by user space application, parsing it back
into binary data for further use.

For the on-demand pattern of usage, described above, another problem when
writing generic stack trace symbolization library is an unfortunate
performance-vs-correctness tradeoff that needs to be made.  Library has to
make a decision to either cache parsed contents of /proc/<pid>/maps (after
initial processing) to service future requests (if application requests to
symbolize another set of addresses (for the same process), captured at
some later time, which is typical for periodic/continuous profiling cases)
to avoid higher costs of re-parsing this file.  Or it has to choose to
cache the contents in memory to speed up future requests.  In the former
case, more memory is used for the cache and there is a risk of getting
stale data if application loads or unloads shared libraries, or otherwise
changed its set of VMAs somehow, e.g., through additional mmap() calls.
In the latter case, it's the performance hit that comes from re-opening
the file and re-parsing its contents all over again.

This patch aims to solve this problem by providing a new API built on top
of /proc/<pid>/maps.  It's meant to address both non-selectiveness and
text nature of /proc/<pid>/maps, by giving user more control of what sort
of VMA(s) needs to be queried, and being binary-based interface eliminates
the overhead of text formatting (on kernel side) and parsing (on user
space side).

It's also designed to be extensible and forward/backward compatible by
including required struct size field, which user has to provide.  We use
established copy_struct_from_user() approach to handle extensibility.

User has a choice to pick either getting VMA that covers provided address
or -ENOENT if none is found (exact, least surprising, case).  Or, with an
extra query flag (PROCMAP_QUERY_COVERING_OR_NEXT_VMA), they can get either
VMA that covers the address (if there is one), or the closest next VMA
(i.e., VMA with the smallest vm_start > addr).  The latter allows more
efficient use, but, given it could be a surprising behavior, requires an
explicit opt-in.

There is another query flag that is useful for some use cases.
PROCMAP_QUERY_FILE_BACKED_VMA instructs this API to only return
file-backed VMAs.  Combining this with PROCMAP_QUERY_COVERING_OR_NEXT_VMA
makes it possible to efficiently iterate only file-backed VMAs of the
process, which is what profilers/symbolizers are normally interested in.

All the above querying flags can be combined with (also optional) set of
desired VMA permissions flags.  This allows to, for example, iterate only
an executable subset of VMAs, which is what preprocessing pattern, used by
perf tool, would benefit from, as the assumption is that captured stack
traces would have addresses of executable code.  This saves time by
skipping non-executable VMAs altogether efficienty.

All these querying flags (modifiers) are orthogonal and can be combined in
a semantically meaningful and natural way.

Basing this ioctl()-based API on top of /proc/<pid>/maps's FD makes sense
given it's querying the same set of VMA data.  It's also benefitial
because permission checks for /proc/<pid>/maps is performed at open time
once, and the actual data read of text contents of /proc/<pid>/maps is
done without further permission checks.  We piggyback on this pattern with
ioctl()-based API as well, as that's a desired property.  Both for
performance reasons, but also for security and flexibility reasons.

Allowing application to open an FD for /proc/self/maps without any extra
capabilities, and then passing it to some sort of profiling agent through
Unix-domain socket, would allow such profiling agent to not require some
of the capabilities that are otherwise expected when opening
/proc/<pid>/maps file for *another* process.  This is a desirable property
for some more restricted setups.

This new ioctl-based implementation doesn't interfere with seq_file-based
implementation of /proc/<pid>/maps textual interface, and so could be used
together or independently without paying any price for that.

Note also, that fetching VMA name (e.g., backing file path, or special
hard-coded or user-provided names) is optional just like build ID.  If
user sets vma_name_size to zero, kernel code won't attempt to retrieve it,
saving resources.

Earlier versions of this patch set were adding per-VMA locking, which is
why we have a code structure that is ready for abstracting mmap_lock vs
vm_lock differences (query_vma_setup(), query_vma_teardown(), and
query_vma_find_by_addr()), but given anon_vma_name() is not yet compatible
with per-VMA locking, initial implementation sticks to using only
mmap_lock for now.  It will be easy to add back per-VMA locking once all
the pieces are ready later on.  Which is why we keep existing code
structure with setup/teardown/query helper functions.

[andrii@kernel.org: improve PROCMAP_QUERY's compat mode handling]
  Link: https://lkml.kernel.org/r/20240701174805.1897344-2-andrii@kernel.org
Link: https://lkml.kernel.org/r/20240627170900.1672542-3-andrii@kernel.org
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/uapi/linux/fs.h | 130 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 129 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 45e4e64fd664..5d440f9b5d92 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -333,8 +333,10 @@ typedef int __bitwise __kernel_rwf_t;
 #define RWF_SUPPORTED	(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
 			 RWF_APPEND | RWF_NOAPPEND)
 
+#define PROCFS_IOCTL_MAGIC 'f'
+
 /* Pagemap ioctl */
-#define PAGEMAP_SCAN	_IOWR('f', 16, struct pm_scan_arg)
+#define PAGEMAP_SCAN	_IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg)
 
 /* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */
 #define PAGE_IS_WPALLOWED	(1 << 0)
@@ -393,4 +395,130 @@ struct pm_scan_arg {
 	__u64 return_mask;
 };
 
+/* /proc/<pid>/maps ioctl */
+#define PROCMAP_QUERY	_IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query)
+
+enum procmap_query_flags {
+	/*
+	 * VMA permission flags.
+	 *
+	 * Can be used as part of procmap_query.query_flags field to look up
+	 * only VMAs satisfying specified subset of permissions. E.g., specifying
+	 * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs,
+	 * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only
+	 * return read/write VMAs, though both executable/non-executable and
+	 * private/shared will be ignored.
+	 *
+	 * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags
+	 * field to specify actual VMA permissions.
+	 */
+	PROCMAP_QUERY_VMA_READABLE		= 0x01,
+	PROCMAP_QUERY_VMA_WRITABLE		= 0x02,
+	PROCMAP_QUERY_VMA_EXECUTABLE		= 0x04,
+	PROCMAP_QUERY_VMA_SHARED		= 0x08,
+	/*
+	 * Query modifier flags.
+	 *
+	 * By default VMA that covers provided address is returned, or -ENOENT
+	 * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest
+	 * VMA with vma_start > addr will be returned if no covering VMA is
+	 * found.
+	 *
+	 * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that
+	 * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA
+	 * to iterate all VMAs with file backing.
+	 */
+	PROCMAP_QUERY_COVERING_OR_NEXT_VMA	= 0x10,
+	PROCMAP_QUERY_FILE_BACKED_VMA		= 0x20,
+};
+
+/*
+ * Input/output argument structured passed into ioctl() call. It can be used
+ * to query a set of VMAs (Virtual Memory Areas) of a process.
+ *
+ * Each field can be one of three kinds, marked in a short comment to the
+ * right of the field:
+ *   - "in", input argument, user has to provide this value, kernel doesn't modify it;
+ *   - "out", output argument, kernel sets this field with VMA data;
+ *   - "in/out", input and output argument; user provides initial value (used
+ *     to specify maximum allowable buffer size), and kernel sets it to actual
+ *     amount of data written (or zero, if there is no data).
+ *
+ * If matching VMA is found (according to criterias specified by
+ * query_addr/query_flags, all the out fields are filled out, and ioctl()
+ * returns 0. If there is no matching VMA, -ENOENT will be returned.
+ * In case of any other error, negative error code other than -ENOENT is
+ * returned.
+ *
+ * Most of the data is similar to the one returned as text in /proc/<pid>/maps
+ * file, but procmap_query provides more querying flexibility. There are no
+ * consistency guarantees between subsequent ioctl() calls, but data returned
+ * for matched VMA is self-consistent.
+ */
+struct procmap_query {
+	/* Query struct size, for backwards/forward compatibility */
+	__u64 size;
+	/*
+	 * Query flags, a combination of enum procmap_query_flags values.
+	 * Defines query filtering and behavior, see enum procmap_query_flags.
+	 *
+	 * Input argument, provided by user. Kernel doesn't modify it.
+	 */
+	__u64 query_flags;		/* in */
+	/*
+	 * Query address. By default, VMA that covers this address will
+	 * be looked up. PROCMAP_QUERY_* flags above modify this default
+	 * behavior further.
+	 *
+	 * Input argument, provided by user. Kernel doesn't modify it.
+	 */
+	__u64 query_addr;		/* in */
+	/* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */
+	__u64 vma_start;		/* out */
+	__u64 vma_end;			/* out */
+	/* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */
+	__u64 vma_flags;		/* out */
+	/* VMA backing page size granularity. */
+	__u64 vma_page_size;		/* out */
+	/*
+	 * VMA file offset. If VMA has file backing, this specifies offset
+	 * within the file that VMA's start address corresponds to.
+	 * Is set to zero if VMA has no backing file.
+	 */
+	__u64 vma_offset;		/* out */
+	/* Backing file's inode number, or zero, if VMA has no backing file. */
+	__u64 inode;			/* out */
+	/* Backing file's device major/minor number, or zero, if VMA has no backing file. */
+	__u32 dev_major;		/* out */
+	__u32 dev_minor;		/* out */
+	/*
+	 * If set to non-zero value, signals the request to return VMA name
+	 * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix
+	 * appended, if file was unlinked from FS) for matched VMA. VMA name
+	 * can also be some special name (e.g., "[heap]", "[stack]") or could
+	 * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME).
+	 *
+	 * Kernel will set this field to zero, if VMA has no associated name.
+	 * Otherwise kernel will return actual amount of bytes filled in
+	 * user-supplied buffer (see vma_name_addr field below), including the
+	 * terminating zero.
+	 *
+	 * If VMA name is longer that user-supplied maximum buffer size,
+	 * -E2BIG error is returned.
+	 *
+	 * If this field is set to non-zero value, vma_name_addr should point
+	 * to valid user space memory buffer of at least vma_name_size bytes.
+	 * If set to zero, vma_name_addr should be set to zero as well
+	 */
+	__u32 vma_name_size;		/* in/out */
+	/*
+	 * User-supplied address of a buffer of at least vma_name_size bytes
+	 * for kernel to fill with matched VMA's name (see vma_name_size field
+	 * description above for details).
+	 *
+	 * Should be set to zero if VMA name should not be returned.
+	 */
+	__u64 vma_name_addr;		/* in */
+};
+
 #endif /* _UAPI_LINUX_FS_H */
-- 
cgit v1.2.3


From bfc69fd05ef9b6416f4811aafafb7f2b34daa000 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 27 Jun 2024 10:08:55 -0700
Subject: fs/procfs: add build ID fetching to PROCMAP_QUERY API

The need to get ELF build ID reliably is an important aspect when dealing
with profiling and stack trace symbolization, and /proc/<pid>/maps textual
representation doesn't help with this.

To get backing file's ELF build ID, application has to first resolve VMA,
then use it's start/end address range to follow a special
/proc/<pid>/map_files/<start>-<end> symlink to open the ELF file (this is
necessary because backing file might have been removed from the disk or
was already replaced with another binary in the same file path.

Such approach, beyond just adding complexity of having to do a bunch of
extra work, has extra security implications.  Because application opens
underlying ELF file and needs read access to its entire contents (as far
as kernel is concerned), kernel puts additional capable() checks on
following /proc/<pid>/map_files/<start>-<end> symlink.  And that makes
sense in general.

But in the case of build ID, profiler/symbolizer doesn't need the contents
of ELF file, per se.  It's only build ID that is of interest, and ELF
build ID itself doesn't provide any sensitive information.

So this patch adds a way to request backing file's ELF build ID along the
rest of VMA information in the same API.  User has control over whether
this piece of information is requested or not by either setting
build_id_size field to zero or non-zero maximum buffer size they provided
through build_id_addr field (which encodes user pointer as __u64 field).
This is a completely optional piece of information, and so has no
performance implications for user cases that don't care about build ID,
while improving performance and simplifying the setup for those
application that do need it.

Kernel already implements build ID fetching, which is used from BPF
subsystem.  We are reusing this code here, but plan a follow up changes to
make it work better under more relaxed assumption (compared to what
existing code assumes) of being called from user process context, in which
page faults are allowed.  BPF-specific implementation currently bails out
if necessary part of ELF file is not paged in, all due to extra
BPF-specific restrictions (like the need to fetch build ID in restrictive
contexts such as NMI handler).

[andrii@kernel.org: fix integer to pointer cast warning in do_procmap_query()]
  Link: https://lkml.kernel.org/r/20240701174805.1897344-1-andrii@kernel.org
Link: https://lkml.kernel.org/r/20240627170900.1672542-4-andrii@kernel.org
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/uapi/linux/fs.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 5d440f9b5d92..2a4a5f50c98e 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -511,6 +511,26 @@ struct procmap_query {
 	 * If set to zero, vma_name_addr should be set to zero as well
 	 */
 	__u32 vma_name_size;		/* in/out */
+	/*
+	 * If set to non-zero value, signals the request to extract and return
+	 * VMA's backing file's build ID, if the backing file is an ELF file
+	 * and it contains embedded build ID.
+	 *
+	 * Kernel will set this field to zero, if VMA has no backing file,
+	 * backing file is not an ELF file, or ELF file has no build ID
+	 * embedded.
+	 *
+	 * Build ID is a binary value (not a string). Kernel will set
+	 * build_id_size field to exact number of bytes used for build ID.
+	 * If build ID is requested and present, but needs more bytes than
+	 * user-supplied maximum buffer size (see build_id_addr field below),
+	 * -E2BIG error will be returned.
+	 *
+	 * If this field is set to non-zero value, build_id_addr should point
+	 * to valid user space memory buffer of at least build_id_size bytes.
+	 * If set to zero, build_id_addr should be set to zero as well
+	 */
+	__u32 build_id_size;		/* in/out */
 	/*
 	 * User-supplied address of a buffer of at least vma_name_size bytes
 	 * for kernel to fill with matched VMA's name (see vma_name_size field
@@ -519,6 +539,14 @@ struct procmap_query {
 	 * Should be set to zero if VMA name should not be returned.
 	 */
 	__u64 vma_name_addr;		/* in */
+	/*
+	 * User-supplied address of a buffer of at least build_id_size bytes
+	 * for kernel to fill with matched VMA's ELF build ID, if available
+	 * (see build_id_size field description above for details).
+	 *
+	 * Should be set to zero if build ID should not be returned.
+	 */
+	__u64 build_id_addr;		/* in */
 };
 
 #endif /* _UAPI_LINUX_FS_H */
-- 
cgit v1.2.3


From f216c845f3c772e54d27fe209fd300b10e7bf54a Mon Sep 17 00:00:00 2001
From: Lance Yang <ioworker0@gmail.com>
Date: Fri, 28 Jun 2024 21:07:49 +0800
Subject: mm: add per-order mTHP split counters

Patch series "mm: introduce per-order mTHP split counters", v3.

At present, the split counters in THP statistics no longer include
PTE-mapped mTHP.  Therefore, we want to introduce per-order mTHP split
counters to monitor the frequency of mTHP splits.  This will assist
developers in better analyzing and optimizing system performance.

/sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats
        split
        split_failed
        split_deferred


This patch (of 2):

Currently, the split counters in THP statistics no longer include
PTE-mapped mTHP.  Therefore, we propose introducing per-order mTHP split
counters to monitor the frequency of mTHP splits.  This will help
developers better analyze and optimize system performance.

/sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats
        split
        split_failed
        split_deferred

[ioworker0@gmail.com: make things more readable, per Barry and Baolin]
  Link: https://lkml.kernel.org/r/20240704012905.42971-2-ioworker0@gmail.com
[ioworker0@gmail.com: use == for `order' test, per David]
  Link: https://lkml.kernel.org/r/20240705113119.82210-1-ioworker0@gmail.com
Link: https://lkml.kernel.org/r/20240704012905.42971-1-ioworker0@gmail.com
Link: https://lkml.kernel.org/r/20240704012905.42971-2-ioworker0@gmail.com
Link: https://lkml.kernel.org/r/20240628130750.73097-1-ioworker0@gmail.com
Link: https://lkml.kernel.org/r/20240628130750.73097-2-ioworker0@gmail.com
Signed-off-by: Mingzhe Yang <mingzhe.yang@ly.com>
Signed-off-by: Lance Yang <ioworker0@gmail.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Acked-by: Barry Song <baohua@kernel.org>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Bang Li <libang.li@antgroup.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 212cca384d7e..cee3c5da8f0e 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -284,6 +284,9 @@ enum mthp_stat_item {
 	MTHP_STAT_FILE_ALLOC,
 	MTHP_STAT_FILE_FALLBACK,
 	MTHP_STAT_FILE_FALLBACK_CHARGE,
+	MTHP_STAT_SPLIT,
+	MTHP_STAT_SPLIT_FAILED,
+	MTHP_STAT_SPLIT_DEFERRED,
 	__MTHP_STAT_COUNT
 };
 
-- 
cgit v1.2.3


From 18d095b2556e5e1292003c8e9f5d845ed42ef89b Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Tue, 2 Jul 2024 15:51:19 +0200
Subject: mm: define __pte_leaf_size() to also take a PMD entry

On powerpc 8xx, when a page is 8M size, the information is in the PMD
entry.  So allow architectures to provide __pte_leaf_size() instead of
pte_leaf_size() and provide the PMD entry to that function.

When __pte_leaf_size() is not defined, define it as a pte_leaf_size() so
that architectures not interested in the PMD arguments are not impacted.

Only define a default pte_leaf_size() when __pte_leaf_size() is not
defined to make sure nobody adds new calls to pte_leaf_size() in the core.

Link: https://lkml.kernel.org/r/c7c008f0a314bf8029ad7288fdc908db1ec7e449.1719928057.git.christophe.leroy@csgroup.eu
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgtable.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 2f32eaccf0b9..2a6a3cccfc36 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1907,9 +1907,12 @@ typedef unsigned int pgtbl_mod_mask;
 #ifndef pmd_leaf_size
 #define pmd_leaf_size(x) PMD_SIZE
 #endif
+#ifndef __pte_leaf_size
 #ifndef pte_leaf_size
 #define pte_leaf_size(x) PAGE_SIZE
 #endif
+#define __pte_leaf_size(x,y) pte_leaf_size(y)
+#endif
 
 /*
  * We always define pmd_pfn for all archs as it's used in lots of generic
-- 
cgit v1.2.3


From e6c0c03245b14d6c205814aee67128257d0bea84 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Tue, 2 Jul 2024 15:51:20 +0200
Subject: mm: provide mm_struct and address to huge_ptep_get()

On powerpc 8xx huge_ptep_get() will need to know whether the given ptep is
a PTE entry or a PMD entry.  This cannot be known with the PMD entry
itself because there is no easy way to know it from the content of the
entry.

So huge_ptep_get() will need to know either the size of the page or get
the pmd.

In order to be consistent with huge_ptep_get_and_clear(), give mm and
address to huge_ptep_get().

Link: https://lkml.kernel.org/r/cc00c70dd384298796a4e1b25d6c4eb306d3af85.1719928057.git.christophe.leroy@csgroup.eu
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/asm-generic/hugetlb.h | 2 +-
 include/linux/swapops.h       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index 6dcf4d576970..594d5905f615 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -144,7 +144,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 #endif
 
 #ifndef __HAVE_ARCH_HUGE_PTEP_GET
-static inline pte_t huge_ptep_get(pte_t *ptep)
+static inline pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	return ptep_get(ptep);
 }
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index a5c560a2f8c2..cb468e418ea1 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -334,7 +334,7 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
 
 extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 					unsigned long address);
-extern void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte);
+extern void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *pte);
 #else  /* CONFIG_MIGRATION */
 static inline swp_entry_t make_readable_migration_entry(pgoff_t offset)
 {
@@ -359,7 +359,7 @@ static inline int is_migration_entry(swp_entry_t swp)
 static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 					unsigned long address) { }
 static inline void migration_entry_wait_huge(struct vm_area_struct *vma,
-					pte_t *pte) { }
+					     unsigned long addr, pte_t *pte) { }
 static inline int is_writable_migration_entry(swp_entry_t entry)
 {
 	return 0;
-- 
cgit v1.2.3


From 8268614b408be6b76c1cee6f67de7deb0d6593b3 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Tue, 2 Jul 2024 15:51:35 +0200
Subject: mm: remove CONFIG_ARCH_HAS_HUGEPD

powerpc was the only user of CONFIG_ARCH_HAS_HUGEPD and doesn't use it
anymore, so remove all related code.

Link: https://lkml.kernel.org/r/4b10c54c794780b955f3ad6c657d0199dd792146.1719928057.git.christophe.leroy@csgroup.eu
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Oscar Salvador <osalvador@suse.de>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index a951c0d06061..ecd0ceeea206 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -20,12 +20,6 @@ struct user_struct;
 struct mmu_gather;
 struct node;
 
-#ifndef CONFIG_ARCH_HAS_HUGEPD
-typedef struct { unsigned long pd; } hugepd_t;
-#define is_hugepd(hugepd) (0)
-#define __hugepd(x) ((hugepd_t) { (x) })
-#endif
-
 void free_huge_folio(struct folio *folio);
 
 #ifdef CONFIG_HUGETLB_PAGE
-- 
cgit v1.2.3


From 3b0ba54d5f8ff60553c01d3ec3c607ab7bb3b452 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Wed, 3 Jul 2024 10:42:25 -0700
Subject: mm: add comments for allocation helpers explaining why they are
 macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A number of allocation helper functions were converted into macros to
account them at the call sites.  Add a comment for each converted
allocation helper explaining why it has to be a macro and why we typecast
the return value wherever required.  The patch also moves
acpi_os_acquire_object() closer to other allocation helpers to group them
together under the same comment.  The patch has no functional changes.

Link: https://lkml.kernel.org/r/20240703174225.3891393-1-surenb@google.com
Fixes: 2c321f3f70bc ("mm: change inlined allocation helpers to account at the call site")
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Christian König <christian.koenig@amd.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jan Kara <jack@suse.cz>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Thorsten Blum <thorsten.blum@toblux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/acpi/platform/aclinuxex.h |  9 ++++++---
 include/linux/bpf.h               |  4 ++++
 include/linux/dma-fence-chain.h   |  4 ++++
 include/linux/hid_bpf.h           |  5 +++++
 include/linux/jbd2.h              | 10 ++++++++++
 include/linux/skbuff.h            |  8 ++++++++
 include/linux/skmsg.h             |  5 +++++
 7 files changed, 42 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/acpi/platform/aclinuxex.h b/include/acpi/platform/aclinuxex.h
index 62cac266a1c8..eeff40295b4b 100644
--- a/include/acpi/platform/aclinuxex.h
+++ b/include/acpi/platform/aclinuxex.h
@@ -46,6 +46,9 @@ acpi_status acpi_os_terminate(void);
  * Interrupts are off during resume, just like they are for boot.
  * However, boot has  (system_state != SYSTEM_RUNNING)
  * to quiet __might_sleep() in kmalloc() and resume does not.
+ *
+ * These specialized allocators have to be macros for their allocations to be
+ * accounted separately (to have separate alloc_tag).
  */
 #define acpi_os_allocate(_size)	\
 		kmalloc(_size, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL)
@@ -53,14 +56,14 @@ acpi_status acpi_os_terminate(void);
 #define acpi_os_allocate_zeroed(_size)	\
 		kzalloc(_size, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL)
 
+#define acpi_os_acquire_object(_cache)	\
+		kmem_cache_zalloc(_cache, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL)
+
 static inline void acpi_os_free(void *memory)
 {
 	kfree(memory);
 }
 
-#define acpi_os_acquire_object(_cache)	\
-		kmem_cache_zalloc(_cache, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL)
-
 static inline acpi_thread_id acpi_os_get_thread_id(void)
 {
 	return (acpi_thread_id) (unsigned long)current;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b8637555c9c2..50b82ff4551a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2261,6 +2261,10 @@ void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
 void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
 				    size_t align, gfp_t flags);
 #else
+/*
+ * These specialized allocators have to be macros for their allocations to be
+ * accounted separately (to have separate alloc_tag).
+ */
 #define bpf_map_kmalloc_node(_map, _size, _flags, _node)	\
 		kmalloc_node(_size, _flags, _node)
 #define bpf_map_kzalloc(_map, _size, _flags)			\
diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h
index ad9e2506c2f4..68c3c1e41014 100644
--- a/include/linux/dma-fence-chain.h
+++ b/include/linux/dma-fence-chain.h
@@ -85,6 +85,10 @@ dma_fence_chain_contained(struct dma_fence *fence)
  * dma_fence_chain_alloc
  *
  * Returns a new struct dma_fence_chain object or NULL on failure.
+ *
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag). The typecast is
+ * intentional to enforce typesafety.
  */
 #define dma_fence_chain_alloc()	\
 		((struct dma_fence_chain *)kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL))
diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index eec2592dec12..99a3edb6cf07 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -152,6 +152,11 @@ static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; }
 static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {}
 static inline void hid_bpf_destroy_device(struct hid_device *hid) {}
 static inline void hid_bpf_device_init(struct hid_device *hid) {}
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag). The typecast is
+ * intentional to enforce typesafety.
+ */
 #define call_hid_bpf_rdesc_fixup(_hdev, _rdesc, _size)	\
 		((u8 *)kmemdup(_rdesc, *(_size), GFP_KERNEL))
 
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index ab04c1c27fae..a280f42c8c76 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1588,6 +1588,11 @@ void jbd2_journal_put_journal_head(struct journal_head *jh);
  */
 extern struct kmem_cache *jbd2_handle_cache;
 
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag). The typecast is
+ * intentional to enforce typesafety.
+ */
 #define jbd2_alloc_handle(_gfp_flags)	\
 		((handle_t *)kmem_cache_zalloc(jbd2_handle_cache, _gfp_flags))
 
@@ -1602,6 +1607,11 @@ static inline void jbd2_free_handle(handle_t *handle)
  */
 extern struct kmem_cache *jbd2_inode_cache;
 
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag). The typecast is
+ * intentional to enforce typesafety.
+ */
 #define jbd2_alloc_inode(_gfp_flags)	\
 		((struct jbd2_inode *)kmem_cache_alloc(jbd2_inode_cache, _gfp_flags))
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1c2902eaebd3..fd51f2de51da 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3400,6 +3400,10 @@ static inline struct page *__dev_alloc_pages_noprof(gfp_t gfp_mask,
 }
 #define __dev_alloc_pages(...)	alloc_hooks(__dev_alloc_pages_noprof(__VA_ARGS__))
 
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag).
+ */
 #define dev_alloc_pages(_order) __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, _order)
 
 /**
@@ -3416,6 +3420,10 @@ static inline struct page *__dev_alloc_page_noprof(gfp_t gfp_mask)
 }
 #define __dev_alloc_page(...)	alloc_hooks(__dev_alloc_page_noprof(__VA_ARGS__))
 
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag).
+ */
 #define dev_alloc_page()	dev_alloc_pages(0)
 
 /**
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index c9efda9df285..d9b03e0746e7 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -414,6 +414,11 @@ void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
 int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
 			 struct sk_msg *msg);
 
+/*
+ * This specialized allocator has to be a macro for its allocations to be
+ * accounted separately (to have a separate alloc_tag). The typecast is
+ * intentional to enforce typesafety.
+ */
 #define sk_psock_init_link()	\
 		((struct sk_psock_link *)kzalloc(sizeof(struct sk_psock_link),	\
 						 GFP_ATOMIC | __GFP_NOWARN))
-- 
cgit v1.2.3


From a8585ac68621983587f1701b7567978fcbcd9573 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Wed, 3 Jul 2024 13:25:10 +0200
Subject: mm/page_counter: move calculating protection values to page_counter

It's a lot of math, and there is nothing memcontrol specific about it.
This makes it easier to use inside of the drm cgroup controller.

[akpm@linux-foundation.org: fix kerneldoc, per Jeff Johnson]
Link: https://lkml.kernel.org/r/20240703112510.36424-1-maarten.lankhorst@linux.intel.com
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Jeff Johnson <quic_jjohnson@quicinc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/page_counter.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index 8cd858d912c4..904c52f97284 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -81,4 +81,8 @@ static inline void page_counter_reset_watermark(struct page_counter *counter)
 	counter->watermark = page_counter_read(counter);
 }
 
+void page_counter_calculate_protection(struct page_counter *root,
+				       struct page_counter *counter,
+				       bool recursive_protection);
+
 #endif /* _LINUX_PAGE_COUNTER_H */
-- 
cgit v1.2.3


From 823430c8e9d98c5865af518c782d0493b76aa511 Mon Sep 17 00:00:00 2001
From: "Ho-Ren (Jack) Chuang" <horen.chuang@linux.dev>
Date: Thu, 4 Jul 2024 07:26:44 +0000
Subject: memory tier: consolidate the initialization of memory tiers

The current memory tier initialization process is distributed across two
different functions, memory_tier_init() and memory_tier_late_init().  This
design is hard to maintain.  Thus, this patch is proposed to reduce the
possible code paths by consolidating different initialization patches into
one.

The earlier discussion with Jonathan and Ying is listed here:
https://lore.kernel.org/lkml/20240405150244.00004b49@Huawei.com/

If we want to put these two initializations together, they must be placed
together in the later function.  Because only at that time, the HMAT
information will be ready, adist between nodes can be calculated, and
memory tiering can be established based on the adist.  So we position the
initialization at memory_tier_init() to the memory_tier_late_init() call.
Moreover, it's natural to keep memory_tier initialization in drivers at
device_initcall() level.

If we simply move the set_node_memory_tier() from memory_tier_init() to
late_initcall(), it will result in HMAT not registering the
mt_adistance_algorithm callback function, because set_node_memory_tier()
is not performed during the memory tiering initialization phase, leading
to a lack of correct default_dram information.

Therefore, we introduced a nodemask to pass the information of the default
DRAM nodes.  The reason for not choosing to reuse default_dram_type->nodes
is that it is not clean enough.  So in the end, we use a __initdata
variable, which is a variable that is released once initialization is
complete, including both CPU and memory nodes for HMAT to iterate through.

Link: https://lkml.kernel.org/r/20240704072646.437579-1-horen.chuang@linux.dev
Signed-off-by: Ho-Ren (Jack) Chuang <horenchuang@bytedance.com>
Suggested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Gregory Price <gourry.memverge@gmail.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Ravi Jonnalagadda <ravis.opensrc@micron.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memory-tiers.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 0d70788558f4..0dc0cf2863e2 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -38,6 +38,7 @@ struct access_coordinate;
 #ifdef CONFIG_NUMA
 extern bool numa_demotion_enabled;
 extern struct memory_dev_type *default_dram_type;
+extern nodemask_t default_dram_nodes;
 struct memory_dev_type *alloc_memory_type(int adistance);
 void put_memory_type(struct memory_dev_type *memtype);
 void init_node_memory_type(int node, struct memory_dev_type *default_type);
@@ -76,6 +77,7 @@ static inline bool node_is_toptier(int node)
 
 #define numa_demotion_enabled	false
 #define default_dram_type	NULL
+#define default_dram_nodes	NODE_MASK_NONE
 /*
  * CONFIG_NUMA implementation returns non NULL error.
  */
-- 
cgit v1.2.3


From 00f58104202c472e487f0866fbd38832523fd4f9 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts@arm.com>
Date: Thu, 4 Jul 2024 10:10:50 +0100
Subject: mm: fix khugepaged activation policy

Since the introduction of mTHP, the docuementation has stated that
khugepaged would be enabled when any mTHP size is enabled, and disabled
when all mTHP sizes are disabled.  There are 2 problems with this; 1.
this is not what was implemented by the code and 2.  this is not the
desirable behavior.

Desirable behavior is for khugepaged to be enabled when any PMD-sized THP
is enabled, anon or file.  (Note that file THP is still controlled by the
top-level control so we must always consider that, as well as the PMD-size
mTHP control for anon).  khugepaged only supports collapsing to PMD-sized
THP so there is no value in enabling it when PMD-sized THP is disabled.
So let's change the code and documentation to reflect this policy.

Further, per-size enabled control modification events were not previously
forwarded to khugepaged to give it an opportunity to start or stop.
Consequently the following was resulting in khugepaged eroneously not
being activated:

  echo never > /sys/kernel/mm/transparent_hugepage/enabled
  echo always > /sys/kernel/mm/transparent_hugepage/hugepages-2048kB/enabled

[ryan.roberts@arm.com: v3]
  Link: https://lkml.kernel.org/r/20240705102849.2479686-1-ryan.roberts@arm.com
Link: https://lkml.kernel.org/r/20240705102849.2479686-1-ryan.roberts@arm.com
Link: https://lkml.kernel.org/r/20240704091051.2411934-1-ryan.roberts@arm.com
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Fixes: 3485b88390b0 ("mm: thp: introduce multi-size THP sysfs interface")
Closes: https://lore.kernel.org/linux-mm/7a0bbe69-1e3d-4263-b206-da007791a5c4@redhat.com/
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index cee3c5da8f0e..acb6ac24a07e 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -128,18 +128,6 @@ static inline bool hugepage_global_always(void)
 			(1<<TRANSPARENT_HUGEPAGE_FLAG);
 }
 
-static inline bool hugepage_flags_enabled(void)
-{
-	/*
-	 * We cover both the anon and the file-backed case here; we must return
-	 * true if globally enabled, even when all anon sizes are set to never.
-	 * So we don't need to look at huge_anon_orders_inherit.
-	 */
-	return hugepage_global_enabled() ||
-	       READ_ONCE(huge_anon_orders_always) ||
-	       READ_ONCE(huge_anon_orders_madvise);
-}
-
 static inline int highest_order(unsigned long orders)
 {
 	return fls_long(orders) - 1;
-- 
cgit v1.2.3


From 26c7d8413aaf113a54b54f63e151416a5c5c2a88 Mon Sep 17 00:00:00 2001
From: Bang Li <libang.li@antgroup.com>
Date: Fri, 5 Jul 2024 11:23:09 +0800
Subject: mm: thp: support "THPeligible" semantics for mTHP with anonymous
 shmem

After the commit 7fb1b252afb5 ("mm: shmem: add mTHP support for anonymous
shmem"), we can configure different policies through the multi-size THP
sysfs interface for anonymous shmem.  But currently "THPeligible"
indicates only whether the mapping is eligible for allocating THP-pages as
well as the THP is PMD mappable or not for anonymous shmem, we need to
support semantics for mTHP with anonymous shmem similar to those for mTHP
with anonymous memory.

Link: https://lkml.kernel.org/r/20240705032309.24933-1-libang.li@antgroup.com
Signed-off-by: Bang Li <libang.li@antgroup.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/shmem_fs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 3fb18f7eb73e..1d06b1e5408a 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -113,12 +113,21 @@ int shmem_unuse(unsigned int type);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
 			  struct mm_struct *mm, unsigned long vm_flags);
+unsigned long shmem_allowable_huge_orders(struct inode *inode,
+				struct vm_area_struct *vma, pgoff_t index,
+				bool global_huge);
 #else
 static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
 					  struct mm_struct *mm, unsigned long vm_flags)
 {
 	return false;
 }
+static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
+				struct vm_area_struct *vma, pgoff_t index,
+				bool global_huge)
+{
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_SHMEM
-- 
cgit v1.2.3


From 8a78882dac1c8c464e047a29415edb50421651ce Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Mon, 8 Jul 2024 11:05:44 +0800
Subject: mm/memory-failure: remove obsolete MF_MSG_DIFFERENT_COMPOUND

The page cannot become compound pages again just after a folio is split as
an extra refcnt is held.  So the MF_MSG_DIFFERENT_COMPOUND case is
obsolete and can be removed to get rid of this false assumption and code
burden.  But add one WARN_ON() here to keep the situation clear.

Link: https://lkml.kernel.org/r/20240708030544.196919-1-linmiaohe@huawei.com
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/ras/ras_event.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 9bc707fe8819..e5f7ee0864e7 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -356,7 +356,6 @@ TRACE_EVENT(aer_event,
 #define MF_PAGE_TYPE		\
 	EM ( MF_MSG_KERNEL, "reserved kernel page" )			\
 	EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" )	\
-	EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \
 	EM ( MF_MSG_HUGE, "huge page" )					\
 	EM ( MF_MSG_FREE_HUGE, "free huge page" )			\
 	EM ( MF_MSG_GET_HWPOISON, "get hwpoison page" )			\
-- 
cgit v1.2.3


From 63d9866ab01ffd0d0835d5564107283a4afc0a38 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts@arm.com>
Date: Wed, 10 Jul 2024 10:55:01 +0100
Subject: mm: shmem: rename mTHP shmem counters

The legacy PMD-sized THP counters at /proc/vmstat include thp_file_alloc,
thp_file_fallback and thp_file_fallback_charge, which rather confusingly
refer to shmem THP and do not include any other types of file pages.  This
is inconsistent since in most other places in the kernel, THP counters are
explicitly separated for anon, shmem and file flavours.  However, we are
stuck with it since it constitutes a user ABI.

Recently, commit 66f44583f9b6 ("mm: shmem: add mTHP counters for anonymous
shmem") added equivalent mTHP stats for shmem, keeping the same "file_"
prefix in the names.  But in future, we may want to add extra stats to
cover actual file pages, at which point, it would all become very
confusing.

So let's take the opportunity to rename these new counters "shmem_" before
the change makes it upstream and the ABI becomes immutable.  While we are
at it, let's improve the documentation for the legacy counters to make it
clear that they count shmem pages only.

Link: https://lkml.kernel.org/r/20240710095503.3193901-1-ryan.roberts@arm.com
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Lance Yang <ioworker0@gmail.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Daniel Gomez <da.gomez@samsung.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index acb6ac24a07e..cff002be83eb 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -269,9 +269,9 @@ enum mthp_stat_item {
 	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
 	MTHP_STAT_SWPOUT,
 	MTHP_STAT_SWPOUT_FALLBACK,
-	MTHP_STAT_FILE_ALLOC,
-	MTHP_STAT_FILE_FALLBACK,
-	MTHP_STAT_FILE_FALLBACK_CHARGE,
+	MTHP_STAT_SHMEM_ALLOC,
+	MTHP_STAT_SHMEM_FALLBACK,
+	MTHP_STAT_SHMEM_FALLBACK_CHARGE,
 	MTHP_STAT_SPLIT,
 	MTHP_STAT_SPLIT_FAILED,
 	MTHP_STAT_SPLIT_DEFERRED,
-- 
cgit v1.2.3


From 7a7127aa33c9f5b7b54ffa80619f644c5e000846 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 7 Jul 2024 01:05:05 +0900
Subject: init: remove unused __MEMINIT* macros

These macros are not used anywhere.

Link: https://lkml.kernel.org/r/20240706160511.2331061-1-masahiroy@kernel.org
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/init.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/init.h b/include/linux/init.h
index 58cef4c2e59a..b2e9dfff8691 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -99,10 +99,6 @@
 #define __INITRODATA	.section	".init.rodata","a",%progbits
 #define __FINITDATA	.previous
 
-#define __MEMINIT        .section	".meminit.text", "ax"
-#define __MEMINITDATA    .section	".meminit.data", "aw"
-#define __MEMINITRODATA  .section	".meminit.rodata", "a"
-
 /* silence warnings when references are OK */
 #define __REF            .section       ".ref.text", "ax"
 #define __REFDATA        .section       ".ref.data", "aw"
-- 
cgit v1.2.3


From 73db3abdca58c8a014ec4c88cf5ef925cbf63669 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 7 Jul 2024 01:05:06 +0900
Subject: init/modpost: conditionally check section mismatch to __meminit*

This reverts commit eb8f689046b8 ("Use separate sections for __dev/
_cpu/__mem code/data").

Check section mismatch to __meminit* only when CONFIG_MEMORY_HOTPLUG=n.

With this change, the linker script and modpost become simpler, and we
can get rid of the __ref annotations from the memory hotplug code.

[sfr@canb.auug.org.au: remove MEM_KEEP from arch/powerpc/kernel/vmlinux.lds.S]
  Link: https://lkml.kernel.org/r/20240710093213.2aefb25f@canb.auug.org.au
Link: https://lkml.kernel.org/r/20240706160511.2331061-2-masahiroy@kernel.org
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/asm-generic/vmlinux.lds.h | 18 ++----------------
 include/linux/init.h              | 14 +++++++++-----
 2 files changed, 11 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 5703526d6ebf..0db89c0aa2cc 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -141,14 +141,6 @@
  * often happens at runtime)
  */
 
-#if defined(CONFIG_MEMORY_HOTPLUG)
-#define MEM_KEEP(sec)    *(.mem##sec)
-#define MEM_DISCARD(sec)
-#else
-#define MEM_KEEP(sec)
-#define MEM_DISCARD(sec) *(.mem##sec)
-#endif
-
 #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
 #define KEEP_PATCHABLE		KEEP(*(__patchable_function_entries))
 #define PATCHABLE_DISCARDS
@@ -357,7 +349,6 @@
 	*(.data..decrypted)						\
 	*(.ref.data)							\
 	*(.data..shared_aligned) /* percpu related */			\
-	MEM_KEEP(init.data*)						\
 	*(.data.unlikely)						\
 	__start_once = .;						\
 	*(.data.once)							\
@@ -542,7 +533,6 @@
 	/* __*init sections */						\
 	__init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) {		\
 		*(.ref.rodata)						\
-		MEM_KEEP(init.rodata)					\
 	}								\
 									\
 	/* Built-in module parameters. */				\
@@ -593,8 +583,7 @@
 		*(.text.unknown .text.unknown.*)			\
 		NOINSTR_TEXT						\
 		*(.ref.text)						\
-		*(.text.asan.* .text.tsan.*)				\
-	MEM_KEEP(init.text*)						\
+		*(.text.asan.* .text.tsan.*)
 
 
 /* sched.text is aling to function alignment to secure we have same
@@ -701,7 +690,6 @@
 #define INIT_DATA							\
 	KEEP(*(SORT(___kentry+*)))					\
 	*(.init.data .init.data.*)					\
-	MEM_DISCARD(init.data*)						\
 	KERNEL_CTORS()							\
 	MCOUNT_REC()							\
 	*(.init.rodata .init.rodata.*)					\
@@ -709,7 +697,6 @@
 	TRACE_SYSCALLS()						\
 	KPROBE_BLACKLIST()						\
 	ERROR_INJECT_WHITELIST()					\
-	MEM_DISCARD(init.rodata)					\
 	CLK_OF_TABLES()							\
 	RESERVEDMEM_OF_TABLES()						\
 	TIMER_OF_TABLES()						\
@@ -727,8 +714,7 @@
 
 #define INIT_TEXT							\
 	*(.init.text .init.text.*)					\
-	*(.text.startup)						\
-	MEM_DISCARD(init.text*)
+	*(.text.startup)
 
 #define EXIT_DATA							\
 	*(.exit.data .exit.data.*)					\
diff --git a/include/linux/init.h b/include/linux/init.h
index b2e9dfff8691..ee1309473bc6 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -84,11 +84,15 @@
 
 #define __exit          __section(".exit.text") __exitused __cold notrace
 
-/* Used for MEMORY_HOTPLUG */
-#define __meminit        __section(".meminit.text") __cold notrace \
-						  __latent_entropy
-#define __meminitdata    __section(".meminit.data")
-#define __meminitconst   __section(".meminit.rodata")
+#ifdef CONFIG_MEMORY_HOTPLUG
+#define __meminit
+#define __meminitdata
+#define __meminitconst
+#else
+#define __meminit	__init
+#define __meminitdata	__initdata
+#define __meminitconst	__initconst
+#endif
 
 /* For assembly routines */
 #define __HEAD		.section	".head.text","ax"
-- 
cgit v1.2.3


From d69ba6bbaf1f606ac354e925571a54d025e32aae Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 11 Jul 2024 15:07:03 -0700
Subject: net: ethtool: let drivers remove lost RSS contexts

RSS contexts may get lost from a device, in various extreme circumstances.
Specifically if the firmware leaks resources and resets, or crashes and
either recovers in partially working state or the crash causes a
different FW version to run - creating the context again may fail.

Drivers should do their absolute best to prevent this from happening.
When it does, however, telling user that a context exists, when it can't
possibly be used any more is counter productive. Add a helper for
drivers to discard contexts. Print an error, in the future netlink
notification will also be sent.

More robust approaches were proposed, like keeping the contexts
but marking them as "dead" (but possibly resurrected by next reset).
That may be better but it's unclear at this stage whether the
effort is worth the benefits.

Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Link: https://patch.msgid.link/20240711220713.283778-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index e213b5508da6..89da0254ccd4 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -210,6 +210,8 @@ static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size,
 	return struct_size_t(struct ethtool_rxfh_context, data, flex_len);
 }
 
+void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id);
+
 /* declare a link mode bitmap */
 #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name)		\
 	DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS)
-- 
cgit v1.2.3


From 28c8757a792bbbc76407777bd0303862daa75057 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 11 Jul 2024 15:07:04 -0700
Subject: net: ethtool: let drivers declare max size of RSS indir table and key

Some drivers (bnxt but I think also mlx5 from ML discussions) change
the size of the indirection table depending on the number of Rx rings.
Decouple the max table size from the size of the currently used table,
so that we can reserve space in the context for table growth.

Static members in ethtool_ops are good enough for now, we can add
callbacks to read the max size more dynamically if someone needs
that.

Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Link: https://patch.msgid.link/20240711220713.283778-3-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool.h | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 89da0254ccd4..a1ee76936f53 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -181,6 +181,7 @@ struct ethtool_rxfh_context {
 	/* private: driver private data, indirection table, and hash key are
 	 * stored sequentially in @data area.  Use below helpers to access.
 	 */
+	u32 key_off;
 	u8 data[] __aligned(sizeof(void *));
 };
 
@@ -196,18 +197,7 @@ static inline u32 *ethtool_rxfh_context_indir(struct ethtool_rxfh_context *ctx)
 
 static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx)
 {
-	return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size);
-}
-
-static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size,
-					       u16 priv_size)
-{
-	size_t indir_bytes = array_size(indir_size, sizeof(u32));
-	size_t flex_len;
-
-	flex_len = size_add(size_add(indir_bytes, key_size),
-			    ALIGN(priv_size, sizeof(u32)));
-	return struct_size_t(struct ethtool_rxfh_context, data, flex_len);
+	return &ctx->data[ctx->key_off];
 }
 
 void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id);
@@ -723,6 +713,10 @@ struct ethtool_rxfh_param {
  *	contexts.
  * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor
  *	RSS.
+ * @rxfh_indir_space: max size of RSS indirection tables, if indirection table
+ *	size as returned by @get_rxfh_indir_size may change during lifetime
+ *	of the device. Leave as 0 if the table size is constant.
+ * @rxfh_key_space: same as @rxfh_indir_space, but for the key.
  * @rxfh_priv_size: size of the driver private data area the core should
  *	allocate for an RSS context (in &struct ethtool_rxfh_context).
  * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID.  If this
@@ -940,6 +934,8 @@ struct ethtool_ops {
 	u32     cap_link_lanes_supported:1;
 	u32     cap_rss_ctx_supported:1;
 	u32	cap_rss_sym_xor_supported:1;
+	u32	rxfh_indir_space;
+	u16	rxfh_key_space;
 	u16	rxfh_priv_size;
 	u32	rxfh_max_context_id;
 	u32	supported_coalesce_params;
-- 
cgit v1.2.3


From 5d89b5bdbce3937c86f05ffe19455c3068fd94f7 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Fri, 12 Jul 2024 11:52:40 +0200
Subject: i2c: document new callbacks in i2c_algorithm

When updating the callbacks, adding their kernel-doc was forgotten. Add
it now.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Closes: https://lore.kernel.org/r/20240712165527.75e4ddc9@canb.auug.org.au
Fixes: a93c2e5fe766 ("i2c: reword i2c_algorithm according to newest specification")
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/i2c.h | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 9d45b7b912dd..e9cc14b1f9a1 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -513,10 +513,10 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info,
 
 /**
  * struct i2c_algorithm - represent I2C transfer method
- * @master_xfer: Issue a set of i2c transactions to the given I2C adapter
+ * @xfer: Issue a set of i2c transactions to the given I2C adapter
  *   defined by the msgs array, with num messages available to transfer via
  *   the adapter specified by adap.
- * @master_xfer_atomic: same as @master_xfer. Yet, only using atomic context
+ * @xfer_atomic: same as @xfer. Yet, only using atomic context
  *   so e.g. PMICs can be accessed very late before shutdown. Optional.
  * @smbus_xfer: Issue smbus transactions to the given I2C adapter. If this
  *   is not present, then the bus layer will try and convert the SMBus calls
@@ -525,27 +525,33 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info,
  *   so e.g. PMICs can be accessed very late before shutdown. Optional.
  * @functionality: Return the flags that this algorithm/adapter pair supports
  *   from the ``I2C_FUNC_*`` flags.
- * @reg_slave: Register given client to I2C slave mode of this adapter
- * @unreg_slave: Unregister given client from I2C slave mode of this adapter
+ * @reg_target: Register given client to local target mode of this adapter
+ * @unreg_target: Unregister given client from local target mode of this adapter
+ *
+ * @master_xfer: deprecated, use @xfer
+ * @master_xfer_atomic: deprecated, use @xfer_atomic
+ * @reg_slave: deprecated, use @reg_target
+ * @unreg_slave: deprecated, use @unreg_target
+ *
  *
  * The following structs are for those who like to implement new bus drivers:
  * i2c_algorithm is the interface to a class of hardware solutions which can
  * be addressed using the same bus algorithms - i.e. bit-banging or the PCF8584
  * to name two of the most common.
  *
- * The return codes from the ``master_xfer{_atomic}`` fields should indicate the
+ * The return codes from the ``xfer{_atomic}`` fields should indicate the
  * type of error code that occurred during the transfer, as documented in the
  * Kernel Documentation file Documentation/i2c/fault-codes.rst. Otherwise, the
  * number of messages executed should be returned.
  */
 struct i2c_algorithm {
 	/*
-	 * If an adapter algorithm can't do I2C-level access, set master_xfer
+	 * If an adapter algorithm can't do I2C-level access, set xfer
 	 * to NULL. If an adapter algorithm can do SMBus access, set
 	 * smbus_xfer. If set to NULL, the SMBus protocol is simulated
 	 * using common I2C messages.
 	 *
-	 * master_xfer should return the number of messages successfully
+	 * xfer should return the number of messages successfully
 	 * processed, or a negative value on error
 	 */
 	union {
-- 
cgit v1.2.3


From 63c6e08eac8e7e2ad2e883c968e58e23e1dc8c70 Mon Sep 17 00:00:00 2001
From: Daniel Jurgens <danielj@nvidia.com>
Date: Thu, 11 Jul 2024 17:33:07 -0700
Subject: net/mlx5: IFC updates for SF max IO EQs

Expose a new cap sf_eq_usage. The vhca_resource_manager can write this
cap, indicating the SF driver should use max_num_eqs_24b to determine
how many EQs to use.

Will be used in the next patch, to indicate to the SF driver from the PF
that the user has set the max io eqs via devlink. So the SF driver can
later query the proper max eq value from the new cap.

devlink port function set pci/0000:08:00.0/32768 max_io_eqs 32

Signed-off-by: Daniel Jurgens <danielj@nvidia.com>
Reviewed-by: William Tu <witu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://patch.msgid.link/20240712003310.355106-2-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mlx5/mlx5_ifc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index fdad0071d599..360d42f041b0 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1994,7 +1994,9 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
 	u8	   migration_tracking_state[0x1];
 	u8	   reserved_at_ca[0x6];
 	u8	   migration_in_chunks[0x1];
-	u8	   reserved_at_d1[0xf];
+	u8	   reserved_at_d1[0x1];
+	u8	   sf_eq_usage[0x1];
+	u8	   reserved_at_d3[0xd];
 
 	u8	   cross_vhca_object_to_object_supported[0x20];
 
-- 
cgit v1.2.3


From 084ebf7ca83e6cb743784f2eecc654193ce064fb Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Fri, 21 Jun 2024 13:50:43 -0700
Subject: execve: Keep bprm->argmin behind CONFIG_MMU

When argmin was added in commit 655c16a8ce9c ("exec: separate
MM_ANONPAGES and RLIMIT_STACK accounting"), it was intended only for
validating stack limits on CONFIG_MMU[1]. All checking for reaching the
limit (argmin) is wrapped in CONFIG_MMU ifdef checks, though setting
argmin was not. That argmin is only supposed to be used under CONFIG_MMU
was rediscovered recently[2], and I don't want to trip over this again.

Move argmin's declaration into the existing CONFIG_MMU area, and add
helpers functions so the MMU tests can be consolidated.

Link: https://lore.kernel.org/all/20181126122307.GA1660@redhat.com [1]
Link: https://lore.kernel.org/all/202406211253.7037F69@keescook/ [2]
Link: https://lore.kernel.org/r/20240621205046.4001362-1-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/binfmts.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 70f97f685bff..e6c00e860951 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -19,13 +19,13 @@ struct linux_binprm {
 #ifdef CONFIG_MMU
 	struct vm_area_struct *vma;
 	unsigned long vma_pages;
+	unsigned long argmin; /* rlimit marker for copy_strings() */
 #else
 # define MAX_ARG_PAGES	32
 	struct page *page[MAX_ARG_PAGES];
 #endif
 	struct mm_struct *mm;
 	unsigned long p; /* current top of mem */
-	unsigned long argmin; /* rlimit marker for copy_strings() */
 	unsigned int
 		/* Should an execfd be passed to userspace? */
 		have_execfd:1,
-- 
cgit v1.2.3


From 872bb37f6829d4f7f3ed5afe2786add3d4384b4b Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Tue, 2 Jul 2024 14:16:16 -0700
Subject: randomize_kstack: Improve stack alignment codegen

The codgen for adding architecture-specific stack alignment to the
effective alloca() usage is somewhat inefficient and allows a bit to get
carried beyond the desired entropy range. This isn't really a problem,
but it's unexpected and the codegen is kind of bad.

Quoting Mark[1], the disassembly for arm64's invoke_syscall() looks like:

	// offset = raw_cpu_read(kstack_offset)
	mov     x4, sp
	adrp    x0, kstack_offset
	mrs     x5, tpidr_el1
	add     x0, x0, #:lo12:kstack_offset
	ldr     w0, [x0, x5]

	// offset = KSTACK_OFFSET_MAX(offset)
	and     x0, x0, #0x3ff

	// alloca(offset)
	add     x0, x0, #0xf
	and     x0, x0, #0x7f0
	sub     sp, x4, x0

... which in C would be:

	offset = raw_cpu_read(kstack_offset)
	offset &= 0x3ff;			// [0x0, 0x3ff]
	offset += 0xf;				// [0xf, 0x40e]
	offset &= 0x7f0;			// [0x0,

... so when *all* bits [3:0] are 0, they'll have no impact, and when
*any* of bits [3:0] are 1 they'll trigger a carry into bit 4, which
could ripple all the way up and spill into bit 10.

Switch the masking in KSTACK_OFFSET_MAX() to explicitly clear the bottom
bits to avoid the rounding by using 0b1111110000 instead of 0b1111111111:

	// offset = raw_cpu_read(kstack_offset)
	mov     x4, sp
	adrp    x0, 0 <kstack_offset>
	mrs     x5, tpidr_el1
	add     x0, x0, #:lo12:kstack_offset
	ldr     w0, [x0, x5]

	// offset = KSTACK_OFFSET_MAX(offset)
	and     x0, x0, #0x3f0

	// alloca(offset)
	sub     sp, x4, x0

Suggested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/lkml/ZnVfOnIuFl2kNWkT@J2N7QTR9R3/ [1]
Link: https://lore.kernel.org/r/20240702211612.work.576-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 include/linux/randomize_kstack.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index 6d92b68efbf6..1d982dbdd0d0 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -32,13 +32,19 @@ DECLARE_PER_CPU(u32, kstack_offset);
 #endif
 
 /*
- * Use, at most, 10 bits of entropy. We explicitly cap this to keep the
- * "VLA" from being unbounded (see above). 10 bits leaves enough room for
- * per-arch offset masks to reduce entropy (by removing higher bits, since
- * high entropy may overly constrain usable stack space), and for
- * compiler/arch-specific stack alignment to remove the lower bits.
+ * Use, at most, 6 bits of entropy (on 64-bit; 8 on 32-bit). This cap is
+ * to keep the "VLA" from being unbounded (see above). Additionally clear
+ * the bottom 4 bits (on 64-bit systems, 2 for 32-bit), since stack
+ * alignment will always be at least word size. This makes the compiler
+ * code gen better when it is applying the actual per-arch alignment to
+ * the final offset. The resulting randomness is reasonable without overly
+ * constraining usable stack space.
  */
-#define KSTACK_OFFSET_MAX(x)	((x) & 0x3FF)
+#ifdef CONFIG_64BIT
+#define KSTACK_OFFSET_MAX(x)	((x) & 0b1111110000)
+#else
+#define KSTACK_OFFSET_MAX(x)	((x) & 0b1111111100)
+#endif
 
 /**
  * add_random_kstack_offset - Increase stack utilization by previously
-- 
cgit v1.2.3


From 1df03a4b44146c4f720d793915747272c7773a3e Mon Sep 17 00:00:00 2001
From: Konstantin Taranov <kotaranov@microsoft.com>
Date: Thu, 11 Jul 2024 06:37:57 -0700
Subject: RDMA/mana_ib: Set correct device into ib

Add mana_get_primary_netdev_rcu helper to get a primary
netdevice for a given port. When mana is used with
netvsc, the VF netdev is controlled by an upper netvsc
device. In a baremetal case, the VF netdev is the
primary device.

Use the mana_get_primary_netdev_rcu() helper in the mana_ib
to get the correct device for querying network states.

Fixes: 8b184e4f1c32 ("RDMA/mana_ib: Enable RoCE on port 1")
Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
Link: https://lore.kernel.org/r/1720705077-322-1-git-send-email-kotaranov@linux.microsoft.com
Reviewed-by: Long Li <longli@microsoft.com>
Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/net/mana/mana.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 59823901b74f..f9b4b0dcb69f 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -797,4 +797,6 @@ void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
 int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id,
 		   u32 doorbell_pg_id);
 void mana_uncfg_vport(struct mana_port_context *apc);
+
+struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index);
 #endif /* _MANA_H */
-- 
cgit v1.2.3


From 252442f2ae317d109ef0b4b39ce0608c09563042 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Wed, 10 Jul 2024 10:14:28 +0200
Subject: ipv6: fix source address selection with route leak

By default, an address assigned to the output interface is selected when
the source address is not specified. This is problematic when a route,
configured in a vrf, uses an interface from another vrf (aka route leak).
The original vrf does not own the selected source address.

Let's add a check against the output interface and call the appropriate
function to select the source address.

CC: stable@vger.kernel.org
Fixes: 0d240e7811c4 ("net: vrf: Implement get_saddr for IPv6")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Link: https://patch.msgid.link/20240710081521.3809742-3-nicolas.dichtel@6wind.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ip6_route.h | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index a18ed24fed94..6dbdf60b342f 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -127,18 +127,26 @@ void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args,
 
 static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i,
 				      const struct in6_addr *daddr,
-				      unsigned int prefs,
+				      unsigned int prefs, int l3mdev_index,
 				      struct in6_addr *saddr)
 {
+	struct net_device *l3mdev;
+	struct net_device *dev;
+	bool same_vrf;
 	int err = 0;
 
-	if (f6i && f6i->fib6_prefsrc.plen) {
+	rcu_read_lock();
+
+	l3mdev = dev_get_by_index_rcu(net, l3mdev_index);
+	if (!f6i || !f6i->fib6_prefsrc.plen || l3mdev)
+		dev = f6i ? fib6_info_nh_dev(f6i) : NULL;
+	same_vrf = !l3mdev || l3mdev_master_dev_rcu(dev) == l3mdev;
+	if (f6i && f6i->fib6_prefsrc.plen && same_vrf)
 		*saddr = f6i->fib6_prefsrc.addr;
-	} else {
-		struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL;
+	else
+		err = ipv6_dev_get_saddr(net, same_vrf ? dev : l3mdev, daddr, prefs, saddr);
 
-		err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr);
-	}
+	rcu_read_unlock();
 
 	return err;
 }
-- 
cgit v1.2.3


From 1a616c2fe96b357894b74b41787d4ea6987f6199 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Thu, 21 Dec 2023 20:34:17 -0500
Subject: lockdep: lockdep_set_notrack_class()

Add a new helper to disable lockdep tracking entirely for a given class.

This is needed for bcachefs, which takes too many btree node locks for
lockdep to track. Instead, we have a single lockdep_map for "btree_trans
has any btree nodes locked", which makes more since given that we have
centralized lock management and a cycle detector.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/lockdep.h       | 4 ++++
 include/linux/lockdep_types.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 08b0d1d9d78b..b76f1bcd2f7f 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -181,6 +181,9 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name,
 #define lockdep_set_novalidate_class(lock) \
 	lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock)
 
+#define lockdep_set_notrack_class(lock) \
+	lockdep_set_class_and_name(lock, &__lockdep_no_track__, #lock)
+
 /*
  * Compare locking classes
  */
@@ -338,6 +341,7 @@ static inline void lockdep_set_selftest_task(struct task_struct *task)
 #define lockdep_set_subclass(lock, sub)		do { } while (0)
 
 #define lockdep_set_novalidate_class(lock) do { } while (0)
+#define lockdep_set_notrack_class(lock) do { } while (0)
 
 /*
  * We don't define lockdep_match_class() and lockdep_match_key() for !LOCKDEP
diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
index 70d30d40ea4a..9f361d3ab9d9 100644
--- a/include/linux/lockdep_types.h
+++ b/include/linux/lockdep_types.h
@@ -80,6 +80,7 @@ struct lock_class_key {
 };
 
 extern struct lock_class_key __lockdep_no_validate__;
+extern struct lock_class_key __lockdep_no_track__;
 
 struct lock_trace;
 
-- 
cgit v1.2.3


From 0ece498c27d8cd2fdad6f49a6abc34b8badd8fbc Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Fri, 10 May 2024 10:36:45 -0400
Subject: Bluetooth: MGMT: Make MGMT_OP_LOAD_CONN_PARAM update existing
 connection

This makes MGMT_OP_LOAD_CONN_PARAM update existing connection by
dectecting the request is just for one connection, parameters already
exists and there is a connection.

Since this is a new behavior the revision is also updated to enable
userspace to detect it.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci_sync.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 534c3386e714..20168732f20e 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -138,6 +138,7 @@ int hci_suspend_sync(struct hci_dev *hdev);
 int hci_resume_sync(struct hci_dev *hdev);
 
 struct hci_conn;
+struct hci_conn_params;
 
 int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason);
 
@@ -156,3 +157,5 @@ int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn);
 int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn);
 
 int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn);
+int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn,
+			    struct hci_conn_params *params);
-- 
cgit v1.2.3


From 8f7dfe171c576aaec4911cc59feaed26d79c7c7f Mon Sep 17 00:00:00 2001
From: Erick Archer <erick.archer@outlook.com>
Date: Sat, 18 May 2024 10:30:38 +0200
Subject: Bluetooth: hci_core: Prefer struct_size over open coded arithmetic

This is an effort to get rid of all multiplications from allocation
functions in order to prevent integer overflows [1][2].

As the "dl" variable is a pointer to "struct hci_dev_list_req" and this
structure ends in a flexible array:

struct hci_dev_list_req {
	[...]
	struct hci_dev_req dev_req[];	/* hci_dev_req structures */
};

the preferred way in the kernel is to use the struct_size() helper to
do the arithmetic instead of the calculation "size + count * size" in
the kzalloc() and copy_to_user() functions.

At the same time, prepare for the coming implementation by GCC and Clang
of the __counted_by attribute. Flexible array members annotated with
__counted_by can have their accesses bounds-checked at run-time via
CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for
strcpy/memcpy-family functions).

In this case, it is important to note that the logic needs a little
refactoring to ensure that the "dev_num" member is initialized before
the first access to the flex array. Specifically, add the assignment
before the list_for_each_entry() loop.

Also remove the "size" variable as it is no longer needed.

This way, the code is more readable and safer.

This code was detected with the help of Coccinelle, and audited and
modified manually.

Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [1]
Link: https://github.com/KSPP/linux/issues/160 [2]
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Erick Archer <erick.archer@outlook.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci_sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sock.h b/include/net/bluetooth/hci_sock.h
index 9949870f7d78..13e8cd4414a1 100644
--- a/include/net/bluetooth/hci_sock.h
+++ b/include/net/bluetooth/hci_sock.h
@@ -144,7 +144,7 @@ struct hci_dev_req {
 
 struct hci_dev_list_req {
 	__u16  dev_num;
-	struct hci_dev_req dev_req[];	/* hci_dev_req structures */
+	struct hci_dev_req dev_req[] __counted_by(dev_num);
 };
 
 struct hci_conn_list_req {
-- 
cgit v1.2.3


From 7d2c7ddba6238e6a14cd89ef869878dd22f2a661 Mon Sep 17 00:00:00 2001
From: Erick Archer <erick.archer@outlook.com>
Date: Fri, 17 May 2024 19:21:49 +0200
Subject: tty: rfcomm: prefer struct_size over open coded arithmetic

This is an effort to get rid of all multiplications from allocation
functions in order to prevent integer overflows [1][2].

As the "dl" variable is a pointer to "struct rfcomm_dev_list_req" and
this structure ends in a flexible array:

struct rfcomm_dev_list_req {
	[...]
	struct   rfcomm_dev_info dev_info[];
};

the preferred way in the kernel is to use the struct_size() helper to
do the arithmetic instead of the calculation "size + count * size" in
the kzalloc() and copy_to_user() functions.

At the same time, prepare for the coming implementation by GCC and Clang
of the __counted_by attribute. Flexible array members annotated with
__counted_by can have their accesses bounds-checked at run-time via
CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for
strcpy/memcpy-family functions).

In this case, it is important to note that the logic needs a little
refactoring to ensure that the "dev_num" member is initialized before
the first access to the flex array. Specifically, add the assignment
before the list_for_each_entry() loop.

Also remove the "size" variable as it is no longer needed.

This way, the code is more readable and safer.

This code was detected with the help of Coccinelle, and audited and
modified manually.

Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [1]
Link: https://github.com/KSPP/linux/issues/160 [2]
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Erick Archer <erick.archer@outlook.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/rfcomm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 99d26879b02a..c05882476900 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -355,7 +355,7 @@ struct rfcomm_dev_info {
 
 struct rfcomm_dev_list_req {
 	u16      dev_num;
-	struct   rfcomm_dev_info dev_info[];
+	struct   rfcomm_dev_info dev_info[] __counted_by(dev_num);
 };
 
 int  rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
-- 
cgit v1.2.3


From f25b7fd36cc3a850e006aed686f5bbecd200de1b Mon Sep 17 00:00:00 2001
From: Ying Hsu <yinghsu@chromium.org>
Date: Wed, 29 May 2024 08:00:00 +0000
Subject: Bluetooth: Add vendor-specific packet classification for ISO data

When HCI raw sockets are opened, the Bluetooth kernel module doesn't
track CIS/BIS connections. User-space applications have to identify
ISO data by maintaining connection information and look up the mapping
for each ACL data packet received. Besides, btsnoop log captured in
kernel couldn't tell ISO data from ACL data in this case.

To avoid additional lookups, this patch introduces vendor-specific
packet classification for Intel BT controllers to distinguish
ISO data packets from ACL data packets.

Signed-off-by: Ying Hsu <yinghsu@chromium.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci_core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index c43716edf205..f7de2681d457 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -649,6 +649,7 @@ struct hci_dev {
 	int (*get_codec_config_data)(struct hci_dev *hdev, __u8 type,
 				     struct bt_codec *codec, __u8 *vnd_len,
 				     __u8 **vnd_data);
+	u8 (*classify_pkt_type)(struct hci_dev *hdev, struct sk_buff *skb);
 };
 
 #define HCI_PHY_HANDLE(handle)	(handle & 0xff)
-- 
cgit v1.2.3


From da63f331353c9e1e6dc29e49e28f8f4fe5d642fd Mon Sep 17 00:00:00 2001
From: Dmitry Antipov <dmantipov@yandex.ru>
Date: Tue, 18 Jun 2024 21:59:32 +0300
Subject: Bluetooth: hci_core, hci_sync: cleanup struct discovery_state

After commit 78db544b5d27 ("Bluetooth: hci_core: Remove le_restart_scan
work"), 'scan_start' and 'scan_duration' of 'struct discovery_state'
are still initialized but actually unused. So remove the aforementioned
fields and adjust 'hci_discovery_filter_clear()' and 'le_scan_disable()'
accordingly. Compile tested only.

Fixes: 78db544b5d27 ("Bluetooth: hci_core: Remove le_restart_scan work")
Signed-off-by: Dmitry Antipov <dmantipov@yandex.ru>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci_core.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index f7de2681d457..eaeaf3dc07aa 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -91,8 +91,6 @@ struct discovery_state {
 	s8			rssi;
 	u16			uuid_count;
 	u8			(*uuids)[16];
-	unsigned long		scan_start;
-	unsigned long		scan_duration;
 	unsigned long		name_resolve_timeout;
 };
 
@@ -891,8 +889,6 @@ static inline void hci_discovery_filter_clear(struct hci_dev *hdev)
 	hdev->discovery.uuid_count = 0;
 	kfree(hdev->discovery.uuids);
 	hdev->discovery.uuids = NULL;
-	hdev->discovery.scan_start = 0;
-	hdev->discovery.scan_duration = 0;
 }
 
 bool hci_discovery_active(struct hci_dev *hdev);
-- 
cgit v1.2.3


From 2c1583290b08c5aa9005178a573be8f329de2976 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kamil=20Hor=C3=A1k=20=282N=29?= <kamilh@axis.com>
Date: Fri, 12 Jul 2024 17:07:06 +0200
Subject: net: phy: bcm54811: New link mode for BroadR-Reach
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce a new link mode necessary for 10 MBit single-pair
connection in BroadR-Reach mode on bcm5481x PHY by Broadcom.
This new link mode, 10baseT1BRR, is known as 1BR10 in the Broadcom
terminology. Another link mode to be used is 1BR100 and it is already
present as 100baseT1, because Broadcom's 1BR100 became 100baseT1
(IEEE 802.3bw).

Signed-off-by: Kamil Horák (2N) <kamilh@axis.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://patch.msgid.link/20240712150709.3134474-2-kamilh@axis.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/ethtool.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 230110b97029..4a0a6e703483 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -2054,6 +2054,7 @@ enum ethtool_link_mode_bit_indices {
 	ETHTOOL_LINK_MODE_10baseT1S_Full_BIT		 = 99,
 	ETHTOOL_LINK_MODE_10baseT1S_Half_BIT		 = 100,
 	ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT	 = 101,
+	ETHTOOL_LINK_MODE_10baseT1BRR_Full_BIT		 = 102,
 
 	/* must be last entry */
 	__ETHTOOL_LINK_MODE_MASK_NBITS
-- 
cgit v1.2.3


From ff253875ff3bab75214c4e94352956dc02a0d965 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kamil=20Hor=C3=A1k=20=282N=29?= <kamilh@axis.com>
Date: Fri, 12 Jul 2024 17:07:07 +0200
Subject: net: phy: bcm54811: Add LRE registers definitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the definitions of LRE registers for Broadcom BCM5481x PHY

Signed-off-by: Kamil Horák (2N) <kamilh@axis.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/20240712150709.3134474-3-kamilh@axis.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/brcmphy.h | 88 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

(limited to 'include')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 1394ba302367..028b3e00378e 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -271,12 +271,100 @@
 #define BCM5482_SSD_SGMII_SLAVE_EN	0x0002	/* Slave mode enable */
 #define BCM5482_SSD_SGMII_SLAVE_AD	0x0001	/* Slave auto-detection */
 
+/* BroadR-Reach LRE Registers. */
+#define MII_BCM54XX_LRECR		0x00	/* LRE Control Register                    */
+#define MII_BCM54XX_LRESR		0x01	/* LRE Status Register                     */
+#define MII_BCM54XX_LREPHYSID1		0x02	/* LRE PHYS ID 1                           */
+#define MII_BCM54XX_LREPHYSID2		0x03	/* LRE PHYS ID 2                           */
+#define MII_BCM54XX_LREANAA		0x04	/* LDS Auto-Negotiation Advertised Ability */
+#define MII_BCM54XX_LREANAC		0x05	/* LDS Auto-Negotiation Advertised Control */
+#define MII_BCM54XX_LREANPT		0x06	/* LDS Ability Next Page Transmit          */
+#define MII_BCM54XX_LRELPA		0x07	/* LDS Link Partner Ability                */
+#define MII_BCM54XX_LRELPNPM		0x08	/* LDS Link Partner Next Page Message      */
+#define MII_BCM54XX_LRELPNPC		0x09	/* LDS Link Partner Next Page Control      */
+#define MII_BCM54XX_LRELDSE		0x0a	/* LDS Expansion Register                  */
+#define MII_BCM54XX_LREES		0x0f	/* LRE Extended Status                     */
+
+/* LRE control register. */
+#define LRECR_RESET			0x8000	/* Reset to default state      */
+#define LRECR_LOOPBACK			0x4000	/* Internal Loopback           */
+#define LRECR_LDSRES			0x2000	/* Restart LDS Process         */
+#define LRECR_LDSEN			0x1000	/* LDS Enable                  */
+#define LRECR_PDOWN			0x0800	/* Enable low power state      */
+#define LRECR_ISOLATE			0x0400	/* Isolate data paths from MII */
+#define LRECR_SPEED100			0x0200	/* Select 100 Mbps             */
+#define LRECR_SPEED10			0x0000	/* Select 10 Mbps              */
+#define LRECR_4PAIRS			0x0020	/* Select 4 Pairs              */
+#define LRECR_2PAIRS			0x0010	/* Select 2 Pairs              */
+#define LRECR_1PAIR			0x0000	/* Select 1 Pair               */
+#define LRECR_MASTER			0x0008	/* Force Master when LDS disabled */
+#define LRECR_SLAVE			0x0000	/* Force Slave when LDS disabled  */
+
+/* LRE status register. */
+#define LRESR_100_1PAIR			0x2000	/* Can do 100Mbps 1 Pair       */
+#define LRESR_100_4PAIR			0x1000	/* Can do 100Mbps 4 Pairs      */
+#define LRESR_100_2PAIR			0x0800	/* Can do 100Mbps 2 Pairs      */
+#define LRESR_10_2PAIR			0x0400	/* Can do 10Mbps 2 Pairs       */
+#define LRESR_10_1PAIR			0x0200	/* Can do 10Mbps 1 Pair        */
+#define LRESR_ESTATEN			0x0100	/* Extended Status in R15      */
+#define LRESR_RESV			0x0080	/* Unused...                   */
+#define LRESR_MFPS			0x0040	/* Can suppress Management Frames Preamble */
+#define LRESR_LDSCOMPLETE		0x0020	/* LDS Auto-negotiation complete */
+#define LRESR_8023			0x0010	/* Has IEEE 802.3 Support      */
+#define LRESR_LDSABILITY		0x0008	/* LDS auto-negotiation capable */
+#define LRESR_LSTATUS			0x0004	/* Link status                 */
+#define LRESR_JCD			0x0002	/* Jabber detected             */
+#define LRESR_ERCAP			0x0001	/* Ext-reg capability          */
+
+/* LDS Auto-Negotiation Advertised Ability. */
+#define LREANAA_PAUSE_ASYM		0x8000	/* Can pause asymmetrically    */
+#define LREANAA_PAUSE			0x4000	/* Can pause                   */
+#define LREANAA_100_1PAIR		0x0020	/* Can do 100Mbps 1 Pair       */
+#define LREANAA_100_4PAIR		0x0010	/* Can do 100Mbps 4 Pair       */
+#define LREANAA_100_2PAIR		0x0008	/* Can do 100Mbps 2 Pair       */
+#define LREANAA_10_2PAIR		0x0004	/* Can do 10Mbps 2 Pair        */
+#define LREANAA_10_1PAIR		0x0002	/* Can do 10Mbps 1 Pair        */
+
+#define LRE_ADVERTISE_FULL		(LREANAA_100_1PAIR | LREANAA_100_4PAIR | \
+					 LREANAA_100_2PAIR | LREANAA_10_2PAIR | \
+					 LREANAA_10_1PAIR)
+
+#define LRE_ADVERTISE_ALL		LRE_ADVERTISE_FULL
+
+/* LDS Link Partner Ability. */
+#define LRELPA_PAUSE_ASYM		0x8000	/* Supports asymmetric pause   */
+#define LRELPA_PAUSE			0x4000	/* Supports pause capability   */
+#define LRELPA_100_1PAIR		0x0020	/* 100Mbps 1 Pair capable      */
+#define LRELPA_100_4PAIR		0x0010	/* 100Mbps 4 Pair capable      */
+#define LRELPA_100_2PAIR		0x0008	/* 100Mbps 2 Pair capable      */
+#define LRELPA_10_2PAIR			0x0004	/* 10Mbps 2 Pair capable       */
+#define LRELPA_10_1PAIR			0x0002	/* 10Mbps 1 Pair capable       */
+
+/* LDS Expansion register. */
+#define LDSE_DOWNGRADE			0x8000	/* Can do LDS Speed Downgrade  */
+#define LDSE_MASTER			0x4000	/* Master / Slave              */
+#define LDSE_PAIRS_MASK			0x3000	/* Pair Count Mask             */
+#define LDSE_PAIRS_SHIFT		12
+#define LDSE_4PAIRS			(2 << LDSE_PAIRS_SHIFT)	/* 4 Pairs Connection */
+#define LDSE_2PAIRS			(1 << LDSE_PAIRS_SHIFT)	/* 2 Pairs Connection */
+#define LDSE_1PAIR			(0 << LDSE_PAIRS_SHIFT)	/* 1 Pair  Connection */
+#define LDSE_CABLEN_MASK		0x0FFF	/* Cable Length Mask           */
+
 /* BCM54810 Registers */
 #define BCM54810_EXP_BROADREACH_LRE_MISC_CTL	(MII_BCM54XX_EXP_SEL_ER + 0x90)
 #define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN	(1 << 0)
 #define BCM54810_SHD_CLK_CTL			0x3
 #define BCM54810_SHD_CLK_CTL_GTXCLK_EN		(1 << 9)
 
+/* BCM54811 Registers */
+#define BCM54811_EXP_BROADREACH_LRE_OVERLAY_CTL	(MII_BCM54XX_EXP_SEL_ER + 0x9A)
+/* Access Control Override Enable */
+#define BCM54811_EXP_BROADREACH_LRE_OVERLAY_CTL_EN		BIT(15)
+/* Access Control Override Value */
+#define BCM54811_EXP_BROADREACH_LRE_OVERLAY_CTL_OVERRIDE_VAL	BIT(14)
+/* Access Control Value */
+#define BCM54811_EXP_BROADREACH_LRE_OVERLAY_CTL_VAL		BIT(13)
+
 /* BCM54612E Registers */
 #define BCM54612E_EXP_SPARE0		(MII_BCM54XX_EXP_SEL_ETC + 0x34)
 #define BCM54612E_LED4_CLK125OUT_EN	(1 << 1)
-- 
cgit v1.2.3


From a52c6330ff2fe1163333fa6609bdc6e8763ec286 Mon Sep 17 00:00:00 2001
From: "Alex Shi (Tencent)" <alexs@kernel.org>
Date: Fri, 12 Jul 2024 12:14:35 +0800
Subject: mm/memcg: alignment memcg_data define condition

commit 21c690a349ba ("mm: introduce slabobj_ext to support slab object
extensions") changed the folio/page->memcg_data define condition from
MEMCG to SLAB_OBJ_EXT. This action make memcg_data exposed while !MEMCG.

As Vlastimil Babka suggested, let's add _unused_slab_obj_exts for
SLAB_MATCH for slab.obj_exts while !MEMCG. That could resolve the match
issue, clean up the feature logical.

Signed-off-by: Alex Shi (Tencent) <alexs@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yoann Congal <yoann.congal@smile.fr>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/mm_types.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index af3a0256fa93..a199c48bc462 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -169,8 +169,10 @@ struct page {
 	/* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */
 	atomic_t _refcount;
 
-#ifdef CONFIG_SLAB_OBJ_EXT
+#ifdef CONFIG_MEMCG
 	unsigned long memcg_data;
+#elif defined(CONFIG_SLAB_OBJ_EXT)
+	unsigned long _unused_slab_obj_exts;
 #endif
 
 	/*
@@ -298,6 +300,7 @@ typedef struct {
  * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h.
  * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head().
  * @_deferred_list: Folios to be split under memory pressure.
+ * @_unused_slab_obj_exts: Placeholder to match obj_exts in struct slab.
  *
  * A folio is a physically, virtually and logically contiguous set
  * of bytes.  It is a power-of-two in size, and it is aligned to that
@@ -332,8 +335,10 @@ struct folio {
 			};
 			atomic_t _mapcount;
 			atomic_t _refcount;
-#ifdef CONFIG_SLAB_OBJ_EXT
+#ifdef CONFIG_MEMCG
 			unsigned long memcg_data;
+#elif defined(CONFIG_SLAB_OBJ_EXT)
+			unsigned long _unused_slab_obj_exts;
 #endif
 #if defined(WANT_PAGE_VIRTUAL)
 			void *virtual;
-- 
cgit v1.2.3


From 6b04928e83f298d99359f3ba67187a328c223357 Mon Sep 17 00:00:00 2001
From: Julien Panis <jpanis@baylibre.com>
Date: Mon, 3 Jun 2024 12:50:48 +0200
Subject: dt-bindings: thermal: mediatek: Fix thermal zone definition for
 MT8186

Fix a thermal zone name for consistency with the other SoCs:
MFG contains GPU, the latter is more specific and must be used here.

The naming must be fixed "atomically" so compilation does not break.
As a result, the change is made in the dt-bindings and in the LVTS
driver within a single commit, despite the checkpatch warning.

The definition can be safely modified here because it is used only
in the LVTS driver, which is modified accordingly, and has not yet
been included in a released kernel.

Fixes: a2ca202350f9 ("dt-bindings: thermal: mediatek: Add LVTS thermal controller definition for MT8186")
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Julien Panis <jpanis@baylibre.com>
Link: https://lore.kernel.org/r/20240603-mtk-thermal-mt818x-dtsi-v7-1-8c8e3c7a3643@baylibre.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 include/dt-bindings/thermal/mediatek,lvts-thermal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/dt-bindings/thermal/mediatek,lvts-thermal.h b/include/dt-bindings/thermal/mediatek,lvts-thermal.h
index bf95309d2525..85d25b4d726d 100644
--- a/include/dt-bindings/thermal/mediatek,lvts-thermal.h
+++ b/include/dt-bindings/thermal/mediatek,lvts-thermal.h
@@ -24,7 +24,7 @@
 #define MT8186_BIG_CPU1	5
 #define MT8186_NNA		6
 #define MT8186_ADSP		7
-#define MT8186_MFG		8
+#define MT8186_GPU		8
 
 #define MT8188_MCU_LITTLE_CPU0	0
 #define MT8188_MCU_LITTLE_CPU1	1
-- 
cgit v1.2.3


From be3e224ec502d49cf9017304286c14418b230fe6 Mon Sep 17 00:00:00 2001
From: Julien Panis <jpanis@baylibre.com>
Date: Mon, 3 Jun 2024 12:50:49 +0200
Subject: dt-bindings: thermal: mediatek: Fix thermal zone definitions for
 MT8188

Fix thermal zone names for consistency with the other SoCs:
- GPU0 must be used as the first GPU item.
- SOCx deal with audio DSP, video, and infra subsystems.

The naming must be fixed "atomically" so compilation does not break.
As a result, the change is made in the dt-bindings and in the LVTS
driver within a single commit, despite the checkpatch warning.

The definitions can be safely modified here because they are used only
in the LVTS driver, which is modified accordingly, and have not yet
been included in a released kernel.

Fixes: 78c88534e5e1 ("dt-bindings: thermal: mediatek: Add LVTS thermal controller definition for MT8188")
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Julien Panis <jpanis@baylibre.com>
Link: https://lore.kernel.org/r/20240603-mtk-thermal-mt818x-dtsi-v7-2-8c8e3c7a3643@baylibre.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 include/dt-bindings/thermal/mediatek,lvts-thermal.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/thermal/mediatek,lvts-thermal.h b/include/dt-bindings/thermal/mediatek,lvts-thermal.h
index 85d25b4d726d..ddc7302a510a 100644
--- a/include/dt-bindings/thermal/mediatek,lvts-thermal.h
+++ b/include/dt-bindings/thermal/mediatek,lvts-thermal.h
@@ -34,11 +34,11 @@
 #define MT8188_MCU_BIG_CPU1	5
 
 #define MT8188_AP_APU		0
-#define MT8188_AP_GPU1		1
-#define MT8188_AP_GPU2		2
-#define MT8188_AP_SOC1		3
-#define MT8188_AP_SOC2		4
-#define MT8188_AP_SOC3		5
+#define MT8188_AP_GPU0		1
+#define MT8188_AP_GPU1		2
+#define MT8188_AP_ADSP		3
+#define MT8188_AP_VDO		4
+#define MT8188_AP_INFRA		5
 #define MT8188_AP_CAM1		6
 #define MT8188_AP_CAM2		7
 
-- 
cgit v1.2.3


From 136a8066676e593cd29627219467fc222c8f3b04 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Thu, 11 Jul 2024 21:11:03 -0300
Subject: iommufd: Put constants for all the uAPI enums

Relying on position in the enum makes it subtly harder when doing merge
resolutions or backporting as it is easy to grab a patch and not notice it
is a uAPI change with a differently ordered enum. This may become a bigger
problem in next cycles when iommu_hwpt_invalidate_data_type and other
per-driver enums have patches flowing through different trees.

So lets start including constants for all the uAPI enums to make this
safer.

No functional change.

Link: https://lore.kernel.org/r/0-v1-2c06ec044924+133-iommufd_uapi_const_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/linux/iommufd.h | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index e31385b75d0b..4dde745cfb7e 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -37,20 +37,20 @@
 enum {
 	IOMMUFD_CMD_BASE = 0x80,
 	IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
-	IOMMUFD_CMD_IOAS_ALLOC,
-	IOMMUFD_CMD_IOAS_ALLOW_IOVAS,
-	IOMMUFD_CMD_IOAS_COPY,
-	IOMMUFD_CMD_IOAS_IOVA_RANGES,
-	IOMMUFD_CMD_IOAS_MAP,
-	IOMMUFD_CMD_IOAS_UNMAP,
-	IOMMUFD_CMD_OPTION,
-	IOMMUFD_CMD_VFIO_IOAS,
-	IOMMUFD_CMD_HWPT_ALLOC,
-	IOMMUFD_CMD_GET_HW_INFO,
-	IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING,
-	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP,
-	IOMMUFD_CMD_HWPT_INVALIDATE,
-	IOMMUFD_CMD_FAULT_QUEUE_ALLOC,
+	IOMMUFD_CMD_IOAS_ALLOC = 0x81,
+	IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82,
+	IOMMUFD_CMD_IOAS_COPY = 0x83,
+	IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84,
+	IOMMUFD_CMD_IOAS_MAP = 0x85,
+	IOMMUFD_CMD_IOAS_UNMAP = 0x86,
+	IOMMUFD_CMD_OPTION = 0x87,
+	IOMMUFD_CMD_VFIO_IOAS = 0x88,
+	IOMMUFD_CMD_HWPT_ALLOC = 0x89,
+	IOMMUFD_CMD_GET_HW_INFO = 0x8a,
+	IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b,
+	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
+	IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
+	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
 };
 
 /**
@@ -400,8 +400,8 @@ struct iommu_hwpt_vtd_s1 {
  * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
  */
 enum iommu_hwpt_data_type {
-	IOMMU_HWPT_DATA_NONE,
-	IOMMU_HWPT_DATA_VTD_S1,
+	IOMMU_HWPT_DATA_NONE = 0,
+	IOMMU_HWPT_DATA_VTD_S1 = 1,
 };
 
 /**
@@ -491,8 +491,8 @@ struct iommu_hw_info_vtd {
  * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
  */
 enum iommu_hw_info_type {
-	IOMMU_HW_INFO_TYPE_NONE,
-	IOMMU_HW_INFO_TYPE_INTEL_VTD,
+	IOMMU_HW_INFO_TYPE_NONE = 0,
+	IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
 };
 
 /**
@@ -629,7 +629,7 @@ struct iommu_hwpt_get_dirty_bitmap {
  * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
  */
 enum iommu_hwpt_invalidate_data_type {
-	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
+	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
 };
 
 /**
@@ -768,7 +768,7 @@ struct iommu_hwpt_pgfault {
  */
 enum iommufd_page_response_code {
 	IOMMUFD_PAGE_RESP_SUCCESS = 0,
-	IOMMUFD_PAGE_RESP_INVALID,
+	IOMMUFD_PAGE_RESP_INVALID = 1,
 };
 
 /**
-- 
cgit v1.2.3


From f0eb154c39471bf881422e8ac23e4c037289ece9 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 5 Jul 2024 10:31:54 +0100
Subject: irqchip/gic-v4: Substitute vmovp_lock for a per-VM lock

vmovp_lock is abused in a number of cases to serialise updates
to vlpi_count[] and deal with map/unmap of a VM to ITSs.

Instead, provide a per-VM lock and revisit the use of vlpi_count[]
so that it is always wrapped in this per-VM vmapp_lock.

This reduces the potential contention on a concurrent VMOVP command,
and paves the way for subsequent VPE locking that holding vmovp_lock
actively prevents due to the lock ordering.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Nianyao Tang <tangnianyao@huawei.com>
Link: https://lore.kernel.org/r/20240705093155.871070-3-maz@kernel.org
---
 include/linux/irqchip/arm-gic-v4.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index 2c63375bbd43..ecabed6d3307 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -25,6 +25,14 @@ struct its_vm {
 	irq_hw_number_t		db_lpi_base;
 	unsigned long		*db_bitmap;
 	int			nr_db_lpis;
+	/*
+	 * Ensures mutual exclusion between updates to vlpi_count[]
+	 * and map/unmap when using the ITSList mechanism.
+	 *
+	 * The lock order for any sequence involving the ITSList is
+	 * vmapp_lock -> vpe_lock ->vmovp_lock.
+	 */
+	raw_spinlock_t		vmapp_lock;
 	u32			vlpi_count[GICv4_ITS_LIST_MAX];
 };
 
-- 
cgit v1.2.3


From c37927a203fa283950f6045602b9f71328ad786c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 11 Jul 2024 12:20:04 +0200
Subject: genirq: Set IRQF_COND_ONESHOT in request_irq()

The callers of request_irq() don't care about IRQF_ONESHOT because they
don't provide threaded handlers, but if they happen to share the IRQ with
the ACPI SCI, which has a threaded handler and sets IRQF_ONESHOT,
request_irq() will fail for them due to a flags mismatch.

Address this by making request_irq() add IRQF_COND_ONESHOT to the flags
passed to request_threaded_irq() for all of its callers.

Fixes: 7a36b901a6eb ("ACPI: OSL: Use a threaded interrupt handler for SCI")
Reported-by: Stefan Seyfried <stefan.seyfried@googlemail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Stefan Seyfried <stefan.seyfried@googlemail.com>
Cc: stable@vger.kerel.org
Link: https://lore.kernel.org/r/5800834.DvuYhMxLoT@rjwysocki.net
Closes: https://lore.kernel.org/lkml/205bd84a-fe8e-4963-968e-0763285f35ba@message-id.googlemail.com
---
 include/linux/interrupt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5c9bdd3ffccc..dac7466de5f3 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -168,7 +168,7 @@ static inline int __must_check
 request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
 	    const char *name, void *dev)
 {
-	return request_threaded_irq(irq, handler, NULL, flags, name, dev);
+	return request_threaded_irq(irq, handler, NULL, flags | IRQF_COND_ONESHOT, name, dev);
 }
 
 extern int __must_check
-- 
cgit v1.2.3


From b7b377332b96a38bc98928d7ec2674c77a95fcb3 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Fri, 12 Jul 2024 08:41:48 +0200
Subject: irqdomain: Fix the kernel-doc and plug it into Documentation

There were several undocumented fields in structs irq_domain_ops and
irq_domain_info. Document them.

irq_domain_ops::revmap_size contained "[]" in the description, which is not
allowed in sphinx. Remove that.

Finally, plug the whole header (irqdomain.h) into genericirq.rst, so that
the docs is autogenerated and hyperlinks to these structure are created.

Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20240712064148.157040-1-jirislaby@kernel.org
---
 include/linux/irqdomain.h | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 02cd486ac354..de6105f68fec 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -74,11 +74,24 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
  * struct irq_domain_ops - Methods for irq_domain objects
  * @match: Match an interrupt controller device node to a host, returns
  *         1 on a match
+ * @select: Match an interrupt controller fw specification. It is more generic
+ *	    than @match as it receives a complete struct irq_fwspec. Therefore,
+ *	    @select is preferred if provided. Returns 1 on a match.
  * @map: Create or update a mapping between a virtual irq number and a hw
  *       irq number. This is called only once for a given mapping.
  * @unmap: Dispose of such a mapping
  * @xlate: Given a device tree node and interrupt specifier, decode
  *         the hardware irq number and linux irq type value.
+ * @alloc: Allocate @nr_irqs interrupts starting from @virq.
+ * @free: Free @nr_irqs interrupts starting from @virq.
+ * @activate: Activate one interrupt in HW (@irqd). If @reserve is set, only
+ *	      reserve the vector. If unset, assign the vector (called from
+ *	      request_irq()).
+ * @deactivate: Disarm one interrupt (@irqd).
+ * @translate: Given @fwspec, decode the hardware irq number (@out_hwirq) and
+ *	       linux irq type value (@out_type). This is a generalised @xlate
+ *	       (over struct irq_fwspec) and is preferred if provided.
+ * @debug_show: For domains to show specific data for an interrupt in debugfs.
  *
  * Functions below are provided by the driver and called whenever a new mapping
  * is created or an old mapping is disposed. The driver can then proceed to
@@ -131,6 +144,9 @@ struct irq_domain_chip_generic;
  * Optional elements:
  * @fwnode:	Pointer to firmware node associated with the irq_domain. Pretty easy
  *		to swap it for the of_node via the irq_domain_get_of_node accessor
+ * @bus_token:	@fwnode's device_node might be used for several irq domains. But
+ *		in connection with @bus_token, the pair shall be unique in a
+ *		system.
  * @gc:		Pointer to a list of generic chips. There is a helper function for
  *		setting up one or more generic chips for interrupt controllers
  *		drivers using the generic chip library which uses this pointer.
@@ -144,7 +160,9 @@ struct irq_domain_chip_generic;
  * @exit:	Function called when the domain is destroyed
  *
  * Revmap data, used internally by the irq domain code:
- * @revmap_size:	Size of the linear map table @revmap[]
+ * @hwirq_max:		Top limit for the HW irq number. Especially to avoid
+ *			conflicts/failures with reserved HW irqs. Can be ~0.
+ * @revmap_size:	Size of the linear map table @revmap
  * @revmap_tree:	Radix map tree for hwirqs that don't fit in the linear map
  * @revmap:		Linear table of irq_data pointers
  */
-- 
cgit v1.2.3


From 6c87e1a4792804efce8ab3dfdb6e9ada314ec6dd Mon Sep 17 00:00:00 2001
From: Pawel Dembicki <paweldembicki@gmail.com>
Date: Sat, 13 Jul 2024 23:16:13 +0200
Subject: net: dsa: vsc73xx: introduce tag 8021q for vsc73xx

This commit introduces a new tagger based on 802.1q tagging.
It's designed for the vsc73xx driver. The VSC73xx family doesn't have
any tag support for the RGMII port, but it could be based on VLANs.

Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://patch.msgid.link/20240713211620.1125910-8-paweldembicki@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dsa.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index f9ae3ca66b6f..5a5a03a7b4c3 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -53,6 +53,7 @@ struct tc_action;
 #define DSA_TAG_PROTO_RTL8_4T_VALUE		25
 #define DSA_TAG_PROTO_RZN1_A5PSW_VALUE		26
 #define DSA_TAG_PROTO_LAN937X_VALUE		27
+#define DSA_TAG_PROTO_VSC73XX_8021Q_VALUE	28
 
 enum dsa_tag_protocol {
 	DSA_TAG_PROTO_NONE		= DSA_TAG_PROTO_NONE_VALUE,
@@ -83,6 +84,7 @@ enum dsa_tag_protocol {
 	DSA_TAG_PROTO_RTL8_4T		= DSA_TAG_PROTO_RTL8_4T_VALUE,
 	DSA_TAG_PROTO_RZN1_A5PSW	= DSA_TAG_PROTO_RZN1_A5PSW_VALUE,
 	DSA_TAG_PROTO_LAN937X		= DSA_TAG_PROTO_LAN937X_VALUE,
+	DSA_TAG_PROTO_VSC73XX_8021Q	= DSA_TAG_PROTO_VSC73XX_8021Q_VALUE,
 };
 
 struct dsa_switch;
-- 
cgit v1.2.3


From ce20fdd670ac375a4e3dff91c2888ad9ff9eef56 Mon Sep 17 00:00:00 2001
From: Pawel Dembicki <paweldembicki@gmail.com>
Date: Sat, 13 Jul 2024 23:16:15 +0200
Subject: net: dsa: Define max num of bridges in tag8021q implementation

Max number of bridges in tag8021q implementation is strictly limited
by VBID size: 3 bits. But zero is reserved and only 7 values can be used.

This patch adds define which describe maximum possible value.

Suggested-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Link: https://patch.msgid.link/20240713211620.1125910-10-paweldembicki@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/dsa/8021q.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index f3664ee12170..1dda2a13b832 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -8,6 +8,11 @@
 #include <net/dsa.h>
 #include <linux/types.h>
 
+/* VBID is limited to three bits only and zero is reserved.
+ * Only 7 bridges can be enumerated.
+ */
+#define DSA_TAG_8021Q_MAX_NUM_BRIDGES	7
+
 int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto);
 
 void dsa_tag_8021q_unregister(struct dsa_switch *ds);
-- 
cgit v1.2.3


From 85aabd1fe9d6af4dc5d11a2d8be567ec45d1dc5e Mon Sep 17 00:00:00 2001
From: Pawel Dembicki <paweldembicki@gmail.com>
Date: Sat, 13 Jul 2024 23:16:16 +0200
Subject: net: dsa: prepare 'dsa_tag_8021q_bridge_join' for standalone use

The 'dsa_tag_8021q_bridge_join' could be used as a generic implementation
of the 'ds->ops->port_bridge_join()' function. However, it is necessary
to synchronize their arguments.

This patch also moves the 'tx_fwd_offload' flag configuration line into
'dsa_tag_8021q_bridge_join' body. Currently, every (sja1105) driver sets
it, and the future vsc73xx implementation will also need it for
simplification.

Suggested-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Link: https://patch.msgid.link/20240713211620.1125910-11-paweldembicki@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/dsa/8021q.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index 1dda2a13b832..d13aabdeb4b2 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -18,7 +18,8 @@ int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto);
 void dsa_tag_8021q_unregister(struct dsa_switch *ds);
 
 int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port,
-			      struct dsa_bridge bridge);
+			      struct dsa_bridge bridge, bool *tx_fwd_offload,
+			      struct netlink_ext_ack *extack);
 
 void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port,
 				struct dsa_bridge bridge);
-- 
cgit v1.2.3


From 3ba74b2f288bbc17c0c2a58ab219e1df19f80153 Mon Sep 17 00:00:00 2001
From: Dmitry Antipov <dmantipov@yandex.ru>
Date: Fri, 21 Jun 2024 16:01:55 +0300
Subject: Bluetooth: hci_core: cleanup struct hci_dev

Remove unused and set but otherwise unused 'discovery_old_state'
and 'sco_last_tx' members of 'struct hci_dev'. The first one is
a leftover after commit 182ee45da083 ("Bluetooth: hci_sync: Rework
hci_suspend_notifier"); the second one is originated from ancient
2.4.19 and I was unable to find any actual use since that.

Signed-off-by: Dmitry Antipov <dmantipov@yandex.ru>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci_core.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index eaeaf3dc07aa..31020891fc68 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -476,7 +476,6 @@ struct hci_dev {
 	unsigned int	iso_pkts;
 
 	unsigned long	acl_last_tx;
-	unsigned long	sco_last_tx;
 	unsigned long	le_last_tx;
 
 	__u8		le_tx_def_phys;
@@ -528,7 +527,6 @@ struct hci_dev {
 
 	struct discovery_state	discovery;
 
-	int			discovery_old_state;
 	bool			discovery_paused;
 	int			advertising_old_state;
 	bool			advertising_paused;
-- 
cgit v1.2.3


From 92048ab2e2e6cc90ad1cc9f55deb5cec4d731793 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Mon, 1 Jul 2024 16:00:08 -0400
Subject: Bluetooth: hci_core: Remove usage of hci_req_sync

hci_request functions are considered deprecated so this replaces the
usage of hci_req_sync with hci_inquiry_sync.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci_sync.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 20168732f20e..620e6014beb2 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -131,6 +131,8 @@ int hci_update_discoverable(struct hci_dev *hdev);
 
 int hci_update_connectable_sync(struct hci_dev *hdev);
 
+int hci_inquiry_sync(struct hci_dev *hdev, u8 length, u8 num_rsp);
+
 int hci_start_discovery_sync(struct hci_dev *hdev);
 int hci_stop_discovery_sync(struct hci_dev *hdev);
 
-- 
cgit v1.2.3


From 176cbeceb5c5a740216a6be3e751e76aaddf94b9 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Mon, 1 Jul 2024 16:13:56 -0400
Subject: Bluetooth: hci_core: Don't use hci_prepare_cmd

This replaces the instance of hci_prepare_cmd with hci_cmd_sync_alloc
since the former is part of hci_request.c which is considered
deprecated.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci_sync.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 620e6014beb2..a8d88247ac89 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -20,6 +20,10 @@ struct hci_cmd_sync_work_entry {
 };
 
 struct adv_info;
+
+struct sk_buff *hci_cmd_sync_alloc(struct hci_dev *hdev, u16 opcode, u32 plen,
+				   const void *param, struct sock *sk);
+
 /* Function with sync suffix shall not be called with hdev->lock held as they
  * wait the command to complete and in the meantime an event could be received
  * which could attempt to acquire hdev->lock causing a deadlock.
-- 
cgit v1.2.3


From f2d89775358606c7ab6b6b6c4a02fe1e8cd270b1 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Mon, 1 Jul 2024 16:52:57 -0400
Subject: Bluetooth: hci_sync: Remove remaining dependencies of hci_request

This removes the dependencies of hci_req_init and hci_request_cancel_all
from hci_sync.c.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci_sync.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index a8d88247ac89..75e052909b5f 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -8,6 +8,23 @@
 #define UINT_PTR(_handle)		((void *)((uintptr_t)_handle))
 #define PTR_UINT(_ptr)			((uintptr_t)((void *)_ptr))
 
+#define HCI_REQ_DONE	  0
+#define HCI_REQ_PEND	  1
+#define HCI_REQ_CANCELED  2
+
+#define hci_req_sync_lock(hdev)   mutex_lock(&hdev->req_lock)
+#define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock)
+
+struct hci_request {
+	struct hci_dev		*hdev;
+	struct sk_buff_head	cmd_q;
+
+	/* If something goes wrong when building the HCI request, the error
+	 * value is stored in this field.
+	 */
+	int			err;
+};
+
 typedef int (*hci_cmd_sync_work_func_t)(struct hci_dev *hdev, void *data);
 typedef void (*hci_cmd_sync_work_destroy_t)(struct hci_dev *hdev, void *data,
 					    int err);
-- 
cgit v1.2.3


From 936daee9cf08c5e58c9a0fe687f52adb2d80e87d Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Mon, 1 Jul 2024 17:10:41 -0400
Subject: Bluetooth: Remove hci_request.{c,h}

This removes hci_request.{c,h} since it shall no longer be used.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/bluetooth.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index b3228bd6cd6b..5d655e109b2c 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -441,6 +441,10 @@ typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode);
 typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status,
 				       u16 opcode, struct sk_buff *skb);
 
+void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
+			  hci_req_complete_t *req_complete,
+			  hci_req_complete_skb_t *req_complete_skb);
+
 #define HCI_REQ_START	BIT(0)
 #define HCI_REQ_SKB	BIT(1)
 
-- 
cgit v1.2.3


From e50bfd6bb231a6e2b7221ad78dce294330238c76 Mon Sep 17 00:00:00 2001
From: Kory Maincent <kory.maincent@bootlin.com>
Date: Tue, 9 Jul 2024 15:53:33 +0200
Subject: net_tstamp: Add TIMESTAMPING SOFTWARE and HARDWARE mask

Timestamping software or hardware flags are often used as a group,
therefore adding these masks will ease future use.

I did not use SOF_TIMESTAMPING_SYS_HARDWARE flag as it is deprecated and
not used at all.

Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://patch.msgid.link/20240709-feature_ptp_netnext-v17-1-b5317f50df2a@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/net_tstamp.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h
index eb01c37e71e0..3799c79b6c83 100644
--- a/include/linux/net_tstamp.h
+++ b/include/linux/net_tstamp.h
@@ -5,6 +5,14 @@
 
 #include <uapi/linux/net_tstamp.h>
 
+#define SOF_TIMESTAMPING_SOFTWARE_MASK	(SOF_TIMESTAMPING_RX_SOFTWARE | \
+					 SOF_TIMESTAMPING_TX_SOFTWARE | \
+					 SOF_TIMESTAMPING_SOFTWARE)
+
+#define SOF_TIMESTAMPING_HARDWARE_MASK	(SOF_TIMESTAMPING_RX_HARDWARE | \
+					 SOF_TIMESTAMPING_TX_HARDWARE | \
+					 SOF_TIMESTAMPING_RAW_HARDWARE)
+
 enum hwtstamp_source {
 	HWTSTAMP_SOURCE_NETDEV,
 	HWTSTAMP_SOURCE_PHYLIB,
-- 
cgit v1.2.3


From 2dd35600590148d843367c04975acad3c1a527c3 Mon Sep 17 00:00:00 2001
From: Kory Maincent <kory.maincent@bootlin.com>
Date: Tue, 9 Jul 2024 15:53:36 +0200
Subject: net: Change the API of PHY default timestamp to MAC

Change the API to select MAC default time stamping instead of the PHY.
Indeed the PHY is closer to the wire therefore theoretically it has less
delay than the MAC timestamping but the reality is different. Due to lower
time stamping clock frequency, latency in the MDIO bus and no PHC hardware
synchronization between different PHY, the PHY PTP is often less precise
than the MAC. The exception is for PHY designed specially for PTP case but
these devices are not very widespread. For not breaking the compatibility
default_timestamp flag has been introduced in phy_device that is set by
the phy driver to know we are using the old API behavior.

Reviewed-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://patch.msgid.link/20240709-feature_ptp_netnext-v17-4-b5317f50df2a@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index bd68f9d8e74f..e7a38137211c 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -616,6 +616,8 @@ struct macsec_ops;
  *                 handling shall be postponed until PHY has resumed
  * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended,
  *             requiring a rerun of the interrupt handler after resume
+ * @default_timestamp: Flag indicating whether we are using the phy
+ *		       timestamp as the default one
  * @interface: enum phy_interface_t value
  * @possible_interfaces: bitmap if interface modes that the attached PHY
  *			 will switch between depending on media speed.
@@ -681,6 +683,8 @@ struct phy_device {
 	unsigned irq_suspended:1;
 	unsigned irq_rerun:1;
 
+	unsigned default_timestamp:1;
+
 	int rate_matching;
 
 	enum phy_state state;
@@ -1625,6 +1629,21 @@ static inline void phy_txtstamp(struct phy_device *phydev, struct sk_buff *skb,
 	phydev->mii_ts->txtstamp(phydev->mii_ts, skb, type);
 }
 
+/**
+ * phy_is_default_hwtstamp - Is the PHY hwtstamp the default timestamp
+ * @phydev: Pointer to phy_device
+ *
+ * This is used to get default timestamping device taking into account
+ * the new API choice, which is selecting the timestamping from MAC by
+ * default if the phydev does not have default_timestamp flag enabled.
+ *
+ * Return: True if phy is the default hw timestamp, false otherwise.
+ */
+static inline bool phy_is_default_hwtstamp(struct phy_device *phydev)
+{
+	return phy_has_hwtstamp(phydev) && phydev->default_timestamp;
+}
+
 /**
  * phy_is_internal - Convenience function for testing if a PHY is internal
  * @phydev: the phy_device struct
-- 
cgit v1.2.3


From bc5a07ed15a3d30df3e8af2ef6bb0472f1d337f9 Mon Sep 17 00:00:00 2001
From: Kory Maincent <kory.maincent@bootlin.com>
Date: Tue, 9 Jul 2024 15:53:37 +0200
Subject: net: net_tstamp: Add unspec field to hwtstamp_source enumeration

Prepare for future support of saving hwtstamp source in PTP xarray by
introducing HWTSTAMP_SOURCE_UNSPEC to hwtstamp_source enum, setting it
to 0 to match old behavior of no source defined.

Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://patch.msgid.link/20240709-feature_ptp_netnext-v17-5-b5317f50df2a@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/net_tstamp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h
index 3799c79b6c83..662074b08c94 100644
--- a/include/linux/net_tstamp.h
+++ b/include/linux/net_tstamp.h
@@ -14,6 +14,7 @@
 					 SOF_TIMESTAMPING_RAW_HARDWARE)
 
 enum hwtstamp_source {
+	HWTSTAMP_SOURCE_UNSPEC,
 	HWTSTAMP_SOURCE_NETDEV,
 	HWTSTAMP_SOURCE_PHYLIB,
 };
-- 
cgit v1.2.3


From 2111375b85ad173d58e7b8604246a3de60950ac8 Mon Sep 17 00:00:00 2001
From: Kory Maincent <kory.maincent@bootlin.com>
Date: Tue, 9 Jul 2024 15:53:38 +0200
Subject: net: Add struct kernel_ethtool_ts_info

In prevision to add new UAPI for hwtstamp we will be limited to the struct
ethtool_ts_info that is currently passed in fixed binary format through the
ETHTOOL_GET_TS_INFO ethtool ioctl. It would be good if new kernel code
already started operating on an extensible kernel variant of that
structure, similar in concept to struct kernel_hwtstamp_config vs struct
hwtstamp_config.

Since struct ethtool_ts_info is in include/uapi/linux/ethtool.h, here
we introduce the kernel-only structure in include/linux/ethtool.h.
The manual copy is then made in the function called by ETHTOOL_GET_TS_INFO.

Acked-by: Shannon Nelson <shannon.nelson@amd.com>
Acked-by: Alexandra Winter <wintera@linux.ibm.com>
Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://patch.msgid.link/20240709-feature_ptp_netnext-v17-6-b5317f50df2a@bootlin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/can/dev.h         |  2 +-
 include/linux/ethtool.h         | 25 ++++++++++++++++++++++---
 include/linux/mii_timestamper.h |  2 +-
 include/linux/phy.h             |  2 +-
 include/net/dsa.h               |  2 +-
 include/soc/mscc/ocelot.h       |  2 +-
 6 files changed, 27 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 1b92aed49363..23492213ea35 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -186,7 +186,7 @@ void close_candev(struct net_device *dev);
 int can_change_mtu(struct net_device *dev, int new_mtu);
 int can_eth_ioctl_hwts(struct net_device *netdev, struct ifreq *ifr, int cmd);
 int can_ethtool_op_get_ts_info_hwts(struct net_device *dev,
-				    struct ethtool_ts_info *info);
+				    struct kernel_ethtool_ts_info *info);
 
 int register_candev(struct net_device *dev);
 void unregister_candev(struct net_device *dev);
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index e213b5508da6..6b38bbda5790 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -18,6 +18,7 @@
 #include <linux/if_ether.h>
 #include <linux/netlink.h>
 #include <uapi/linux/ethtool.h>
+#include <uapi/linux/net_tstamp.h>
 
 struct compat_ethtool_rx_flow_spec {
 	u32		flow_type;
@@ -713,6 +714,22 @@ struct ethtool_rxfh_param {
 	u8	input_xfrm;
 };
 
+/**
+ * struct kernel_ethtool_ts_info - kernel copy of struct ethtool_ts_info
+ * @cmd: command number = %ETHTOOL_GET_TS_INFO
+ * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags
+ * @phc_index: device index of the associated PHC, or -1 if there is none
+ * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values
+ * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values
+ */
+struct kernel_ethtool_ts_info {
+	u32 cmd;
+	u32 so_timestamping;
+	int phc_index;
+	enum hwtstamp_tx_types tx_types;
+	enum hwtstamp_rx_filters rx_filters;
+};
+
 /**
  * struct ethtool_ops - optional netdev operations
  * @cap_link_lanes_supported: indicates if the driver supports lanes
@@ -1020,7 +1037,7 @@ struct ethtool_ops {
 	int	(*get_dump_data)(struct net_device *,
 				 struct ethtool_dump *, void *);
 	int	(*set_dump)(struct net_device *, struct ethtool_dump *);
-	int	(*get_ts_info)(struct net_device *, struct ethtool_ts_info *);
+	int	(*get_ts_info)(struct net_device *, struct kernel_ethtool_ts_info *);
 	void	(*get_ts_stats)(struct net_device *dev,
 				struct ethtool_ts_stats *ts_stats);
 	int     (*get_module_info)(struct net_device *,
@@ -1181,7 +1198,8 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index);
 
 /* Some generic methods drivers may use in their ethtool_ops */
 u32 ethtool_op_get_link(struct net_device *dev);
-int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *eti);
+int ethtool_op_get_ts_info(struct net_device *dev,
+			   struct kernel_ethtool_ts_info *eti);
 
 /**
  * ethtool_mm_frag_size_add_to_min - Translate (standard) additional fragment
@@ -1230,7 +1248,8 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add,
  * @info: buffer to hold the result
  * Returns zero on success, non-zero otherwise.
  */
-int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info);
+int ethtool_get_ts_info_by_layer(struct net_device *dev,
+				 struct kernel_ethtool_ts_info *info);
 
 /**
  * ethtool_sprintf - Write formatted string to ethtool string data
diff --git a/include/linux/mii_timestamper.h b/include/linux/mii_timestamper.h
index 26b04f73f214..995db62570f9 100644
--- a/include/linux/mii_timestamper.h
+++ b/include/linux/mii_timestamper.h
@@ -59,7 +59,7 @@ struct mii_timestamper {
 			   struct phy_device *phydev);
 
 	int  (*ts_info)(struct mii_timestamper *mii_ts,
-			struct ethtool_ts_info *ts_info);
+			struct kernel_ethtool_ts_info *ts_info);
 
 	struct device *device;
 };
diff --git a/include/linux/phy.h b/include/linux/phy.h
index e7a38137211c..04ae5c811cfb 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1618,7 +1618,7 @@ static inline bool phy_rxtstamp(struct phy_device *phydev, struct sk_buff *skb,
 }
 
 static inline int phy_ts_info(struct phy_device *phydev,
-			      struct ethtool_ts_info *tsinfo)
+			      struct kernel_ethtool_ts_info *tsinfo)
 {
 	return phydev->mii_ts->ts_info(phydev->mii_ts, tsinfo);
 }
diff --git a/include/net/dsa.h b/include/net/dsa.h
index f9ae3ca66b6f..96efdd9f90c9 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -934,7 +934,7 @@ struct dsa_switch_ops {
 	 * ethtool timestamp info
 	 */
 	int	(*get_ts_info)(struct dsa_switch *ds, int port,
-			       struct ethtool_ts_info *ts);
+			       struct kernel_ethtool_ts_info *ts);
 
 	/*
 	 * ethtool MAC merge layer
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 1e1b40f4e664..6a37b29f4b4c 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -1016,7 +1016,7 @@ void ocelot_port_get_eth_mac_stats(struct ocelot *ocelot, int port,
 void ocelot_port_get_eth_phy_stats(struct ocelot *ocelot, int port,
 				   struct ethtool_eth_phy_stats *phy_stats);
 int ocelot_get_ts_info(struct ocelot *ocelot, int port,
-		       struct ethtool_ts_info *info);
+		       struct kernel_ethtool_ts_info *info);
 void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs);
 int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled,
 			       struct netlink_ext_ack *extack);
-- 
cgit v1.2.3


From f96eb1172ed8d74cfed7da92d2045ec64028cc38 Mon Sep 17 00:00:00 2001
From: Christian Eggers <ceggers@arri.de>
Date: Mon, 15 Jul 2024 14:30:50 +0200
Subject: dsa: lan9303: consistent naming for PHY address parameter

Name it 'addr' instead of 'port' or 'phy'.

Signed-off-by: Christian Eggers <ceggers@arri.de>
Link: https://patch.msgid.link/20240715123050.21202-1-ceggers@arri.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/dsa/lan9303.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/dsa/lan9303.h b/include/linux/dsa/lan9303.h
index b4f22112ba75..3ce7cbcc37a3 100644
--- a/include/linux/dsa/lan9303.h
+++ b/include/linux/dsa/lan9303.h
@@ -5,8 +5,8 @@ struct lan9303;
 
 struct lan9303_phy_ops {
 	/* PHY 1 and 2 access*/
-	int	(*phy_read)(struct lan9303 *chip, int port, int regnum);
-	int	(*phy_write)(struct lan9303 *chip, int port,
+	int	(*phy_read)(struct lan9303 *chip, int addr, int regnum);
+	int	(*phy_write)(struct lan9303 *chip, int addr,
 			     int regnum, u16 val);
 };
 
-- 
cgit v1.2.3


From 70de41ef78573ce958ac04ecc2b5671851723c59 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 14 Jul 2024 18:05:56 +0200
Subject: llc: Constify struct llc_conn_state_trans

'struct llc_conn_state_trans' are not modified in this driver.

Constifying this structure moves some data to a read-only section, so
increase overall security.

On a x86_64, with allmodconfig, as an example:
Before:
======
   text	   data	    bss	    dec	    hex	filename
  13923	  10896	     32	  24851	   6113	net/llc/llc_c_st.o

After:
=====
   text	   data	    bss	    dec	    hex	filename
  21859	   3328	      0	  25187	   6263	net/llc/llc_c_st.o

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/87cda89e4c9414e71d1a54bb1eb491b0e7f70375.1720973029.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/llc_c_st.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/llc_c_st.h b/include/net/llc_c_st.h
index 53823d61d8b6..a4bea0f33188 100644
--- a/include/net/llc_c_st.h
+++ b/include/net/llc_c_st.h
@@ -44,8 +44,8 @@ struct llc_conn_state_trans {
 };
 
 struct llc_conn_state {
-	u8			    current_state;
-	struct llc_conn_state_trans **transitions;
+	u8				  current_state;
+	const struct llc_conn_state_trans **transitions;
 };
 
 extern struct llc_conn_state llc_conn_state_table[];
-- 
cgit v1.2.3


From 0970bf676f86c2c4d9bf7e672f5504d390c9fce6 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 14 Jul 2024 18:15:20 +0200
Subject: llc: Constify struct llc_sap_state_trans

'struct llc_sap_state_trans' are not modified in this driver.

Constifying this structure moves some data to a read-only section, so
increase overall security.

On a x86_64, with allmodconfig, as an example:
Before:
======
   text	   data	    bss	    dec	    hex	filename
    339	    456	     24	    819	    333	net/llc/llc_s_st.o

After:
=====
   text	   data	    bss	    dec	    hex	filename
    683	    144	      0	    827	    33b	net/llc/llc_s_st.o

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/9d17587639195ee94b74ff06a11ef97d1833ee52.1720973710.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/llc_s_st.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/llc_s_st.h b/include/net/llc_s_st.h
index ed5b2fa40d32..fca49d483d20 100644
--- a/include/net/llc_s_st.h
+++ b/include/net/llc_s_st.h
@@ -29,8 +29,8 @@ struct llc_sap_state_trans {
 };
 
 struct llc_sap_state {
-	u8			   curr_state;
-	struct llc_sap_state_trans **transitions;
+	u8				 curr_state;
+	const struct llc_sap_state_trans **transitions;
 };
 
 /* only access to SAP state table */
-- 
cgit v1.2.3


From c442db3f49f27e5a60a641b2ac9a3c6320796ed6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 10 Jun 2024 20:25:17 +0900
Subject: kbuild: remove PROVIDE() for kallsyms symbols

This reimplements commit 951bcae6c5a0 ("kallsyms: Avoid weak references
for kallsyms symbols") because I am not a big fan of PROVIDE().

As an alternative solution, this commit prepends one more kallsyms step.

    KSYMS   .tmp_vmlinux.kallsyms0.S          # added
    AS      .tmp_vmlinux.kallsyms0.o          # added
    LD      .tmp_vmlinux.btf
    BTF     .btf.vmlinux.bin.o
    LD      .tmp_vmlinux.kallsyms1
    NM      .tmp_vmlinux.kallsyms1.syms
    KSYMS   .tmp_vmlinux.kallsyms1.S
    AS      .tmp_vmlinux.kallsyms1.o
    LD      .tmp_vmlinux.kallsyms2
    NM      .tmp_vmlinux.kallsyms2.syms
    KSYMS   .tmp_vmlinux.kallsyms2.S
    AS      .tmp_vmlinux.kallsyms2.o
    LD      vmlinux

Step 0 takes /dev/null as input, and generates .tmp_vmlinux.kallsyms0.o,
which has a valid kallsyms format with the empty symbol list, and can be
linked to vmlinux. Since it is really small, the added compile-time cost
is negligible.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Nicolas Schier <nicolas@fjasle.eu>
---
 include/asm-generic/vmlinux.lds.h | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 5703526d6ebf..62b4cb0462e6 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -451,30 +451,11 @@
 #endif
 #endif
 
-/*
- * Some symbol definitions will not exist yet during the first pass of the
- * link, but are guaranteed to exist in the final link. Provide preliminary
- * definitions that will be superseded in the final link to avoid having to
- * rely on weak external linkage, which requires a GOT when used in position
- * independent code.
- */
-#define PRELIMINARY_SYMBOL_DEFINITIONS					\
-	PROVIDE(kallsyms_addresses = .);				\
-	PROVIDE(kallsyms_offsets = .);					\
-	PROVIDE(kallsyms_names = .);					\
-	PROVIDE(kallsyms_num_syms = .);					\
-	PROVIDE(kallsyms_relative_base = .);				\
-	PROVIDE(kallsyms_token_table = .);				\
-	PROVIDE(kallsyms_token_index = .);				\
-	PROVIDE(kallsyms_markers = .);					\
-	PROVIDE(kallsyms_seqs_of_names = .);
-
 /*
  * Read only Data
  */
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
-	PRELIMINARY_SYMBOL_DEFINITIONS					\
 	.rodata           : AT(ADDR(.rodata) - LOAD_OFFSET) {		\
 		__start_rodata = .;					\
 		*(.rodata) *(.rodata.*)					\
-- 
cgit v1.2.3


From 6e5c85c003e47fd49b72ca052b9181644e04a520 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= <ast@fiberby.net>
Date: Sat, 13 Jul 2024 02:18:58 +0000
Subject: net/sched: flower: refactor control flag definitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Redefine the flower control flags as an enum, so they are
included in BTF info.

Make the kernel-side enum a more explicit superset of
TCA_FLOWER_KEY_FLAGS_*, new flags still need to be added to
both enums, but at least the bit position only has to be
defined once.

FLOW_DIS_ENCAPSULATION is never set for mask, so it can't be
exposed to userspace in an unsupported flags mask error message,
so it will be placed one bit position above the last uAPI flag.

Suggested-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Asbjørn Sloth Tønnesen <ast@fiberby.net>
Reviewed-by: Davide Caratti <dcaratti@redhat.com>
Link: https://patch.msgid.link/20240713021911.1631517-2-ast@fiberby.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/flow_dissector.h | 14 +++++++++++---
 include/uapi/linux/pkt_cls.h |  3 +++
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 3e47e123934d..c3fce070b912 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -7,6 +7,7 @@
 #include <linux/siphash.h>
 #include <linux/string.h>
 #include <uapi/linux/if_ether.h>
+#include <uapi/linux/pkt_cls.h>
 
 struct bpf_prog;
 struct net;
@@ -24,9 +25,16 @@ struct flow_dissector_key_control {
 	u32	flags;
 };
 
-#define FLOW_DIS_IS_FRAGMENT	BIT(0)
-#define FLOW_DIS_FIRST_FRAG	BIT(1)
-#define FLOW_DIS_ENCAPSULATION	BIT(2)
+/* The control flags are kept in sync with TCA_FLOWER_KEY_FLAGS_*, as those
+ * flags are exposed to userspace in some error paths, ie. unsupported flags.
+ */
+enum flow_dissector_ctrl_flags {
+	FLOW_DIS_IS_FRAGMENT		= TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT,
+	FLOW_DIS_FIRST_FRAG		= TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
+
+	/* These flags are internal to the kernel */
+	FLOW_DIS_ENCAPSULATION		= (TCA_FLOWER_KEY_FLAGS_MAX << 1),
+};
 
 enum flow_dissect_ret {
 	FLOW_DISSECT_RET_OUT_GOOD,
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index b6d38f5fd7c0..12db276f0c11 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -677,8 +677,11 @@ enum {
 enum {
 	TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
 	TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
+	__TCA_FLOWER_KEY_FLAGS_MAX,
 };
 
+#define TCA_FLOWER_KEY_FLAGS_MAX (__TCA_FLOWER_KEY_FLAGS_MAX - 1)
+
 enum {
 	TCA_FLOWER_KEY_CFM_OPT_UNSPEC,
 	TCA_FLOWER_KEY_CFM_MD_LEVEL,
-- 
cgit v1.2.3


From bfda5a63137bc83c344c4d995f404c8e701ff0fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= <ast@fiberby.net>
Date: Sat, 13 Jul 2024 02:19:00 +0000
Subject: net/sched: flower: define new tunnel flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Define new TCA_FLOWER_KEY_FLAGS_* flags for use in struct
flow_dissector_key_control, covering the same flags as
currently exposed through TCA_FLOWER_KEY_ENC_FLAGS.

Put the new flags under FLOW_DIS_F_*. The idea is that we can
later, move the existing flags under FLOW_DIS_F_* as well.

The ynl flag names have been taken from the RFC iproute2 patch.

Signed-off-by: Asbjørn Sloth Tønnesen <ast@fiberby.net>
Reviewed-by: Donald Hunter <donald.hunter@gmail.com>
Link: https://patch.msgid.link/20240713021911.1631517-4-ast@fiberby.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/flow_dissector.h | 7 ++++++-
 include/uapi/linux/pkt_cls.h | 4 ++++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index c3fce070b912..460ea65b9e59 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -17,7 +17,8 @@ struct sk_buff;
  * struct flow_dissector_key_control:
  * @thoff:     Transport header offset
  * @addr_type: Type of key. One of FLOW_DISSECTOR_KEY_*
- * @flags:     Key flags. Any of FLOW_DIS_(IS_FRAGMENT|FIRST_FRAGENCAPSULATION)
+ * @flags:     Key flags.
+ *             Any of FLOW_DIS_(IS_FRAGMENT|FIRST_FRAG|ENCAPSULATION|F_*)
  */
 struct flow_dissector_key_control {
 	u16	thoff;
@@ -31,6 +32,10 @@ struct flow_dissector_key_control {
 enum flow_dissector_ctrl_flags {
 	FLOW_DIS_IS_FRAGMENT		= TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT,
 	FLOW_DIS_FIRST_FRAG		= TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
+	FLOW_DIS_F_TUNNEL_CSUM		= TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM,
+	FLOW_DIS_F_TUNNEL_DONT_FRAGMENT	= TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT,
+	FLOW_DIS_F_TUNNEL_OAM		= TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM,
+	FLOW_DIS_F_TUNNEL_CRIT_OPT	= TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT,
 
 	/* These flags are internal to the kernel */
 	FLOW_DIS_ENCAPSULATION		= (TCA_FLOWER_KEY_FLAGS_MAX << 1),
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 12db276f0c11..3dc4388e944c 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -677,6 +677,10 @@ enum {
 enum {
 	TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
 	TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
+	TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM = (1 << 2),
+	TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT = (1 << 3),
+	TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM = (1 << 4),
+	TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT = (1 << 5),
 	__TCA_FLOWER_KEY_FLAGS_MAX,
 };
 
-- 
cgit v1.2.3


From 11036bd7a0b3b05c5e1f43d107ddb02abf83adb9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= <ast@fiberby.net>
Date: Sat, 13 Jul 2024 02:19:06 +0000
Subject: net/sched: cls_flower: rework TCA_FLOWER_KEY_ENC_FLAGS usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch changes how TCA_FLOWER_KEY_ENC_FLAGS is used, so that
it is used with TCA_FLOWER_KEY_FLAGS_* flags, in the same way as
TCA_FLOWER_KEY_FLAGS is currently used.

Where TCA_FLOWER_KEY_FLAGS uses {key,mask}->control.flags, then
TCA_FLOWER_KEY_ENC_FLAGS now uses {key,mask}->enc_control.flags,
therefore {key,mask}->enc_flags is now unused.

As the generic fl_set_key_flags/fl_dump_key_flags() is used with
encap set to true, then fl_{set,dump}_key_enc_flags() is removed.

This breaks unreleased userspace API (net-next since 2024-06-04).

Signed-off-by: Asbjørn Sloth Tønnesen <ast@fiberby.net>
Tested-by: Davide Caratti <dcaratti@redhat.com>
Reviewed-by: Davide Caratti <dcaratti@redhat.com>
Link: https://patch.msgid.link/20240713021911.1631517-10-ast@fiberby.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/pkt_cls.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 3dc4388e944c..d36d9cdf0c00 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -554,8 +554,8 @@ enum {
 	TCA_FLOWER_KEY_SPI,		/* be32 */
 	TCA_FLOWER_KEY_SPI_MASK,	/* be32 */
 
-	TCA_FLOWER_KEY_ENC_FLAGS,	/* u32 */
-	TCA_FLOWER_KEY_ENC_FLAGS_MASK,	/* u32 */
+	TCA_FLOWER_KEY_ENC_FLAGS,	/* be32 */
+	TCA_FLOWER_KEY_ENC_FLAGS_MASK,	/* be32 */
 
 	__TCA_FLOWER_MAX,
 };
-- 
cgit v1.2.3


From db5271d50ec155abf287a27fa84e2e33a81dbd55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= <ast@fiberby.net>
Date: Sat, 13 Jul 2024 02:19:08 +0000
Subject: flow_dissector: cleanup FLOW_DISSECTOR_KEY_ENC_FLAGS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that TCA_FLOWER_KEY_ENC_FLAGS is unused, as it's
former data is stored behind TCA_FLOWER_KEY_ENC_CONTROL,
then remove the last bits of FLOW_DISSECTOR_KEY_ENC_FLAGS.

FLOW_DISSECTOR_KEY_ENC_FLAGS is unreleased, and have been
in net-next since 2024-06-04.

Signed-off-by: Asbjørn Sloth Tønnesen <ast@fiberby.net>
Tested-by: Davide Caratti <dcaratti@redhat.com>
Reviewed-by: Davide Caratti <dcaratti@redhat.com>
Link: https://patch.msgid.link/20240713021911.1631517-12-ast@fiberby.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/flow_dissector.h |  9 ---------
 include/net/ip_tunnels.h     | 12 ------------
 2 files changed, 21 deletions(-)

(limited to 'include')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 460ea65b9e59..ced79dc8e856 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -342,14 +342,6 @@ struct flow_dissector_key_cfm {
 #define FLOW_DIS_CFM_MDL_MASK GENMASK(7, 5)
 #define FLOW_DIS_CFM_MDL_MAX 7
 
-/**
- * struct flow_dissector_key_enc_flags: tunnel metadata control flags
- * @flags: tunnel control flags
- */
-struct flow_dissector_key_enc_flags {
-	u32 flags;
-};
-
 enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */
 	FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
@@ -384,7 +376,6 @@ enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */
 	FLOW_DISSECTOR_KEY_CFM, /* struct flow_dissector_key_cfm */
 	FLOW_DISSECTOR_KEY_IPSEC, /* struct flow_dissector_key_ipsec */
-	FLOW_DISSECTOR_KEY_ENC_FLAGS, /* struct flow_dissector_key_enc_flags */
 
 	FLOW_DISSECTOR_KEY_MAX,
 };
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 3877315cf8b8..1db2417b8ff5 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -247,18 +247,6 @@ static inline bool ip_tunnel_is_options_present(const unsigned long *flags)
 	return ip_tunnel_flags_intersect(flags, present);
 }
 
-static inline void ip_tunnel_set_encflags_present(unsigned long *flags)
-{
-	IP_TUNNEL_DECLARE_FLAGS(present) = { };
-
-	__set_bit(IP_TUNNEL_CSUM_BIT, present);
-	__set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, present);
-	__set_bit(IP_TUNNEL_OAM_BIT, present);
-	__set_bit(IP_TUNNEL_CRIT_OPT_BIT, present);
-
-	ip_tunnel_flags_or(flags, flags, present);
-}
-
 static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags)
 {
 	IP_TUNNEL_DECLARE_FLAGS(supp) = { };
-- 
cgit v1.2.3


From a325742505f6f08141fd7a530a82a907780bfd2c Mon Sep 17 00:00:00 2001
From: Tio Zhang <tiozhang@didiglobal.com>
Date: Wed, 3 Jul 2024 11:33:53 +0800
Subject: tracing/sched: sched_switch: place prev_comm and next_comm in right
 order

Switch the order of prev_comm and next_comm in sched_switch's code to
align with its printing order.

Cc: <mhiramat@kernel.org>
Signed-off-by: Tio Zhang <tiozhang@didiglobal.com>
Link: https://lore.kernel.org/20240703033353.GA2833@didi-ThinkCentre-M930t-N000
Reviewed-by: Madadi Vineeth Reddy <vineethr@linux.ibm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/trace/events/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 6df2b4685b08..9ea4c404bd4e 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -239,11 +239,11 @@ TRACE_EVENT(sched_switch,
 	),
 
 	TP_fast_assign(
-		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
+		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
 		__entry->prev_pid	= prev->pid;
 		__entry->prev_prio	= prev->prio;
 		__entry->prev_state	= __trace_sched_switch_state(preempt, prev_state, prev);
-		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
 		__entry->next_pid	= next->pid;
 		__entry->next_prio	= next->prio;
 		/* XXX SCHED_DEADLINE */
-- 
cgit v1.2.3


From c5eaf1b3f824369b6dcb33a39232b871d2ca8e33 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Fri, 12 Jul 2024 08:49:12 -0700
Subject: PCI: Add Meta Platforms vendor ID

Add Meta as a vendor ID for PCI devices so we can use the macro for future
drivers.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/172079935272.1778861.13619056509276833225.stgit@ahduyck-xeon-server.home.arpa
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/pci_ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 677aea20d3e1..76a8f2d6bd64 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2601,6 +2601,8 @@
 
 #define PCI_VENDOR_ID_HYGON		0x1d94
 
+#define PCI_VENDOR_ID_META		0x1d9b
+
 #define PCI_VENDOR_ID_FUNGIBLE		0x1dad
 
 #define PCI_VENDOR_ID_HXT		0x1dbf
-- 
cgit v1.2.3


From 33c97e7c0338d921c2ae0c2e54bf17121007fb94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=BCnther=20Noack?= <gnoack@google.com>
Date: Thu, 11 Jul 2024 16:54:57 +0000
Subject: landlock: Clarify documentation for struct landlock_ruleset_attr
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The explanation for @handled_access_fs and @handled_access_net has
significant overlap and is better explained together.

* Explain the commonalities in structure-level documentation.
* Clarify some wording and break up longer sentences.
* Put emphasis on the word "handled" to make it clearer that "handled"
  is a term with special meaning in the context of Landlock.

I'd like to transfer this wording into the man pages as well.

Signed-off-by: Günther Noack <gnoack@google.com>
Cc: Alejandro Colomar <alx@kernel.org>
Cc: Konstantin Meskhidze <konstantin.meskhidze@huawei.com>
Cc: linux-security-module@vger.kernel.org
Link: https://lore.kernel.org/r/20240711165456.2148590-2-gnoack@google.com
[mic: Format commit message]
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 include/uapi/linux/landlock.h | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index 68625e728f43..e76186da3260 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -12,29 +12,36 @@
 #include <linux/types.h>
 
 /**
- * struct landlock_ruleset_attr - Ruleset definition
+ * struct landlock_ruleset_attr - Ruleset definition.
  *
- * Argument of sys_landlock_create_ruleset().  This structure can grow in
- * future versions.
+ * Argument of sys_landlock_create_ruleset().
+ *
+ * This structure defines a set of *handled access rights*, a set of actions on
+ * different object types, which should be denied by default when the ruleset is
+ * enacted.  Vice versa, access rights that are not specifically listed here are
+ * not going to be denied by this ruleset when it is enacted.
+ *
+ * For historical reasons, the %LANDLOCK_ACCESS_FS_REFER right is always denied
+ * by default, even when its bit is not set in @handled_access_fs.  In order to
+ * add new rules with this access right, the bit must still be set explicitly
+ * (cf. `Filesystem flags`_).
+ *
+ * The explicit listing of *handled access rights* is required for backwards
+ * compatibility reasons.  In most use cases, processes that use Landlock will
+ * *handle* a wide range or all access rights that they know about at build time
+ * (and that they have tested with a kernel that supported them all).
+ *
+ * This structure can grow in future Landlock versions.
  */
 struct landlock_ruleset_attr {
 	/**
-	 * @handled_access_fs: Bitmask of actions (cf. `Filesystem flags`_)
-	 * that is handled by this ruleset and should then be forbidden if no
-	 * rule explicitly allow them: it is a deny-by-default list that should
-	 * contain as much Landlock access rights as possible. Indeed, all
-	 * Landlock filesystem access rights that are not part of
-	 * handled_access_fs are allowed.  This is needed for backward
-	 * compatibility reasons.  One exception is the
-	 * %LANDLOCK_ACCESS_FS_REFER access right, which is always implicitly
-	 * handled, but must still be explicitly handled to add new rules with
-	 * this access right.
+	 * @handled_access_fs: Bitmask of handled filesystem actions
+	 * (cf. `Filesystem flags`_).
 	 */
 	__u64 handled_access_fs;
 	/**
-	 * @handled_access_net: Bitmask of actions (cf. `Network flags`_)
-	 * that is handled by this ruleset and should then be forbidden if no
-	 * rule explicitly allow them.
+	 * @handled_access_net: Bitmask of handled network actions (cf. `Network
+	 * flags`_).
 	 */
 	__u64 handled_access_net;
 };
-- 
cgit v1.2.3


From 415fb383ec2bbe4d4bb1a1b5593cd67eb058f1de Mon Sep 17 00:00:00 2001
From: Francis Pravin <francis.p@samsung.com>
Date: Mon, 15 Jul 2024 06:31:57 +0530
Subject: nvme-core: choose PIF from QPIF if QPIFS supports and PIF is QTYPE

As per TP4141a:
"If the Qualified Protection Information Format Support(QPIFS) bit is
set to 1 and the Protection Information Format(PIF) field is set to 11b
(i.e., Qualified Type), then the pif is as defined in the Qualified
Protection Information Format (QPIF) field."
So, choose PIF from QPIF if QPIFS supports and PIF is QTYPE.

Signed-off-by: Francis Pravin <francis.p@samsung.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 include/linux/nvme.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 57e27e48c913..5c2290f0d5af 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -483,6 +483,9 @@ enum {
 	NVME_ID_NS_NVM_STS_MASK		= 0x7f,
 	NVME_ID_NS_NVM_GUARD_SHIFT	= 7,
 	NVME_ID_NS_NVM_GUARD_MASK	= 0x3,
+	NVME_ID_NS_NVM_QPIF_SHIFT	= 9,
+	NVME_ID_NS_NVM_QPIF_MASK	= 0xf,
+	NVME_ID_NS_NVM_QPIFS		= 1 << 3,
 };
 
 static inline __u8 nvme_elbaf_sts(__u32 elbaf)
@@ -495,6 +498,11 @@ static inline __u8 nvme_elbaf_guard_type(__u32 elbaf)
 	return (elbaf >> NVME_ID_NS_NVM_GUARD_SHIFT) & NVME_ID_NS_NVM_GUARD_MASK;
 }
 
+static inline __u8 nvme_elbaf_qualified_guard_type(__u32 elbaf)
+{
+	return (elbaf >> NVME_ID_NS_NVM_QPIF_SHIFT) & NVME_ID_NS_NVM_QPIF_MASK;
+}
+
 struct nvme_id_ctrl_nvm {
 	__u8	vsl;
 	__u8	wzsl;
@@ -574,6 +582,7 @@ enum {
 	NVME_NVM_NS_16B_GUARD	= 0,
 	NVME_NVM_NS_32B_GUARD	= 1,
 	NVME_NVM_NS_64B_GUARD	= 2,
+	NVME_NVM_NS_QTYPE_GUARD	= 3,
 };
 
 static inline __u8 nvme_lbaf_index(__u8 flbas)
-- 
cgit v1.2.3


From 88caf544c9305313e1c48ac1a437faa5df8fff06 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Mon, 1 Jul 2024 17:31:46 -0500
Subject: KVM: SEV: Provide support for SNP_GUEST_REQUEST NAE event

Version 2 of GHCB specification added support for the SNP Guest Request
Message NAE event. The event allows for an SEV-SNP guest to make
requests to the SEV-SNP firmware through the hypervisor using the
SNP_GUEST_REQUEST API defined in the SEV-SNP firmware specification.

This is used by guests primarily to request attestation reports from
firmware. There are other request types are available as well, but the
specifics of what guest requests are being made generally does not
affect how they are handled by the hypervisor, which only serves as a
proxy for the guest requests and firmware responses.

Implement handling for these events.

When an SNP Guest Request is issued, the guest will provide its own
request/response pages, which could in theory be passed along directly
to firmware. However, these pages would need special care:

  - Both pages are from shared guest memory, so they need to be
    protected from migration/etc. occurring while firmware reads/writes
    to them. At a minimum, this requires elevating the ref counts and
    potentially needing an explicit pinning of the memory. This places
    additional restrictions on what type of memory backends userspace
    can use for shared guest memory since there would be some reliance
    on using refcounted pages.

  - The response page needs to be switched to Firmware-owned state
    before the firmware can write to it, which can lead to potential
    host RMP #PFs if the guest is misbehaved and hands the host a
    guest page that KVM is writing to for other reasons (e.g. virtio
    buffers).

Both of these issues can be avoided completely by using
separately-allocated bounce pages for both the request/response pages
and passing those to firmware instead. So that's the approach taken
here.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Co-developed-by: Alexey Kardashevskiy <aik@amd.com>
Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
Co-developed-by: Ashish Kalra <ashish.kalra@amd.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
[mdr: ensure FW command failures are indicated to guest, drop extended
 request handling to be re-written as separate patch, massage commit]
Signed-off-by: Michael Roth <michael.roth@amd.com>
Message-ID: <20240701223148.3798365-2-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/sev-guest.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h
index 154a87a1eca9..fcdfea767fca 100644
--- a/include/uapi/linux/sev-guest.h
+++ b/include/uapi/linux/sev-guest.h
@@ -89,6 +89,9 @@ struct snp_ext_report_req {
 #define SNP_GUEST_FW_ERR_MASK		GENMASK_ULL(31, 0)
 #define SNP_GUEST_VMM_ERR_SHIFT		32
 #define SNP_GUEST_VMM_ERR(x)		(((u64)x) << SNP_GUEST_VMM_ERR_SHIFT)
+#define SNP_GUEST_FW_ERR(x)		((x) & SNP_GUEST_FW_ERR_MASK)
+#define SNP_GUEST_ERR(vmm_err, fw_err)	(SNP_GUEST_VMM_ERR(vmm_err) | \
+					 SNP_GUEST_FW_ERR(fw_err))
 
 #define SNP_GUEST_VMM_ERR_INVALID_LEN	1
 #define SNP_GUEST_VMM_ERR_BUSY		2
-- 
cgit v1.2.3


From de1177e56005dccd9729d4ca331de64f5d463b90 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 8 Jul 2024 09:47:57 +0200
Subject: virtio: make virtio_find_vqs() call virtio_find_vqs_ctx()

In order to prepare for conversion of virtio_find_vqs*() arguments, make
virtio_find_vqs() to call virtio_find_vqs_ctx() instead of op directly.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Message-Id: <20240708074814.1739223-3-jiri@resnulli.us>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_config.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index da9b271b54db..d19eaf6bafbf 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -225,22 +225,23 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
 }
 
 static inline
-int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
 			struct virtqueue *vqs[], vq_callback_t *callbacks[],
-			const char * const names[],
+			const char * const names[], const bool *ctx,
 			struct irq_affinity *desc)
 {
-	return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc);
+	return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx,
+				      desc);
 }
 
 static inline
-int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
+int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 			struct virtqueue *vqs[], vq_callback_t *callbacks[],
-			const char * const names[], const bool *ctx,
+			const char * const names[],
 			struct irq_affinity *desc)
 {
-	return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx,
-				      desc);
+	return virtio_find_vqs_ctx(vdev, nvqs, vqs, callbacks,
+				   names, NULL, desc);
 }
 
 /**
-- 
cgit v1.2.3


From 959538c11a88298672195a0eb3289c0f2f88b02d Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 8 Jul 2024 09:47:58 +0200
Subject: virtio: make virtio_find_single_vq() call virtio_find_vqs()

In order to prepare for conversion of virtio_find_vqs*() arguments, make
virtio_find_single_vq() to call virtio_find_vqs() instead of
op directly.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Message-Id: <20240708074814.1739223-4-jiri@resnulli.us>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_config.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index d19eaf6bafbf..82a1d798b2f1 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -210,20 +210,6 @@ static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev)
 	return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM);
 }
 
-static inline
-struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
-					vq_callback_t *c, const char *n)
-{
-	vq_callback_t *callbacks[] = { c };
-	const char *names[] = { n };
-	struct virtqueue *vq;
-	int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL,
-					 NULL);
-	if (err < 0)
-		return ERR_PTR(err);
-	return vq;
-}
-
 static inline
 int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
 			struct virtqueue *vqs[], vq_callback_t *callbacks[],
@@ -244,6 +230,20 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 				   names, NULL, desc);
 }
 
+static inline
+struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
+					vq_callback_t *c, const char *n)
+{
+	vq_callback_t *callbacks[] = { c };
+	const char *names[] = { n };
+	struct virtqueue *vq;
+	int err = virtio_find_vqs(vdev, 1, &vq, callbacks, names, NULL);
+
+	if (err < 0)
+		return ERR_PTR(err);
+	return vq;
+}
+
 /**
  * virtio_synchronize_cbs - synchronize with virtqueue callbacks
  * @dev: the virtio device
-- 
cgit v1.2.3


From c502eb85c34e29bb185e3d15dd9b8fce8a74f303 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 8 Jul 2024 09:47:59 +0200
Subject: virtio: introduce virtio_queue_info struct and find_vqs_info() config
 op

Introduce a structure virtio_queue_info to carry name, callback and ctx
together. In order to allow config implementations to accept config op
with array of virtio_queue_info structures, introduce a new
find_vqs_info() op. Do the needed conversion in virtio_find_vqs_ctx().
Note that whole virtio_find_vqs_ctx() is going to be eventually removed
at the and of this patchset.

Suggested-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Message-Id: <20240708074814.1739223-5-jiri@resnulli.us>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_config.h | 55 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 52 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 82a1d798b2f1..fb7a1fd00ee5 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -18,6 +18,20 @@ struct virtio_shm_region {
 
 typedef void vq_callback_t(struct virtqueue *);
 
+/**
+ * struct virtqueue_info - Info for a virtqueue passed to find_vqs().
+ * @name: virtqueue description. Used mainly for debugging, NULL for
+ *        a virtqueue unused by the driver.
+ * @callback: A callback to invoke on a used buffer notification.
+ *            NULL for a virtqueue that does not need a callback.
+ * @ctx: A flag to indicate to maintain an extra context per virtqueue.
+ */
+struct virtqueue_info {
+	const char *name;
+	vq_callback_t *callback;
+	bool ctx;
+};
+
 /**
  * struct virtio_config_ops - operations for configuring a virtio device
  * Note: Do not assume that a transport implements all of the operations
@@ -58,6 +72,12 @@ typedef void vq_callback_t(struct virtqueue *);
  *	names: array of virtqueue names (mainly for debugging)
  *		include a NULL entry for vqs unused by driver
  *	Returns 0 on success or error status
+ * @find_vqs_info: find virtqueues and instantiate them.
+ *	vdev: the virtio_device
+ *	nvqs: the number of virtqueues to find
+ *	vqs: on success, includes new virtqueues
+ *	vqs_info: array of virtqueue info structures
+ *	Returns 0 on success or error status
  * @del_vqs: free virtqueues found by find_vqs().
  * @synchronize_cbs: synchronize with the virtqueue callbacks (optional)
  *      The function guarantees that all memory operations on the
@@ -109,6 +129,10 @@ struct virtio_config_ops {
 			struct virtqueue *vqs[], vq_callback_t *callbacks[],
 			const char * const names[], const bool *ctx,
 			struct irq_affinity *desc);
+	int (*find_vqs_info)(struct virtio_device *vdev, unsigned int nvqs,
+			     struct virtqueue *vqs[],
+			     struct virtqueue_info vqs_info[],
+			     struct irq_affinity *desc);
 	void (*del_vqs)(struct virtio_device *);
 	void (*synchronize_cbs)(struct virtio_device *);
 	u64 (*get_features)(struct virtio_device *vdev);
@@ -117,7 +141,7 @@ struct virtio_config_ops {
 	int (*set_vq_affinity)(struct virtqueue *vq,
 			       const struct cpumask *cpu_mask);
 	const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev,
-			int index);
+						 int index);
 	bool (*get_shm_region)(struct virtio_device *vdev,
 			       struct virtio_shm_region *region, u8 id);
 	int (*disable_vq_and_reset)(struct virtqueue *vq);
@@ -210,14 +234,39 @@ static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev)
 	return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM);
 }
 
+static inline
+int virtio_find_vqs_info(struct virtio_device *vdev, unsigned int nvqs,
+			 struct virtqueue *vqs[],
+			 struct virtqueue_info vqs_info[],
+			 struct irq_affinity *desc)
+{
+	return vdev->config->find_vqs_info(vdev, nvqs, vqs, vqs_info, desc);
+}
+
 static inline
 int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
 			struct virtqueue *vqs[], vq_callback_t *callbacks[],
 			const char * const names[], const bool *ctx,
 			struct irq_affinity *desc)
 {
-	return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx,
-				      desc);
+	struct virtqueue_info *vqs_info;
+	int err, i;
+
+	if (!vdev->config->find_vqs_info)
+		return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks,
+					      names, ctx, desc);
+
+	vqs_info = kmalloc_array(nvqs, sizeof(*vqs_info), GFP_KERNEL);
+	if (!vqs_info)
+		return -ENOMEM;
+	for (i = 0; i < nvqs; i++) {
+		vqs_info[i].name = names[i];
+		vqs_info[i].callback = callbacks[i];
+		vqs_info[i].ctx = ctx ? ctx[i] : false;
+	}
+	err = virtio_find_vqs_info(vdev, nvqs, vqs, vqs_info, desc);
+	kfree(vqs_info);
+	return err;
 }
 
 static inline
-- 
cgit v1.2.3


From 992648f5a61f1c9fab800fbc42dea31727cd8a55 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 8 Jul 2024 09:48:02 +0200
Subject: virtio: call virtio_find_vqs_info() from virtio_find_single_vq()
 directly

Since there are no more implementations of find_vqs() op, call
virtio_find_vqs_info() from virtio_find_single_vq() directly.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Message-Id: <20240708074814.1739223-8-jiri@resnulli.us>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_config.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index fb7a1fd00ee5..23ce1aec54e8 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -283,10 +283,11 @@ static inline
 struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
 					vq_callback_t *c, const char *n)
 {
-	vq_callback_t *callbacks[] = { c };
-	const char *names[] = { n };
+	struct virtqueue_info vqs_info[] = {
+		{ n, c },
+	};
 	struct virtqueue *vq;
-	int err = virtio_find_vqs(vdev, 1, &vq, callbacks, names, NULL);
+	int err = virtio_find_vqs_info(vdev, 1, &vq, vqs_info, NULL);
 
 	if (err < 0)
 		return ERR_PTR(err);
-- 
cgit v1.2.3


From 18cd029fd7f7a85d522973f34c413a2b009255ac Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 8 Jul 2024 09:48:03 +0200
Subject: virtio: remove the original find_vqs() op

As it is no longer used, remove it.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Message-Id: <20240708074814.1739223-9-jiri@resnulli.us>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_config.h | 17 -----------------
 1 file changed, 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 23ce1aec54e8..1ae346bd2dbd 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -63,15 +63,6 @@ struct virtqueue_info {
  *	After this, status and feature negotiation must be done again
  *	Device must not be reset from its vq/config callbacks, or in
  *	parallel with being added/removed.
- * @find_vqs: find virtqueues and instantiate them.
- *	vdev: the virtio_device
- *	nvqs: the number of virtqueues to find
- *	vqs: on success, includes new virtqueues
- *	callbacks: array of callbacks, for each virtqueue
- *		include a NULL entry for vqs that do not need a callback
- *	names: array of virtqueue names (mainly for debugging)
- *		include a NULL entry for vqs unused by driver
- *	Returns 0 on success or error status
  * @find_vqs_info: find virtqueues and instantiate them.
  *	vdev: the virtio_device
  *	nvqs: the number of virtqueues to find
@@ -125,10 +116,6 @@ struct virtio_config_ops {
 	u8 (*get_status)(struct virtio_device *vdev);
 	void (*set_status)(struct virtio_device *vdev, u8 status);
 	void (*reset)(struct virtio_device *vdev);
-	int (*find_vqs)(struct virtio_device *, unsigned nvqs,
-			struct virtqueue *vqs[], vq_callback_t *callbacks[],
-			const char * const names[], const bool *ctx,
-			struct irq_affinity *desc);
 	int (*find_vqs_info)(struct virtio_device *vdev, unsigned int nvqs,
 			     struct virtqueue *vqs[],
 			     struct virtqueue_info vqs_info[],
@@ -252,10 +239,6 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
 	struct virtqueue_info *vqs_info;
 	int err, i;
 
-	if (!vdev->config->find_vqs_info)
-		return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks,
-					      names, ctx, desc);
-
 	vqs_info = kmalloc_array(nvqs, sizeof(*vqs_info), GFP_KERNEL);
 	if (!vqs_info)
 		return -ENOMEM;
-- 
cgit v1.2.3


From b49503eaf9c74c3d0efd1e8331aba37ce3c1fd68 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 8 Jul 2024 09:48:04 +0200
Subject: virtio: rename find_vqs_info() op to find_vqs()

Since the original find_vqs() is no longer present, rename
find_vqs_info() back to find_vqs().

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Message-Id: <20240708074814.1739223-10-jiri@resnulli.us>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_config.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 1ae346bd2dbd..7848fba7f48a 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -63,7 +63,7 @@ struct virtqueue_info {
  *	After this, status and feature negotiation must be done again
  *	Device must not be reset from its vq/config callbacks, or in
  *	parallel with being added/removed.
- * @find_vqs_info: find virtqueues and instantiate them.
+ * @find_vqs: find virtqueues and instantiate them.
  *	vdev: the virtio_device
  *	nvqs: the number of virtqueues to find
  *	vqs: on success, includes new virtqueues
@@ -116,10 +116,10 @@ struct virtio_config_ops {
 	u8 (*get_status)(struct virtio_device *vdev);
 	void (*set_status)(struct virtio_device *vdev, u8 status);
 	void (*reset)(struct virtio_device *vdev);
-	int (*find_vqs_info)(struct virtio_device *vdev, unsigned int nvqs,
-			     struct virtqueue *vqs[],
-			     struct virtqueue_info vqs_info[],
-			     struct irq_affinity *desc);
+	int (*find_vqs)(struct virtio_device *vdev, unsigned int nvqs,
+			struct virtqueue *vqs[],
+			struct virtqueue_info vqs_info[],
+			struct irq_affinity *desc);
 	void (*del_vqs)(struct virtio_device *);
 	void (*synchronize_cbs)(struct virtio_device *);
 	u64 (*get_features)(struct virtio_device *vdev);
@@ -227,7 +227,7 @@ int virtio_find_vqs_info(struct virtio_device *vdev, unsigned int nvqs,
 			 struct virtqueue_info vqs_info[],
 			 struct irq_affinity *desc)
 {
-	return vdev->config->find_vqs_info(vdev, nvqs, vqs, vqs_info, desc);
+	return vdev->config->find_vqs(vdev, nvqs, vqs, vqs_info, desc);
 }
 
 static inline
-- 
cgit v1.2.3


From 3e8d51c7765dac2a70fe2ad222a8ad244c9f29a4 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 8 Jul 2024 09:48:13 +0200
Subject: virtio: remove unused virtio_find_vqs() and virtio_find_vqs_ctx()
 helpers

All callers of virtio_find_vqs() and virtio_find_vqs_ctx() were
converted to use virtio_find_vqs_info(). Remove no longer used helpers.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Message-Id: <20240708074814.1739223-19-jiri@resnulli.us>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_config.h | 32 --------------------------------
 1 file changed, 32 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 7848fba7f48a..b6b615c3e21d 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -230,38 +230,6 @@ int virtio_find_vqs_info(struct virtio_device *vdev, unsigned int nvqs,
 	return vdev->config->find_vqs(vdev, nvqs, vqs, vqs_info, desc);
 }
 
-static inline
-int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
-			struct virtqueue *vqs[], vq_callback_t *callbacks[],
-			const char * const names[], const bool *ctx,
-			struct irq_affinity *desc)
-{
-	struct virtqueue_info *vqs_info;
-	int err, i;
-
-	vqs_info = kmalloc_array(nvqs, sizeof(*vqs_info), GFP_KERNEL);
-	if (!vqs_info)
-		return -ENOMEM;
-	for (i = 0; i < nvqs; i++) {
-		vqs_info[i].name = names[i];
-		vqs_info[i].callback = callbacks[i];
-		vqs_info[i].ctx = ctx ? ctx[i] : false;
-	}
-	err = virtio_find_vqs_info(vdev, nvqs, vqs, vqs_info, desc);
-	kfree(vqs_info);
-	return err;
-}
-
-static inline
-int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
-			struct virtqueue *vqs[], vq_callback_t *callbacks[],
-			const char * const names[],
-			struct irq_affinity *desc)
-{
-	return virtio_find_vqs_ctx(vdev, nvqs, vqs, callbacks,
-				   names, NULL, desc);
-}
-
 static inline
 struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
 					vq_callback_t *c, const char *n)
-- 
cgit v1.2.3


From 6c85d6b653caeba2ef982925703cbb4f2b3b3163 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 8 Jul 2024 09:48:14 +0200
Subject: virtio: rename virtio_find_vqs_info() to virtio_find_vqs()

Since the original virtio_find_vqs() is no longer present, rename
virtio_find_vqs_info() back to virtio_find_vqs().

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Message-Id: <20240708074814.1739223-20-jiri@resnulli.us>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_config.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index b6b615c3e21d..ab4b9a3fef6b 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -222,10 +222,10 @@ static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev)
 }
 
 static inline
-int virtio_find_vqs_info(struct virtio_device *vdev, unsigned int nvqs,
-			 struct virtqueue *vqs[],
-			 struct virtqueue_info vqs_info[],
-			 struct irq_affinity *desc)
+int virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
+		    struct virtqueue *vqs[],
+		    struct virtqueue_info vqs_info[],
+		    struct irq_affinity *desc)
 {
 	return vdev->config->find_vqs(vdev, nvqs, vqs, vqs_info, desc);
 }
@@ -238,7 +238,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
 		{ n, c },
 	};
 	struct virtqueue *vq;
-	int err = virtio_find_vqs_info(vdev, 1, &vq, vqs_info, NULL);
+	int err = virtio_find_vqs(vdev, 1, &vq, vqs_info, NULL);
 
 	if (err < 0)
 		return ERR_PTR(err);
-- 
cgit v1.2.3


From 332d2c1d713e232e163386c35a3ba0c1b90df83f Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Wed, 1 May 2024 03:52:10 -0500
Subject: crypto: ccp: Add the SNP_VLEK_LOAD command

When requesting an attestation report a guest is able to specify whether
it wants SNP firmware to sign the report using either a Versioned Chip
Endorsement Key (VCEK), which is derived from chip-unique secrets, or a
Versioned Loaded Endorsement Key (VLEK) which is obtained from an AMD
Key Derivation Service (KDS) and derived from seeds allocated to
enrolled cloud service providers (CSPs).

For VLEK keys, an SNP_VLEK_LOAD SNP firmware command is used to load
them into the system after obtaining them from the KDS. Add a
corresponding userspace interface so to allow the loading of VLEK keys
into the system.

See SEV-SNP Firmware ABI 1.54, SNP_VLEK_LOAD for more details.

Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Message-ID: <20240501085210.2213060-21-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/psp-sev.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h
index b7a2c2ee35b7..2289b7c76c59 100644
--- a/include/uapi/linux/psp-sev.h
+++ b/include/uapi/linux/psp-sev.h
@@ -31,6 +31,7 @@ enum {
 	SNP_PLATFORM_STATUS,
 	SNP_COMMIT,
 	SNP_SET_CONFIG,
+	SNP_VLEK_LOAD,
 
 	SEV_MAX,
 };
@@ -214,6 +215,32 @@ struct sev_user_data_snp_config {
 	__u8 rsvd1[52];
 } __packed;
 
+/**
+ * struct sev_data_snp_vlek_load - SNP_VLEK_LOAD structure
+ *
+ * @len: length of the command buffer read by the PSP
+ * @vlek_wrapped_version: version of wrapped VLEK hashstick (Must be 0h)
+ * @rsvd: reserved
+ * @vlek_wrapped_address: address of a wrapped VLEK hashstick
+ *                        (struct sev_user_data_snp_wrapped_vlek_hashstick)
+ */
+struct sev_user_data_snp_vlek_load {
+	__u32 len;				/* In */
+	__u8 vlek_wrapped_version;		/* In */
+	__u8 rsvd[3];				/* In */
+	__u64 vlek_wrapped_address;		/* In */
+} __packed;
+
+/**
+ * struct sev_user_data_snp_vlek_wrapped_vlek_hashstick - Wrapped VLEK data
+ *
+ * @data: Opaque data provided by AMD KDS (as described in SEV-SNP Firmware ABI
+ *        1.54, SNP_VLEK_LOAD)
+ */
+struct sev_user_data_snp_wrapped_vlek_hashstick {
+	__u8 data[432];				/* In */
+} __packed;
+
 /**
  * struct sev_issue_cmd - SEV ioctl parameters
  *
-- 
cgit v1.2.3


From a7526fe8b94eced7d82aa00b2bcca44e39ae0769 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 11 Jul 2024 18:35:30 +0200
Subject: mm, slab: put should_failslab() back behind CONFIG_SHOULD_FAILSLAB

Patch series "revert unconditional slab and page allocator fault injection
calls".

These two patches largely revert commits that added function call overhead
into slab and page allocation hotpaths and that cannot be currently
disabled even though related CONFIG_ options do exist.

A much more involved solution that can keep the callsites always existing
but hidden behind a static key if unused, is possible [1] and can be
pursued by anyone who believes it's necessary.  Meanwhile the fact the
should_failslab() error injection is already not functional on kernels
built with current gcc without anyone noticing [2], and lukewarm response
to [1] suggests the need is not there.  I believe it will be more fair to
have the state after this series as a baseline for possible further
optimisation, instead of the unconditional overhead.

For example a possible compromise for anyone who's fine with an empty
function call overhead but not the full CONFIG_FAILSLAB /
CONFIG_FAIL_PAGE_ALLOC overhead is to reuse patch 1 from [1] but insert a
static key check only inside should_failslab() and
should_fail_alloc_page() before performing the more expensive checks.

[1] https://lore.kernel.org/all/20240620-fault-injection-statickeys-v2-0-e23947d3d84b@suse.cz/#t
[2] https://github.com/bpftrace/bpftrace/issues/3258


This patch (of 2):

This mostly reverts commit 4f6923fbb352 ("mm: make should_failslab always
available for fault injection").  The commit made should_failslab() a
noinline function that's always called from the slab allocation hotpath,
even if it's empty because CONFIG_SHOULD_FAILSLAB is not enabled, and
there is no option to disable that call.  This is visible in profiles and
the function call overhead can be noticeable especially with cpu
mitigations.

Meanwhile the bpftrace program example in the commit silently does not
work without CONFIG_SHOULD_FAILSLAB anyway with a recent gcc, because the
empty function gets a .constprop clone that is actually being called
(uselessly) from the slab hotpath, while the error injection is hooked to
the original function that's not being called at all [1].

Thus put the whole should_failslab() function back behind
CONFIG_SHOULD_FAILSLAB.  It's not a complete revert of 4f6923fbb352 - the
int return type that returns -ENOMEM on failure is preserved, as well
ALLOW_ERROR_INJECTION annotation.  The BTF_ID() record that was meanwhile
added is also guarded by CONFIG_SHOULD_FAILSLAB.

[1] https://github.com/bpftrace/bpftrace/issues/3258

Link: https://lkml.kernel.org/r/20240711-b4-fault-injection-reverts-v1-0-9e2651945d68@suse.cz
Link: https://lkml.kernel.org/r/20240711-b4-fault-injection-reverts-v1-1-9e2651945d68@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Rientjes <rientjes@google.com>
Cc: Eduard Zingerman <eddyz87@gmail.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Song Liu <song@kernel.org>
Cc: Stanislav Fomichev <sdf@fomichev.me>
Cc: Yonghong Song <yonghong.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/fault-inject.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 6d5edef09d45..be6d0bc111ad 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -102,11 +102,10 @@ static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 }
 #endif /* CONFIG_FAIL_PAGE_ALLOC */
 
-int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
 #ifdef CONFIG_FAILSLAB
-extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
+int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
 #else
-static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
+static inline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 {
 	return false;
 }
-- 
cgit v1.2.3


From 53dabce2652fb854eae84609ce9c37429d5d87ba Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 11 Jul 2024 18:35:31 +0200
Subject: mm, page_alloc: put should_fail_alloc_page() back behing
 CONFIG_FAIL_PAGE_ALLOC

This mostly reverts commit af3b854492f3 ("mm/page_alloc.c: allow error
injection").  The commit made should_fail_alloc_page() a noinline function
that's always called from the page allocation hotpath, even if it's empty
because CONFIG_FAIL_PAGE_ALLOC is not enabled, and there is no option to
disable it and prevent the associated function call overhead.

As with the preceding patch "mm, slab: put should_failslab back behind
CONFIG_SHOULD_FAILSLAB" and for the same reasons, put the
should_fail_alloc_page() back behind the config option.  When enabled, the
ALLOW_ERROR_INJECTION and BTF_ID records are preserved so it's not a
complete revert.

Link: https://lkml.kernel.org/r/20240711-b4-fault-injection-reverts-v1-2-9e2651945d68@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Rientjes <rientjes@google.com>
Cc: Eduard Zingerman <eddyz87@gmail.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Song Liu <song@kernel.org>
Cc: Stanislav Fomichev <sdf@fomichev.me>
Cc: Yonghong Song <yonghong.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/fault-inject.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index be6d0bc111ad..354413950d34 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -91,12 +91,10 @@ static inline void fault_config_init(struct fault_config *config,
 
 struct kmem_cache;
 
-bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order);
-
 #ifdef CONFIG_FAIL_PAGE_ALLOC
-bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order);
+bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order);
 #else
-static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
+static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 {
 	return false;
 }
-- 
cgit v1.2.3


From 4810a82c8a8ae06fe6496a23fcb89a4952603e60 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Thu, 11 Jul 2024 15:04:55 -0700
Subject: lib: add missing newline character in the warning message

Link: https://lkml.kernel.org/r/20240711220457.1751071-1-surenb@google.com
Fixes: 22d407b164ff ("lib: add allocation tagging support for memory allocation profiling")
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Sourav Panda <souravpanda@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/alloc_tag.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index abd24016a900..8c61ccd161ba 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -122,7 +122,7 @@ static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag
 		  "alloc_tag was not cleared (got tag for %s:%u)\n",
 		  ref->ct->filename, ref->ct->lineno);
 
-	WARN_ONCE(!tag, "current->alloc_tag not set");
+	WARN_ONCE(!tag, "current->alloc_tag not set\n");
 }
 
 static inline void alloc_tag_sub_check(union codetag_ref *ref)
-- 
cgit v1.2.3


From fd8acc0097b91fab3104fa8a66ce2fd9cf8b0c11 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Thu, 11 Jul 2024 15:04:56 -0700
Subject: lib: reuse page_ext_data() to obtain codetag_ref

codetag_ref_from_page_ext() reimplements the same calculation as
page_ext_data().  Reuse existing function instead.

Link: https://lkml.kernel.org/r/20240711220457.1751071-2-surenb@google.com
Fixes: dcfe378c81f7 ("lib: introduce support for page allocation tagging")
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Sourav Panda <souravpanda@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgalloc_tag.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index 9cacadbd61f8..acb1e9ce7981 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -15,7 +15,7 @@ extern struct page_ext_operations page_alloc_tagging_ops;
 
 static inline union codetag_ref *codetag_ref_from_page_ext(struct page_ext *page_ext)
 {
-	return (void *)page_ext + page_alloc_tagging_ops.offset;
+	return (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops);
 }
 
 static inline struct page_ext *page_ext_from_codetag_ref(union codetag_ref *ref)
-- 
cgit v1.2.3


From 6ab42fe21c84d72da752923b4bd7075344f4a362 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Thu, 11 Jul 2024 15:04:57 -0700
Subject: alloc_tag: fix page_ext_get/page_ext_put sequence during page
 splitting

pgalloc_tag_sub() might call page_ext_put() using a page different from
the one used in page_ext_get() call.  This does not pose an issue since
page_ext_put() ignores this parameter as long as it's non-NULL but
technically this is wrong.  Fix it by storing the original page used in
page_ext_get() and passing it to page_ext_put().

Link: https://lkml.kernel.org/r/20240711220457.1751071-3-surenb@google.com
Fixes: be25d1d4e822 ("mm: create new codetag references during page splitting")
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Sourav Panda <souravpanda@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgalloc_tag.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index acb1e9ce7981..18cd0c0c73d9 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -71,6 +71,7 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
 static inline void pgalloc_tag_split(struct page *page, unsigned int nr)
 {
 	int i;
+	struct page_ext *first_page_ext;
 	struct page_ext *page_ext;
 	union codetag_ref *ref;
 	struct alloc_tag *tag;
@@ -78,7 +79,7 @@ static inline void pgalloc_tag_split(struct page *page, unsigned int nr)
 	if (!mem_alloc_profiling_enabled())
 		return;
 
-	page_ext = page_ext_get(page);
+	first_page_ext = page_ext = page_ext_get(page);
 	if (unlikely(!page_ext))
 		return;
 
@@ -94,7 +95,7 @@ static inline void pgalloc_tag_split(struct page *page, unsigned int nr)
 		page_ext = page_ext_next(page_ext);
 	}
 out:
-	page_ext_put(page_ext);
+	page_ext_put(first_page_ext);
 }
 
 static inline struct alloc_tag *pgalloc_tag_get(struct page *page)
-- 
cgit v1.2.3


From 667574e873b5f77a220b2a93329689f36fb56d5d Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Fri, 12 Jul 2024 11:13:14 +0800
Subject: mm/hugetlb: fix possible recursive locking detected warning

When tries to demote 1G hugetlb folios, a lockdep warning is observed:

============================================
WARNING: possible recursive locking detected
6.10.0-rc6-00452-ga4d0275fa660-dirty #79 Not tainted
--------------------------------------------
bash/710 is trying to acquire lock:
ffffffff8f0a7850 (&h->resize_lock){+.+.}-{3:3}, at: demote_store+0x244/0x460

but task is already holding lock:
ffffffff8f0a6f48 (&h->resize_lock){+.+.}-{3:3}, at: demote_store+0xae/0x460

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&h->resize_lock);
  lock(&h->resize_lock);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

4 locks held by bash/710:
 #0: ffff8f118439c3f0 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x64/0xe0
 #1: ffff8f11893b9e88 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0xf8/0x1d0
 #2: ffff8f1183dc4428 (kn->active#98){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x100/0x1d0
 #3: ffffffff8f0a6f48 (&h->resize_lock){+.+.}-{3:3}, at: demote_store+0xae/0x460

stack backtrace:
CPU: 3 PID: 710 Comm: bash Not tainted 6.10.0-rc6-00452-ga4d0275fa660-dirty #79
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
Call Trace:
 <TASK>
 dump_stack_lvl+0x68/0xa0
 __lock_acquire+0x10f2/0x1ca0
 lock_acquire+0xbe/0x2d0
 __mutex_lock+0x6d/0x400
 demote_store+0x244/0x460
 kernfs_fop_write_iter+0x12c/0x1d0
 vfs_write+0x380/0x540
 ksys_write+0x64/0xe0
 do_syscall_64+0xb9/0x1d0
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7fa61db14887
RSP: 002b:00007ffc56c48358 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fa61db14887
RDX: 0000000000000002 RSI: 000055a030050220 RDI: 0000000000000001
RBP: 000055a030050220 R08: 00007fa61dbd1460 R09: 000000007fffffff
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000002
R13: 00007fa61dc1b780 R14: 00007fa61dc17600 R15: 00007fa61dc16a00
 </TASK>

Lockdep considers this an AA deadlock because the different resize_lock
mutexes reside in the same lockdep class, but this is a false positive.
Place them in distinct classes to avoid these warnings.

Link: https://lkml.kernel.org/r/20240712031314.2570452-1-linmiaohe@huawei.com
Fixes: 8531fc6f52f5 ("hugetlb: add hugetlb demote page support")
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Acked-by: Muchun Song <muchun.song@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ecd0ceeea206..c9bf68c239a0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -663,6 +663,7 @@ HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable)
 /* Defines one hugetlb page size */
 struct hstate {
 	struct mutex resize_lock;
+	struct lock_class_key resize_key;
 	int next_nid_to_alloc;
 	int next_nid_to_free;
 	unsigned int order;
-- 
cgit v1.2.3


From 67856f44da381973caf4eb692ad2cca1de7b2d37 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 15 Jul 2024 08:25:33 +0300
Subject: ia64: scrub ia64 from poison.h

Link: https://lkml.kernel.org/r/c72e5467-06a8-4739-ae6a-7c84c96cad77@p183
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/poison.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/poison.h b/include/linux/poison.h
index 1f0ee2459f2a..a265b499033b 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -52,12 +52,6 @@
 /********** arch/$ARCH/mm/init.c **********/
 #define POISON_FREE_INITMEM	0xcc
 
-/********** arch/ia64/hp/common/sba_iommu.c **********/
-/*
- * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a
- * value of "SBAIOMMU POISON\0" for spill-over poisoning.
- */
-
 /********** fs/jbd/journal.c **********/
 #define JBD_POISON_FREE		0x5b
 #define JBD2_POISON_FREE	0x5c
-- 
cgit v1.2.3


From c14112a5574ff5cf3de198ab6eeff53ac1234068 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Wed, 17 Jul 2024 10:29:16 -0700
Subject: driver core: auxiliary bus: Fix documentation of auxiliary_device

Fix the documentation of the below field of struct auxiliary_device

include/linux/auxiliary_bus.h:150: warning: Function parameter or struct member 'sysfs' not described in 'auxiliary_device'
include/linux/auxiliary_bus.h:150: warning: Excess struct member 'irqs' description in 'auxiliary_device'
include/linux/auxiliary_bus.h:150: warning: Excess struct member 'lock' description in 'auxiliary_device'
include/linux/auxiliary_bus.h:150: warning: Excess struct member 'irq_dir_exists' description in 'auxiliary_device'

Fixes: a808878308a8 ("driver core: auxiliary bus: show auxiliary device IRQs")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://patch.msgid.link/20240717172916.595808-1-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/auxiliary_bus.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h
index 3ba4487c9cd9..1539bbd263d2 100644
--- a/include/linux/auxiliary_bus.h
+++ b/include/linux/auxiliary_bus.h
@@ -58,9 +58,10 @@
  *       in
  * @name: Match name found by the auxiliary device driver,
  * @id: unique identitier if multiple devices of the same name are exported,
- * @irqs: irqs xarray contains irq indices which are used by the device,
- * @lock: Synchronize irq sysfs creation,
- * @irq_dir_exists: whether "irqs" directory exists,
+ * @sysfs: embedded struct which hold all sysfs related fields,
+ * @sysfs.irqs: irqs xarray contains irq indices which are used by the device,
+ * @sysfs.lock: Synchronize irq sysfs creation,
+ * @sysfs.irq_dir_exists: whether "irqs" directory exists,
  *
  * An auxiliary_device represents a part of its parent device's functionality.
  * It is given a name that, combined with the registering drivers
-- 
cgit v1.2.3


From f4b89d8ce5a835afa51404977ee7e3889c2b9722 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=BCnther=20Noack?= <gnoack@google.com>
Date: Mon, 15 Jul 2024 16:03:29 +0000
Subject: landlock: Various documentation improvements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Fix some typos, incomplete or confusing phrases.
* Split paragraphs where appropriate.
* List the same error code multiple times,
  if it has multiple possible causes.
* Bring wording closer to the man page wording,
  which has undergone more thorough review
  (esp. for LANDLOCK_ACCESS_FS_WRITE_FILE).
* Small semantic clarifications
  * Call the ephemeral port range "ephemeral"
  * Clarify reasons for EFAULT in landlock_add_rule()
  * Clarify @rule_type doc for landlock_add_rule()

This is a collection of small fixes which I collected when preparing the
corresponding man pages [1].

Cc: Alejandro Colomar <alx@kernel.org>
Cc: Konstantin Meskhidze <konstantin.meskhidze@huawei.com>
Link: https://lore.kernel.org/r/20240715155554.2791018-1-gnoack@google.com [1]
Signed-off-by: Günther Noack <gnoack@google.com>
Link: https://lore.kernel.org/r/20240715160328.2792835-2-gnoack@google.com
[mic: Add label to link, fix formatting spotted by make htmldocs,
synchronize userspace-api documentation's date]
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 include/uapi/linux/landlock.h | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index e76186da3260..2c8dbc74b955 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -104,20 +104,21 @@ struct landlock_path_beneath_attr {
  */
 struct landlock_net_port_attr {
 	/**
-	 * @allowed_access: Bitmask of allowed access network for a port
+	 * @allowed_access: Bitmask of allowed network actions for a port
 	 * (cf. `Network flags`_).
 	 */
 	__u64 allowed_access;
 	/**
 	 * @port: Network port in host endianness.
 	 *
-	 * It should be noted that port 0 passed to :manpage:`bind(2)` will
-	 * bind to an available port from a specific port range. This can be
-	 * configured thanks to the ``/proc/sys/net/ipv4/ip_local_port_range``
-	 * sysctl (also used for IPv6). A Landlock rule with port 0 and the
-	 * ``LANDLOCK_ACCESS_NET_BIND_TCP`` right means that requesting to bind
-	 * on port 0 is allowed and it will automatically translate to binding
-	 * on the related port range.
+	 * It should be noted that port 0 passed to :manpage:`bind(2)` will bind
+	 * to an available port from the ephemeral port range.  This can be
+	 * configured with the ``/proc/sys/net/ipv4/ip_local_port_range`` sysctl
+	 * (also used for IPv6).
+	 *
+	 * A Landlock rule with port 0 and the ``LANDLOCK_ACCESS_NET_BIND_TCP``
+	 * right means that requesting to bind on port 0 is allowed and it will
+	 * automatically translate to binding on the related port range.
 	 */
 	__u64 port;
 };
@@ -138,10 +139,10 @@ struct landlock_net_port_attr {
  * The following access rights apply only to files:
  *
  * - %LANDLOCK_ACCESS_FS_EXECUTE: Execute a file.
- * - %LANDLOCK_ACCESS_FS_WRITE_FILE: Open a file with write access. Note that
- *   you might additionally need the %LANDLOCK_ACCESS_FS_TRUNCATE right in order
- *   to overwrite files with :manpage:`open(2)` using ``O_TRUNC`` or
- *   :manpage:`creat(2)`.
+ * - %LANDLOCK_ACCESS_FS_WRITE_FILE: Open a file with write access.  When
+ *   opening files for writing, you will often additionally need the
+ *   %LANDLOCK_ACCESS_FS_TRUNCATE right.  In many cases, these system calls
+ *   truncate existing files when overwriting them (e.g., :manpage:`creat(2)`).
  * - %LANDLOCK_ACCESS_FS_READ_FILE: Open a file with read access.
  * - %LANDLOCK_ACCESS_FS_TRUNCATE: Truncate a file with :manpage:`truncate(2)`,
  *   :manpage:`ftruncate(2)`, :manpage:`creat(2)`, or :manpage:`open(2)` with
@@ -263,7 +264,7 @@ struct landlock_net_port_attr {
  * These flags enable to restrict a sandboxed process to a set of network
  * actions. This is supported since the Landlock ABI version 4.
  *
- * TCP sockets with allowed actions:
+ * The following access rights apply to TCP port numbers:
  *
  * - %LANDLOCK_ACCESS_NET_BIND_TCP: Bind a TCP socket to a local port.
  * - %LANDLOCK_ACCESS_NET_CONNECT_TCP: Connect an active TCP socket to
-- 
cgit v1.2.3


From 338bb57e4c2a1c2c6fc92f9c0bd35be7587adca7 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 15 Jul 2024 17:23:53 +0300
Subject: ipv4: Fix incorrect TOS in route get reply

The TOS value that is returned to user space in the route get reply is
the one with which the lookup was performed ('fl4->flowi4_tos'). This is
fine when the matched route is configured with a TOS as it would not
match if its TOS value did not match the one with which the lookup was
performed.

However, matching on TOS is only performed when the route's TOS is not
zero. It is therefore possible to have the kernel incorrectly return a
non-zero TOS:

 # ip link add name dummy1 up type dummy
 # ip address add 192.0.2.1/24 dev dummy1
 # ip route get 192.0.2.2 tos 0xfc
 192.0.2.2 tos 0x1c dev dummy1 src 192.0.2.1 uid 0
     cache

Fix by adding a DSCP field to the FIB result structure (inside an
existing 4 bytes hole), populating it in the route lookup and using it
when filling the route get reply.

Output after the patch:

 # ip link add name dummy1 up type dummy
 # ip address add 192.0.2.1/24 dev dummy1
 # ip route get 192.0.2.2 tos 0xfc
 192.0.2.2 dev dummy1 src 192.0.2.1 uid 0
     cache

Fixes: 1a00fee4ffb2 ("ipv4: Remove rt_key_{src,dst,tos} from struct rtable.")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/ip_fib.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 6e7984bfb986..72af2f223e59 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -173,6 +173,7 @@ struct fib_result {
 	unsigned char		type;
 	unsigned char		scope;
 	u32			tclassid;
+	dscp_t			dscp;
 	struct fib_nh_common	*nhc;
 	struct fib_info		*fi;
 	struct fib_table	*table;
-- 
cgit v1.2.3


From b9fae9f06d84ffab0f3f9118f3a96bbcdc528bf6 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Thu, 11 Jul 2024 13:21:00 -0400
Subject: SUNRPC: Fixup gss_status tracepoint error output

The GSS routine errors are values, not flags.

Fixes: 0c77668ddb4e ("SUNRPC: Introduce trace points in rpc_auth_gss.ko")
Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/trace/events/rpcgss.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h
index 7f0c1ceae726..b0b6300a0cab 100644
--- a/include/trace/events/rpcgss.h
+++ b/include/trace/events/rpcgss.h
@@ -54,7 +54,7 @@ TRACE_DEFINE_ENUM(GSS_S_UNSEQ_TOKEN);
 TRACE_DEFINE_ENUM(GSS_S_GAP_TOKEN);
 
 #define show_gss_status(x)						\
-	__print_flags(x, "|",						\
+	__print_symbolic(x, 						\
 		{ GSS_S_BAD_MECH, "GSS_S_BAD_MECH" },			\
 		{ GSS_S_BAD_NAME, "GSS_S_BAD_NAME" },			\
 		{ GSS_S_BAD_NAMETYPE, "GSS_S_BAD_NAMETYPE" },		\
-- 
cgit v1.2.3


From f2f6a8e8871725035959b90bac048cde555aa0e9 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 18 Jul 2024 13:06:47 +0100
Subject: init/Kconfig: remove CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND

Several versions of GCC mis-compile asm goto with outputs. We try to
workaround this, but our workaround is demonstrably incomplete and
liable to result in subtle bugs, especially on arm64 where get_user()
has recently been moved over to using asm goto with outputs.

From discussion(s) with Linus at:

  https://lore.kernel.org/linux-arm-kernel/Zpfv2tnlQ-gOLGac@J2N7QTR9R3.cambridge.arm.com/
  https://lore.kernel.org/linux-arm-kernel/ZpfxLrJAOF2YNqCk@J2N7QTR9R3.cambridge.arm.com/

... it sounds like the best thing to do for now is to remove the
workaround and make CC_HAS_ASM_GOTO_OUTPUT depend on working compiler
versions.

The issue was originally reported to GCC by Sean Christopherson:

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921

... and Jakub Jelinek fixed this for GCC 14, with the fix backported to
13.3.0, 12.4.0, and 11.5.0.

In the kernel, we tried to workaround broken compilers in commits:

  4356e9f841f7 ("work around gcc bugs with 'asm goto' with outputs")
  68fb3ca0e408 ("update workarounds for gcc "asm goto" issue")

... but the workaround of adding an empty asm("") after the asm volatile
goto(...) demonstrably does not always avoid the problem, as can be seen
in the following test case:

| #define asm_goto_output(x...) \
|         do { asm volatile goto(x); asm (""); } while (0)
|
| #define __good_or_bad(__val, __key)                                     \
| do {                                                                    \
|         __label__ __failed;                                             \
|         unsigned long __tmp;                                            \
|         asm_goto_output(                                                \
|         "       cbnz    %[key], %l[__failed]\n"                         \
|         "       mov     %[val], #0x900d\n"                              \
|         : [val] "=r" (__tmp)                                            \
|         : [key] "r" (__key)                                             \
|         :                                                               \
|         : __failed);                                                    \
|         (__val) = __tmp;                                                \
|         break;                                                          \
| __failed:                                                               \
|         (__val) = 0xbad;                                                \
| } while (0)
|
| unsigned long get_val(unsigned long key);
| unsigned long get_val(unsigned long key)
| {
|         unsigned long val = 0xbad;
|
|         __good_or_bad(val, key);
|
|         return val;
| }

GCC 13.2.0 (at -O2) compiles this to:

| 	cbnz    x0, .Lfailed
| 	mov     x0, #0x900d
| .Lfailed:
| 	ret

GCC 14.1.0 (at -O2) compiles this to:

| 	cbnz    x0, .Lfailed
| 	mov     x0, #0x900d
| 	ret
| .Lfailed:
| 	mov     x0, #0xbad
| 	ret

Note that GCC 13.2.0 erroneously omits the assignment to 'val' in the
error path (even though this does not depend on an output of the asm
goto). GCC 14.1.0 correctly retains the assignment.

This problem can be seen within the kernel with the following test case:

| #include <linux/uaccess.h>
| #include <linux/types.h>
|
| noinline unsigned long test_unsafe_get_user(unsigned long __user *ptr);
| noinline unsigned long test_unsafe_get_user(unsigned long __user *ptr)
| {
|         unsigned long val;
|
|         unsafe_get_user(val, ptr, Efault);
|         return val;
|
| Efault:
|         val = 0x900d;
|         return val;
| }

GCC 13.2.0 (arm64 defconfig) compiles this to:

|         and     x0, x0, #0xff7fffffffffffff
|         ldtr    x0, [x0]
| .Lextable_fixup:
|         ret

GCC 13.2.0 (x86_64 defconfig + MITIGATION_RETPOLINE=n) compiles this to:

|         endbr64
|         mov    (%rdi),%rax
| .Lextable_fixup:
|         ret

... omitting the assignment to 'val' in the error path, and leaving
garbage in the result register returned by the function (which happens
to contain the faulting address in the generated code).

GCC 14.1.0 (arm64 defconfig) compiles this to:

|         and     x0, x0, #0xff7fffffffffffff
|         ldtr    x0, [x0]
|         ret
| .Lextable_fixup:
|         mov     x0, #0x900d                     // #36877
|         ret

GCC 14.1.0 (x86_64 defconfig + MITIGATION_RETPOLINE=n) compiles this to:

|         endbr64
|         mov    (%rdi),%rax
|         ret
| .Lextable_fixup:
|         mov    $0x900d,%eax
|         ret

... retaining the expected assignment to 'val' in the error path.

We don't have a complete and reasonable workaround. While placing empty
asm("") blocks after each goto label *might* be sufficient, we don't
know for certain, this is tedious and error-prone, and there doesn't
seem to be a neat way to wrap this up (which is especially painful for
cases with multiple goto labels).

Avoid this issue by disabling CONFIG_CC_HAS_ASM_GOTO_OUTPUT for
known-broken compiler versions and removing the workaround (along with
the CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND config option).

For the moment I've left the default implementation of asm_goto_output()
unchanged. This should now be redundant since any compiler with the fix
for the clobbering issue whould also have a fix for the (earlier)
volatile issue, but it's far less churny to leave it around, which makes
it easier to backport this patch if necessary.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alex Coplan <alex.coplan@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Jakub Jelinek <jakub@gcc.gnu.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Szabolcs Nagy <szabolcs.nagy@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index aff92b1d284f..f805adaa316e 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -64,26 +64,6 @@
 		__builtin_unreachable();	\
 	} while (0)
 
-/*
- * GCC 'asm goto' with outputs miscompiles certain code sequences:
- *
- *   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
- *
- * Work around it via the same compiler barrier quirk that we used
- * to use for the old 'asm goto' workaround.
- *
- * Also, always mark such 'asm goto' statements as volatile: all
- * asm goto statements are supposed to be volatile as per the
- * documentation, but some versions of gcc didn't actually do
- * that for asms with outputs:
- *
- *    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619
- */
-#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND
-#define asm_goto_output(x...) \
-	do { asm volatile goto(x); asm (""); } while (0)
-#endif
-
 #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP)
 #define __HAVE_BUILTIN_BSWAP32__
 #define __HAVE_BUILTIN_BSWAP64__
-- 
cgit v1.2.3


From 7d189c77106ed6df09829f7a419e35ada67b2bd0 Mon Sep 17 00:00:00 2001
From: Shivamurthy Shastri <shivamurthy.shastri@linutronix.de>
Date: Wed, 26 Jun 2024 21:05:12 +0200
Subject: PCI/MSI: Provide MSI_FLAG_PCI_MSI_MASK_PARENT

Most ARM(64) PCI/MSI domains mask and unmask in the parent domain after or
before the PCI mask/unmask operation takes place. So there are more than a
dozen of the same wrapper implementation all over the place.

Don't make the same mistake with the new per device PCI/MSI domains and
provide a new MSI feature flag, which lets the domain implementation
enable this sequence in the PCI/MSI code.

Signed-off-by: Shivamurthy Shastri <shivamurthy.shastri@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/87ed8j34pj.ffs@tglx
---
 include/linux/msi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index dc27cf3903d5..04f33e7f6f8b 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -556,6 +556,8 @@ enum {
 	MSI_FLAG_USE_DEV_FWNODE		= (1 << 7),
 	/* Set parent->dev into domain->pm_dev on device domain creation */
 	MSI_FLAG_PARENT_PM_DEV		= (1 << 8),
+	/* Support for parent mask/unmask */
+	MSI_FLAG_PCI_MSI_MASK_PARENT	= (1 << 9),
 
 	/* Mask for the generic functionality */
 	MSI_GENERIC_FLAGS_MASK		= GENMASK(15, 0),
-- 
cgit v1.2.3


From f6a9886a9e55a1e6bd78c7e505205d05ef50a71e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 23 Jun 2024 17:18:50 +0200
Subject: genirq/msi: Remove platform_msi_create_device_domain()

No more users.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Shivamurthy Shastri <shivamurthy.shastri@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240623142235.395577449@linutronix.de
---
 include/linux/msi.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 04f33e7f6f8b..4ae036d0c7db 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -660,8 +660,6 @@ __platform_msi_create_device_domain(struct device *dev,
 				    const struct irq_domain_ops *ops,
 				    void *host_data);
 
-#define platform_msi_create_device_domain(dev, nvec, write, ops, data)	\
-	__platform_msi_create_device_domain(dev, nvec, false, write, ops, data)
 #define platform_msi_create_device_tree_domain(dev, nvec, write, ops, data) \
 	__platform_msi_create_device_domain(dev, nvec, true, write, ops, data)
 
-- 
cgit v1.2.3


From e9894248994ca8291838baf063f045eab28e5a0e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 23 Jun 2024 17:19:05 +0200
Subject: genirq/msi: Remove platform MSI leftovers

No more users!

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Shivamurthy Shastri <shivamurthy.shastri@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20240623142235.943295676@linutronix.de
---
 include/linux/msi.h | 30 ------------------------------
 1 file changed, 30 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 4ae036d0c7db..4c3462a6a97b 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -81,7 +81,6 @@ extern int pci_msi_ignore_mask;
 /* Helper functions */
 struct msi_desc;
 struct pci_dev;
-struct platform_msi_priv_data;
 struct device_attribute;
 struct irq_domain;
 struct irq_affinity_desc;
@@ -231,14 +230,12 @@ struct msi_dev_domain {
 /**
  * msi_device_data - MSI per device data
  * @properties:		MSI properties which are interesting to drivers
- * @platform_data:	Platform-MSI specific data
  * @mutex:		Mutex protecting the MSI descriptor store
  * @__domains:		Internal data for per device MSI domains
  * @__iter_idx:		Index to search the next entry for iterators
  */
 struct msi_device_data {
 	unsigned long			properties;
-	struct platform_msi_priv_data	*platform_data;
 	struct mutex			mutex;
 	struct msi_dev_domain		__domains[MSI_MAX_DEVICE_IRQDOMAINS];
 	unsigned long			__iter_idx;
@@ -641,33 +638,6 @@ void msi_domain_free_irqs_all(struct device *dev, unsigned int domid);
 
 struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain);
 
-struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode,
-						  struct msi_domain_info *info,
-						  struct irq_domain *parent);
-
-/* When an MSI domain is used as an intermediate domain */
-int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
-			    int nvec, msi_alloc_info_t *args);
-int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
-			     int virq, int nvec, msi_alloc_info_t *args);
-void msi_domain_depopulate_descs(struct device *dev, int virq, int nvec);
-
-struct irq_domain *
-__platform_msi_create_device_domain(struct device *dev,
-				    unsigned int nvec,
-				    bool is_tree,
-				    irq_write_msi_msg_t write_msi_msg,
-				    const struct irq_domain_ops *ops,
-				    void *host_data);
-
-#define platform_msi_create_device_tree_domain(dev, nvec, write, ops, data) \
-	__platform_msi_create_device_domain(dev, nvec, true, write, ops, data)
-
-int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int virq,
-				     unsigned int nr_irqs);
-void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq,
-				     unsigned int nvec);
-void *platform_msi_get_host_data(struct irq_domain *domain);
 /* Per device platform MSI */
 int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nvec,
 					    irq_write_msi_msg_t write_msi_msg);
-- 
cgit v1.2.3


From 2fdda02a8749fdaff5621c96aaf24a61d2f8c5a2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 23 Jun 2024 17:19:07 +0200
Subject: genirq/msi: Move msi_device_data to core

Now that the platform MSI hack is gone, nothing needs to know about struct
msi_device_data outside of the core code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Shivamurthy Shastri <shivamurthy.shastri@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240623142236.003295177@linutronix.de
---
 include/linux/msi.h | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 4c3462a6a97b..369367ecae5e 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -21,11 +21,7 @@
 #include <linux/irqdomain_defs.h>
 #include <linux/cpumask.h>
 #include <linux/msi_api.h>
-#include <linux/xarray.h>
-#include <linux/mutex.h>
-#include <linux/list.h>
 #include <linux/irq.h>
-#include <linux/bits.h>
 
 #include <asm/msi.h>
 
@@ -227,20 +223,6 @@ struct msi_dev_domain {
 	struct irq_domain	*domain;
 };
 
-/**
- * msi_device_data - MSI per device data
- * @properties:		MSI properties which are interesting to drivers
- * @mutex:		Mutex protecting the MSI descriptor store
- * @__domains:		Internal data for per device MSI domains
- * @__iter_idx:		Index to search the next entry for iterators
- */
-struct msi_device_data {
-	unsigned long			properties;
-	struct mutex			mutex;
-	struct msi_dev_domain		__domains[MSI_MAX_DEVICE_IRQDOMAINS];
-	unsigned long			__iter_idx;
-};
-
 int msi_setup_device_data(struct device *dev);
 
 void msi_lock_descs(struct device *dev);
-- 
cgit v1.2.3


From a97b43fac5b9b3ffca71b8a917a249789902fce9 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Thu, 18 Jul 2024 17:17:10 -0400
Subject: lockdep: Add comments for lockdep_set_no{validate,track}_class()

Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/lockdep.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index b76f1bcd2f7f..0759e30df392 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -178,9 +178,24 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name,
 			      (lock)->dep_map.wait_type_outer,		\
 			      (lock)->dep_map.lock_type)
 
+/**
+ * lockdep_set_novalidate_class: disable checking of lock ordering on a given
+ * lock
+ * @lock: Lock to mark
+ *
+ * Lockdep will still record that this lock has been taken, and print held
+ * instances when dumping locks
+ */
 #define lockdep_set_novalidate_class(lock) \
 	lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock)
 
+/**
+ * lockdep_set_notrack_class: disable lockdep tracking of a given lock entirely
+ * @lock: Lock to mark
+ *
+ * Bigger hammer than lockdep_set_novalidate_class: so far just for bcachefs,
+ * which takes more locks than lockdep is able to track (48).
+ */
 #define lockdep_set_notrack_class(lock) \
 	lockdep_set_class_and_name(lock, &__lockdep_no_track__, #lock)
 
-- 
cgit v1.2.3


From a2d6d8aee4a4f694920bfaf6f6be3690e0f8d302 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 19 Jul 2024 11:56:34 +0200
Subject: ALSA: hda: tas2781: mark const variables as __maybe_unused

An earlier patch changed the DECLARE_TLV_DB_SCALE declaration, but
now there are additional static const variables that cause
the same build warnings:

In file included from sound/pci/hda/tas2781_hda_i2c.c:23:
include/sound/tas2781-tlv.h:23:28: error: 'tas2563_dvc_table' defined but not used [-Werror=unused-const-variable=]
   23 | static const unsigned char tas2563_dvc_table[][4] = {
      |                            ^~~~~~~~~~~~~~~~~
In file included from include/sound/tlv.h:10,
                 from sound/pci/hda/tas2781_hda_i2c.c:22:
include/sound/tas2781-tlv.h:20:35: error: 'tas2563_dvc_tlv' defined but not used [-Werror=unused-const-variable=]
   20 | static const DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1);
      |                                   ^~~~~~~~~~~~~~~

Mark them all as unused as well.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://patch.msgid.link/20240719095640.3741247-1-arnd@kernel.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/tas2781-tlv.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h
index 99c41bfc7827..00fd4d449ff3 100644
--- a/include/sound/tas2781-tlv.h
+++ b/include/sound/tas2781-tlv.h
@@ -16,11 +16,11 @@
 #define __TAS2781_TLV_H__
 
 static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0);
-static const DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0);
-static const DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1);
+static const __maybe_unused DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0);
+static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1);
 
 /* pow(10, db/20) * pow(2,30) */
-static const unsigned char tas2563_dvc_table[][4] = {
+static const __maybe_unused unsigned char tas2563_dvc_table[][4] = {
 	{ 0X00, 0X00, 0X00, 0X00 }, /* -121.5db */
 	{ 0X00, 0X00, 0X03, 0XBC }, /* -121.0db */
 	{ 0X00, 0X00, 0X03, 0XF5 }, /* -120.5db */
-- 
cgit v1.2.3


From c8f51feee135f37f0d77b4616083c25524daa7b0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 19 Jul 2024 11:29:01 +0000
Subject: block: remove QUEUE_FLAG_STOPPED

QUEUE_FLAG_STOPPED is entirely unused.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20240719112912.3830443-5-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index dce4a6bf7307..942ad4e0f231 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -588,7 +588,6 @@ struct request_queue {
 };
 
 /* Keep blk_queue_flag_name[] in sync with the definitions below */
-#define QUEUE_FLAG_STOPPED	0	/* queue is stopped */
 #define QUEUE_FLAG_DYING	1	/* queue being torn down */
 #define QUEUE_FLAG_NOMERGES     3	/* disable merge attempts */
 #define QUEUE_FLAG_SAME_COMP	4	/* complete on same CPU-group */
@@ -608,7 +607,6 @@ struct request_queue {
 void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
 void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
 
-#define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_dying(q)	test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
 #define blk_queue_init_done(q)	test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
-- 
cgit v1.2.3


From 3dff6155733f25872530ad358c6f5559800f4ccb Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Fri, 19 Jul 2024 11:29:02 +0000
Subject: block: Relocate BLK_MQ_CPU_WORK_BATCH

BLK_MQ_CPU_WORK_BATCH is defined in include/linux/blk-mq.h, but only used
in blk-mq.c, so relocate to block/blk-mq.h

Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20240719112912.3830443-6-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 89ba6b16fe8b..df775a203a4d 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -672,8 +672,6 @@ enum {
 	BLK_MQ_S_INACTIVE	= 3,
 
 	BLK_MQ_MAX_DEPTH	= 10240,
-
-	BLK_MQ_CPU_WORK_BATCH	= 8,
 };
 #define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
 	((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
-- 
cgit v1.2.3


From 793356d23f8a817e164a917c792741a6d6d651ed Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Fri, 19 Jul 2024 11:29:03 +0000
Subject: block: Relocate BLK_MQ_MAX_DEPTH

BLK_MQ_MAX_DEPTH is defined as an enumerated value, but has no real
relation to the other members in its enum, so just use #define to provide
the definition.

Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20240719112912.3830443-7-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index df775a203a4d..8a84f49468d5 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -670,8 +670,6 @@ enum {
 
 	/* hw queue is inactive after all its CPUs become offline */
 	BLK_MQ_S_INACTIVE	= 3,
-
-	BLK_MQ_MAX_DEPTH	= 10240,
 };
 #define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
 	((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
@@ -680,6 +678,7 @@ enum {
 	((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
 		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
 
+#define BLK_MQ_MAX_DEPTH	(10240)
 #define BLK_MQ_NO_HCTX_IDX	(-1U)
 
 struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set,
-- 
cgit v1.2.3


From 55177adf1837bc56f878f7f6f7123947a2088148 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Fri, 19 Jul 2024 11:29:04 +0000
Subject: block: Make QUEUE_FLAG_x as an enum

This will allow us better keep in sync with blk_queue_flag_name[].

Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20240719112912.3830443-8-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 942ad4e0f231..ded46fad67a0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -588,19 +588,22 @@ struct request_queue {
 };
 
 /* Keep blk_queue_flag_name[] in sync with the definitions below */
-#define QUEUE_FLAG_DYING	1	/* queue being torn down */
-#define QUEUE_FLAG_NOMERGES     3	/* disable merge attempts */
-#define QUEUE_FLAG_SAME_COMP	4	/* complete on same CPU-group */
-#define QUEUE_FLAG_FAIL_IO	5	/* fake timeout */
-#define QUEUE_FLAG_NOXMERGES	9	/* No extended merges */
-#define QUEUE_FLAG_SAME_FORCE	12	/* force complete on same CPU */
-#define QUEUE_FLAG_INIT_DONE	14	/* queue is initialized */
-#define QUEUE_FLAG_STATS	20	/* track IO start and completion times */
-#define QUEUE_FLAG_REGISTERED	22	/* queue has been registered to a disk */
-#define QUEUE_FLAG_QUIESCED	24	/* queue has been quiesced */
-#define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
-#define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
-#define QUEUE_FLAG_SQ_SCHED     30	/* single queue style io dispatch */
+enum {
+	QUEUE_FLAG_DYING,		/* queue being torn down */
+	QUEUE_FLAG_NOMERGES,		/* disable merge attempts */
+	QUEUE_FLAG_SAME_COMP,		/* complete on same CPU-group */
+	QUEUE_FLAG_FAIL_IO,		/* fake timeout */
+	QUEUE_FLAG_NOXMERGES,		/* No extended merges */
+	QUEUE_FLAG_SAME_FORCE,		/* force complete on same CPU */
+	QUEUE_FLAG_INIT_DONE,		/* queue is initialized */
+	QUEUE_FLAG_STATS,		/* track IO start and completion times */
+	QUEUE_FLAG_REGISTERED,		/* queue has been registered to a disk */
+	QUEUE_FLAG_QUIESCED,		/* queue has been quiesced */
+	QUEUE_FLAG_RQ_ALLOC_TIME,	/* record rq->alloc_time_ns */
+	QUEUE_FLAG_HCTX_ACTIVE,		/* at least one blk-mq hctx is active */
+	QUEUE_FLAG_SQ_SCHED,		/* single queue style io dispatch */
+	QUEUE_FLAG_MAX
+};
 
 #define QUEUE_FLAG_MQ_DEFAULT	(1UL << QUEUE_FLAG_SAME_COMP)
 
-- 
cgit v1.2.3


From 23827310cce7eff3477aeaeb59ea3718f5c9c633 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Fri, 19 Jul 2024 11:29:06 +0000
Subject: block: Catch possible entries missing from hctx_state_name[]

Add a build-time assert that we are not missing entries from
hctx_state_name[]. For this, create a separate enum for state flags and add
a "max" entry for BLK_MQ_S_x flags.

The numbering for those enum values is as default, so don't explicitly
number.

Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20240719112912.3830443-10-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8a84f49468d5..4905a1d67551 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -663,13 +663,6 @@ enum {
 	BLK_MQ_F_NO_SCHED_BY_DEFAULT	= 1 << 7,
 	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
 	BLK_MQ_F_ALLOC_POLICY_BITS = 1,
-
-	BLK_MQ_S_STOPPED	= 0,
-	BLK_MQ_S_TAG_ACTIVE	= 1,
-	BLK_MQ_S_SCHED_RESTART	= 2,
-
-	/* hw queue is inactive after all its CPUs become offline */
-	BLK_MQ_S_INACTIVE	= 3,
 };
 #define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
 	((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \
@@ -681,6 +674,16 @@ enum {
 #define BLK_MQ_MAX_DEPTH	(10240)
 #define BLK_MQ_NO_HCTX_IDX	(-1U)
 
+enum {
+	/* Keep hctx_state_name[] in sync with the definitions below */
+	BLK_MQ_S_STOPPED,
+	BLK_MQ_S_TAG_ACTIVE,
+	BLK_MQ_S_SCHED_RESTART,
+	/* hw queue is inactive after all its CPUs become offline */
+	BLK_MQ_S_INACTIVE,
+	BLK_MQ_S_MAX
+};
+
 struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set,
 		struct queue_limits *lim, void *queuedata,
 		struct lock_class_key *lkclass);
-- 
cgit v1.2.3


From 226f0f6afc3e5c8903c6e57e1f6073ad8ad189b5 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Fri, 19 Jul 2024 11:29:07 +0000
Subject: block: Catch possible entries missing from hctx_flag_name[]

Refresh values in BLK_MQ_F_x enum, and then re-arrange members in
hctx_flag_name[] to match that enum. Renumber
BLK_MQ_F_ALLOC_POLICY_START_BIT to match the value refresh.

Add a BUILD_BUG_ON() call to ensure that we are not missing entries in
hctx_flag_name[].

Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20240719112912.3830443-11-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 4905a1d67551..27241009c8f9 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -644,6 +644,7 @@ struct blk_mq_ops {
 #endif
 };
 
+/* Keep hctx_flag_name[] in sync with the definitions below */
 enum {
 	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
 	BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1,
@@ -653,15 +654,16 @@ enum {
 	 */
 	BLK_MQ_F_STACKING	= 1 << 2,
 	BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3,
-	BLK_MQ_F_BLOCKING	= 1 << 5,
+	BLK_MQ_F_BLOCKING	= 1 << 4,
 	/* Do not allow an I/O scheduler to be configured. */
-	BLK_MQ_F_NO_SCHED	= 1 << 6,
+	BLK_MQ_F_NO_SCHED	= 1 << 5,
+
 	/*
 	 * Select 'none' during queue registration in case of a single hwq
 	 * or shared hwqs instead of 'mq-deadline'.
 	 */
-	BLK_MQ_F_NO_SCHED_BY_DEFAULT	= 1 << 7,
-	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
+	BLK_MQ_F_NO_SCHED_BY_DEFAULT	= 1 << 6,
+	BLK_MQ_F_ALLOC_POLICY_START_BIT = 7,
 	BLK_MQ_F_ALLOC_POLICY_BITS = 1,
 };
 #define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \
-- 
cgit v1.2.3


From 26d3bdb57ec3fa56eaf8d2e74b5d488e55f43013 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Fri, 19 Jul 2024 11:29:08 +0000
Subject: block: Catch possible entries missing from alloc_policy_name[]

Make BLK_TAG_ALLOC_x an enum and add a "max" entry.

Add a BUILD_BUG_ON() call to ensure that we are not missing entries in
hctx_flag_name[].

Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20240719112912.3830443-12-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 27241009c8f9..a64a50a0edf7 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -278,8 +278,12 @@ enum blk_eh_timer_return {
 	BLK_EH_RESET_TIMER,
 };
 
-#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
-#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
+/* Keep alloc_policy_name[] in sync with the definitions below */
+enum {
+	BLK_TAG_ALLOC_FIFO,	/* allocate starting from 0 */
+	BLK_TAG_ALLOC_RR,	/* allocate starting from last allocated tag */
+	BLK_TAG_ALLOC_MAX
+};
 
 /**
  * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
-- 
cgit v1.2.3


From 6fa99325ec86bcd442363d77561a1babd8d9a427 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Fri, 19 Jul 2024 11:29:09 +0000
Subject: block: Catch possible entries missing from cmd_flag_name[]

Add a BUILD_BUG_ON() call to ensure that we are not missing entries in
cmd_flag_name[].

Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20240719112912.3830443-13-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk_types.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 632edd71f8c6..36ed96133217 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -354,6 +354,7 @@ enum req_op {
 	REQ_OP_LAST		= (__force blk_opf_t)36,
 };
 
+/* Keep cmd_flag_name[] in sync with the definitions below */
 enum req_flag_bits {
 	__REQ_FAILFAST_DEV =	/* no driver retries of device errors */
 		REQ_OP_BITS,
-- 
cgit v1.2.3


From 5f89154e8e9e3445f9b592e58a7045e06153b822 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Fri, 19 Jul 2024 11:29:10 +0000
Subject: block: Use enum to define RQF_x bit indexes

Similar to what we do for enum req_flag_bits, divide the definition of
RQF_x flags into an enum to declare the bits and an actual flag.

Tweak some comments to not spill onto new lines.

Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20240719112912.3830443-14-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 86 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 54 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a64a50a0edf7..af52ec6a1ed5 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -27,38 +27,60 @@ typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t);
  * request flags */
 typedef __u32 __bitwise req_flags_t;
 
-/* drive already may have started this one */
-#define RQF_STARTED		((__force req_flags_t)(1 << 1))
-/* request for flush sequence */
-#define RQF_FLUSH_SEQ		((__force req_flags_t)(1 << 4))
-/* merge of different types, fail separately */
-#define RQF_MIXED_MERGE		((__force req_flags_t)(1 << 5))
-/* don't call prep for this one */
-#define RQF_DONTPREP		((__force req_flags_t)(1 << 7))
-/* use hctx->sched_tags */
-#define RQF_SCHED_TAGS		((__force req_flags_t)(1 << 8))
-/* use an I/O scheduler for this request */
-#define RQF_USE_SCHED		((__force req_flags_t)(1 << 9))
-/* vaguely specified driver internal error.  Ignored by the block layer */
-#define RQF_FAILED		((__force req_flags_t)(1 << 10))
-/* don't warn about errors */
-#define RQF_QUIET		((__force req_flags_t)(1 << 11))
-/* account into disk and partition IO statistics */
-#define RQF_IO_STAT		((__force req_flags_t)(1 << 13))
-/* runtime pm request */
-#define RQF_PM			((__force req_flags_t)(1 << 15))
-/* on IO scheduler merge hash */
-#define RQF_HASHED		((__force req_flags_t)(1 << 16))
-/* track IO completion time */
-#define RQF_STATS		((__force req_flags_t)(1 << 17))
-/* Look at ->special_vec for the actual data payload instead of the
-   bio chain. */
-#define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
-/* The request completion needs to be signaled to zone write pluging. */
-#define RQF_ZONE_WRITE_PLUGGING	((__force req_flags_t)(1 << 20))
-/* ->timeout has been called, don't expire again */
-#define RQF_TIMED_OUT		((__force req_flags_t)(1 << 21))
-#define RQF_RESV		((__force req_flags_t)(1 << 23))
+enum {
+	/* drive already may have started this one */
+	__RQF_STARTED,
+	/* request for flush sequence */
+	__RQF_FLUSH_SEQ,
+	/* merge of different types, fail separately */
+	__RQF_MIXED_MERGE,
+	/* don't call prep for this one */
+	__RQF_DONTPREP,
+	/* use hctx->sched_tags */
+	__RQF_SCHED_TAGS,
+	/* use an I/O scheduler for this request */
+	__RQF_USE_SCHED,
+	/* vaguely specified driver internal error.  Ignored by block layer */
+	__RQF_FAILED,
+	/* don't warn about errors */
+	__RQF_QUIET,
+	/* account into disk and partition IO statistics */
+	__RQF_IO_STAT,
+	/* runtime pm request */
+	__RQF_PM,
+	/* on IO scheduler merge hash */
+	__RQF_HASHED,
+	/* track IO completion time */
+	__RQF_STATS,
+	/* Look at ->special_vec for the actual data payload instead of the
+	   bio chain. */
+	__RQF_SPECIAL_PAYLOAD,
+	/* request completion needs to be signaled to zone write plugging. */
+	__RQF_ZONE_WRITE_PLUGGING,
+	/* ->timeout has been called, don't expire again */
+	__RQF_TIMED_OUT,
+	__RQF_RESV,
+	__RQF_BITS
+};
+
+#define RQF_STARTED		((__force req_flags_t)(1 << __RQF_STARTED))
+#define RQF_FLUSH_SEQ		((__force req_flags_t)(1 << __RQF_FLUSH_SEQ))
+#define RQF_MIXED_MERGE		((__force req_flags_t)(1 << __RQF_MIXED_MERGE))
+#define RQF_DONTPREP		((__force req_flags_t)(1 << __RQF_DONTPREP))
+#define RQF_SCHED_TAGS		((__force req_flags_t)(1 << __RQF_SCHED_TAGS))
+#define RQF_USE_SCHED		((__force req_flags_t)(1 << __RQF_USE_SCHED))
+#define RQF_FAILED		((__force req_flags_t)(1 << __RQF_FAILED))
+#define RQF_QUIET		((__force req_flags_t)(1 << __RQF_QUIET))
+#define RQF_IO_STAT		((__force req_flags_t)(1 << __RQF_IO_STAT))
+#define RQF_PM			((__force req_flags_t)(1 << __RQF_PM))
+#define RQF_HASHED		((__force req_flags_t)(1 << __RQF_HASHED))
+#define RQF_STATS		((__force req_flags_t)(1 << __RQF_STATS))
+#define RQF_SPECIAL_PAYLOAD	\
+			((__force req_flags_t)(1 << __RQF_SPECIAL_PAYLOAD))
+#define RQF_ZONE_WRITE_PLUGGING	\
+			((__force req_flags_t)(1 << __RQF_ZONE_WRITE_PLUGGING))
+#define RQF_TIMED_OUT		((__force req_flags_t)(1 << __RQF_TIMED_OUT))
+#define RQF_RESV		((__force req_flags_t)(1 << __RQF_RESV))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
-- 
cgit v1.2.3


From 8a47e33f50dd779f94bc277c6d3de81672463c5e Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Fri, 19 Jul 2024 11:29:12 +0000
Subject: block: Catch possible entries missing from rqf_name[]

Add a BUILD_BUG_ON() call to ensure that we are not missing entries in
rqf_name[].

Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20240719112912.3830443-16-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index af52ec6a1ed5..8d304b1d16b1 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -27,6 +27,7 @@ typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t);
  * request flags */
 typedef __u32 __bitwise req_flags_t;
 
+/* Keep rqf_name[] in sync with the definitions below */
 enum {
 	/* drive already may have started this one */
 	__RQF_STARTED,
-- 
cgit v1.2.3


From 72d04bdcf3f7d7e07d82f9757946f68802a7270a Mon Sep 17 00:00:00 2001
From: Yang Yang <yang.yang@vivo.com>
Date: Tue, 16 Jul 2024 16:26:27 +0800
Subject: sbitmap: fix io hung due to race on sbitmap_word::cleared

Configuration for sbq:
  depth=64, wake_batch=6, shift=6, map_nr=1

1. There are 64 requests in progress:
  map->word = 0xFFFFFFFFFFFFFFFF
2. After all the 64 requests complete, and no more requests come:
  map->word = 0xFFFFFFFFFFFFFFFF, map->cleared = 0xFFFFFFFFFFFFFFFF
3. Now two tasks try to allocate requests:
  T1:                                       T2:
  __blk_mq_get_tag                          .
  __sbitmap_queue_get                       .
  sbitmap_get                               .
  sbitmap_find_bit                          .
  sbitmap_find_bit_in_word                  .
  __sbitmap_get_word  -> nr=-1              __blk_mq_get_tag
  sbitmap_deferred_clear                    __sbitmap_queue_get
  /* map->cleared=0xFFFFFFFFFFFFFFFF */     sbitmap_find_bit
    if (!READ_ONCE(map->cleared))           sbitmap_find_bit_in_word
      return false;                         __sbitmap_get_word -> nr=-1
    mask = xchg(&map->cleared, 0)           sbitmap_deferred_clear
    atomic_long_andnot()                    /* map->cleared=0 */
                                              if (!(map->cleared))
                                                return false;
                                     /*
                                      * map->cleared is cleared by T1
                                      * T2 fail to acquire the tag
                                      */

4. T2 is the sole tag waiter. When T1 puts the tag, T2 cannot be woken
up due to the wake_batch being set at 6. If no more requests come, T1
will wait here indefinitely.

This patch achieves two purposes:
1. Check on ->cleared and update on both ->cleared and ->word need to
be done atomically, and using spinlock could be the simplest solution.
2. Add extra check in sbitmap_deferred_clear(), to identify whether
->word has free bits.

Fixes: ea86ea2cdced ("sbitmap: ammortize cost of clearing bits")
Signed-off-by: Yang Yang <yang.yang@vivo.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20240716082644.659566-1-yang.yang@vivo.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/sbitmap.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index d662cf136021..c09cdcc99471 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -36,6 +36,11 @@ struct sbitmap_word {
 	 * @cleared: word holding cleared bits
 	 */
 	unsigned long cleared ____cacheline_aligned_in_smp;
+
+	/**
+	 * @swap_lock: serializes simultaneous updates of ->word and ->cleared
+	 */
+	spinlock_t swap_lock;
 } ____cacheline_aligned_in_smp;
 
 /**
-- 
cgit v1.2.3


From 89ed6c9ac69ec398ccb648f5f675b43e8ca679ca Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Tue, 16 Jul 2024 13:30:58 +0000
Subject: blk-cgroup: move congestion_count to struct blkcg

The congestion_count was introduced into the struct cgroup by
commit d09d8df3a294 ("blkcg: add generic throttling mechanism"),
but since it is closely related to the blkio subsys, it is not
appropriate to put it in the struct cgroup, so let's move it to
struct blkcg. There should be no functional changes because blkcg
is per cgroup.

Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20240716133058.3491350-1-xiujianfeng@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/cgroup-defs.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index ea48c861cd36..ec82c4b7ed4d 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -534,9 +534,6 @@ struct cgroup {
 	/* used to store eBPF programs */
 	struct cgroup_bpf bpf;
 
-	/* If there is block congestion on this cgroup. */
-	atomic_t congestion_count;
-
 	/* Used to store internal freezer state */
 	struct cgroup_freezer_state freezer;
 
-- 
cgit v1.2.3


From 9651fcedf7b92d3f7f1ab179e8ab55b85ee10fc1 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 8 Dec 2022 17:55:04 +0100
Subject: mm: add MAP_DROPPABLE for designating always lazily freeable mappings

The vDSO getrandom() implementation works with a buffer allocated with a
new system call that has certain requirements:

- It shouldn't be written to core dumps.
  * Easy: VM_DONTDUMP.
- It should be zeroed on fork.
  * Easy: VM_WIPEONFORK.

- It shouldn't be written to swap.
  * Uh-oh: mlock is rlimited.
  * Uh-oh: mlock isn't inherited by forks.

- It shouldn't reserve actual memory, but it also shouldn't crash when
  page faulting in memory if none is available
  * Uh-oh: VM_NORESERVE means segfaults.

It turns out that the vDSO getrandom() function has three really nice
characteristics that we can exploit to solve this problem:

1) Due to being wiped during fork(), the vDSO code is already robust to
   having the contents of the pages it reads zeroed out midway through
   the function's execution.

2) In the absolute worst case of whatever contingency we're coding for,
   we have the option to fallback to the getrandom() syscall, and
   everything is fine.

3) The buffers the function uses are only ever useful for a maximum of
   60 seconds -- a sort of cache, rather than a long term allocation.

These characteristics mean that we can introduce VM_DROPPABLE, which
has the following semantics:

a) It never is written out to swap.
b) Under memory pressure, mm can just drop the pages (so that they're
   zero when read back again).
c) It is inherited by fork.
d) It doesn't count against the mlock budget, since nothing is locked.
e) If there's not enough memory to service a page fault, it's not fatal,
   and no signal is sent.

This way, allocations used by vDSO getrandom() can use:

    VM_DROPPABLE | VM_DONTDUMP | VM_WIPEONFORK | VM_NORESERVE

And there will be no problem with OOMing, crashing on overcommitment,
using memory when not in use, not wiping on fork(), coredumps, or
writing out to swap.

In order to let vDSO getrandom() use this, expose these via mmap(2) as
MAP_DROPPABLE.

Note that this involves removing the MADV_FREE special case from
sort_folio(), which according to Yu Zhao is unnecessary and will simply
result in an extra call to shrink_folio_list() in the worst case. The
chunk removed reenables the swapbacked flag, which we don't want for
VM_DROPPABLE, and we can't conditionalize it here because there isn't a
vma reference available.

Finally, the provided self test ensures that this is working as desired.

Cc: linux-mm@kvack.org
Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/linux/mm.h             | 7 +++++++
 include/linux/userfaultfd_k.h  | 3 +++
 include/trace/events/mmflags.h | 7 +++++++
 include/uapi/linux/mman.h      | 1 +
 4 files changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index eb7c96d24ac0..e078c2890bf8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -406,6 +406,13 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_ALLOW_ANY_UNCACHED		VM_NONE
 #endif
 
+#ifdef CONFIG_64BIT
+#define VM_DROPPABLE_BIT	40
+#define VM_DROPPABLE		BIT(VM_DROPPABLE_BIT)
+#else
+#define VM_DROPPABLE		VM_NONE
+#endif
+
 #ifdef CONFIG_64BIT
 /* VM is sealed, in vm_flags */
 #define VM_SEALED	_BITUL(63)
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 05d59f74fc88..a12bcf042551 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -218,6 +218,9 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
 {
 	vm_flags &= __VM_UFFD_FLAGS;
 
+	if (vm_flags & VM_DROPPABLE)
+		return false;
+
 	if ((vm_flags & VM_UFFD_MINOR) &&
 	    (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma)))
 		return false;
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index e46d6e82765e..b63d211bd141 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -165,6 +165,12 @@ IF_HAVE_PG_ARCH_X(arch_3)
 # define IF_HAVE_UFFD_MINOR(flag, name)
 #endif
 
+#ifdef CONFIG_64BIT
+# define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name},
+#else
+# define IF_HAVE_VM_DROPPABLE(flag, name)
+#endif
+
 #define __def_vmaflag_names						\
 	{VM_READ,			"read"		},		\
 	{VM_WRITE,			"write"		},		\
@@ -197,6 +203,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY,	"softdirty"	)		\
 	{VM_MIXEDMAP,			"mixedmap"	},		\
 	{VM_HUGEPAGE,			"hugepage"	},		\
 	{VM_NOHUGEPAGE,			"nohugepage"	},		\
+IF_HAVE_VM_DROPPABLE(VM_DROPPABLE,	"droppable"	)		\
 	{VM_MERGEABLE,			"mergeable"	}		\
 
 #define show_vma_flags(flags)						\
diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index a246e11988d5..e89d00528f2f 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -17,6 +17,7 @@
 #define MAP_SHARED	0x01		/* Share changes */
 #define MAP_PRIVATE	0x02		/* Changes are private */
 #define MAP_SHARED_VALIDATE 0x03	/* share + validate extension flags */
+#define MAP_DROPPABLE	0x08		/* Zero memory under memory pressure. */
 
 /*
  * Huge page size encoding when MAP_HUGETLB is specified, and a huge page
-- 
cgit v1.2.3


From 4ad10a5f5f78a5b3e525a63bd075a4eb1139dde1 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 18 Nov 2022 17:23:34 +0100
Subject: random: introduce generic vDSO getrandom() implementation

Provide a generic C vDSO getrandom() implementation, which operates on
an opaque state returned by vgetrandom_alloc() and produces random bytes
the same way as getrandom(). This has the following API signature:

  ssize_t vgetrandom(void *buffer, size_t len, unsigned int flags,
                     void *opaque_state, size_t opaque_len);

The return value and the first three arguments are the same as ordinary
getrandom(), while the last two arguments are a pointer to the opaque
allocated state and its size. Were all five arguments passed to the
getrandom() syscall, nothing different would happen, and the functions
would have the exact same behavior.

The actual vDSO RNG algorithm implemented is the same one implemented by
drivers/char/random.c, using the same fast-erasure techniques as that.
Should the in-kernel implementation change, so too will the vDSO one.

It requires an implementation of ChaCha20 that does not use any stack,
in order to maintain forward secrecy if a multi-threaded program forks
(though this does not account for a similar issue with SA_SIGINFO
copying registers to the stack), so this is left as an
architecture-specific fill-in. Stack-less ChaCha20 is an easy algorithm
to implement on a variety of architectures, so this shouldn't be too
onerous.

Initially, the state is keyless, and so the first call makes a
getrandom() syscall to generate that key, and then uses it for
subsequent calls. By keeping track of a generation counter, it knows
when its key is invalidated and it should fetch a new one using the
syscall. Later, more than just a generation counter might be used.

Since MADV_WIPEONFORK is set on the opaque state, the key and related
state is wiped during a fork(), so secrets don't roll over into new
processes, and the same state doesn't accidentally generate the same
random stream. The generation counter, as well, is always >0, so that
the 0 counter is a useful indication of a fork() or otherwise
uninitialized state.

If the kernel RNG is not yet initialized, then the vDSO always calls the
syscall, because that behavior cannot be emulated in userspace, but
fortunately that state is short lived and only during early boot. If it
has been initialized, then there is no need to inspect the `flags`
argument, because the behavior does not change post-initialization
regardless of the `flags` value.

Since the opaque state passed to it is mutated, vDSO getrandom() is not
reentrant, when used with the same opaque state, which libc should be
mindful of.

The function works over an opaque per-thread state of a particular size,
which must be marked VM_WIPEONFORK, VM_DONTDUMP, VM_NORESERVE, and
VM_DROPPABLE for proper operation. Over time, the nuances of these
allocations may change or grow or even differ based on architectural
features.

The opaque state passed to vDSO getrandom() must be allocated using the
mmap_flags and mmap_prot parameters provided by the vgetrandom_opaque_params
struct, which also contains the size of each state. That struct can be
obtained with a call to vgetrandom(NULL, 0, 0, &params, ~0UL). Then,
libc can call mmap(2) and slice up the returned array into a state per
each thread, while ensuring that no single state straddles a page
boundary. Libc is expected to allocate a chunk of these on first use,
and then dole them out to threads as they're created, allocating more
when needed.

vDSO getrandom() provides the ability for userspace to generate random
bytes quickly and safely, and is intended to be integrated into libc's
thread management. As an illustrative example, the introduced code in
the vdso_test_getrandom self test later in this series might be used to
do the same outside of libc. In a libc the various pthread-isms are
expected to be elided into libc internals.

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/uapi/linux/random.h | 15 +++++++++++++++
 include/vdso/datapage.h     | 11 +++++++++++
 include/vdso/getrandom.h    | 46 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+)
 create mode 100644 include/vdso/getrandom.h

(limited to 'include')

diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h
index e744c23582eb..2a3fe4c2cdc9 100644
--- a/include/uapi/linux/random.h
+++ b/include/uapi/linux/random.h
@@ -55,4 +55,19 @@ struct rand_pool_info {
 #define GRND_RANDOM	0x0002
 #define GRND_INSECURE	0x0004
 
+/**
+ * struct vgetrandom_opaque_params - arguments for allocating memory for vgetrandom
+ *
+ * @size_per_opaque_state:	Size of each state that is to be passed to vgetrandom().
+ * @mmap_prot:			Value of the prot argument in mmap(2).
+ * @mmap_flags:			Value of the flags argument in mmap(2).
+ * @reserved:			Reserved for future use.
+ */
+struct vgetrandom_opaque_params {
+	__u32 size_of_opaque_state;
+	__u32 mmap_prot;
+	__u32 mmap_flags;
+	__u32 reserved[13];
+};
+
 #endif /* _UAPI_LINUX_RANDOM_H */
diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h
index d04d394db064..05e5787beb73 100644
--- a/include/vdso/datapage.h
+++ b/include/vdso/datapage.h
@@ -113,6 +113,16 @@ struct vdso_data {
 	struct arch_vdso_data	arch_data;
 };
 
+/**
+ * struct vdso_rng_data - vdso RNG state information
+ * @generation:	counter representing the number of RNG reseeds
+ * @is_ready:	boolean signaling whether the RNG is initialized
+ */
+struct vdso_rng_data {
+	u64	generation;
+	u8	is_ready;
+};
+
 /*
  * We use the hidden visibility to prevent the compiler from generating a GOT
  * relocation. Not only is going through a GOT useless (the entry couldn't and
@@ -124,6 +134,7 @@ struct vdso_data {
  */
 extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden")));
 extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden")));
+extern struct vdso_rng_data _vdso_rng_data __attribute__((visibility("hidden")));
 
 /**
  * union vdso_data_store - Generic vDSO data page
diff --git a/include/vdso/getrandom.h b/include/vdso/getrandom.h
new file mode 100644
index 000000000000..a8b7c14b0ae0
--- /dev/null
+++ b/include/vdso/getrandom.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _VDSO_GETRANDOM_H
+#define _VDSO_GETRANDOM_H
+
+#include <linux/types.h>
+
+#define CHACHA_KEY_SIZE         32
+#define CHACHA_BLOCK_SIZE       64
+
+/**
+ * struct vgetrandom_state - State used by vDSO getrandom().
+ *
+ * @batch:	One and a half ChaCha20 blocks of buffered RNG output.
+ *
+ * @key:	Key to be used for generating next batch.
+ *
+ * @batch_key:	Union of the prior two members, which is exactly two full
+ * 		ChaCha20 blocks in size, so that @batch and @key can be filled
+ * 		together.
+ *
+ * @generation:	Snapshot of @rng_info->generation in the vDSO data page at
+ *		the time @key was generated.
+ *
+ * @pos:	Offset into @batch of the next available random byte.
+ *
+ * @in_use:	Reentrancy guard for reusing a state within the same thread
+ *		due to signal handlers.
+ */
+struct vgetrandom_state {
+	union {
+		struct {
+			u8	batch[CHACHA_BLOCK_SIZE * 3 / 2];
+			u32	key[CHACHA_KEY_SIZE / sizeof(u32)];
+		};
+		u8		batch_key[CHACHA_BLOCK_SIZE * 2];
+	};
+	u64			generation;
+	u8			pos;
+	bool 			in_use;
+};
+
+#endif /* _VDSO_GETRANDOM_H */
-- 
cgit v1.2.3


From 13f75d9ecf3d8efcf597dfcd8f7be7460cdaa11a Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 15 Jul 2024 04:50:07 +0200
Subject: random: note that RNDGETPOOL was removed in 2.6.9-rc2

RNDGETPOOL was thankfully removed twenty years ago, but it's stuck
around in headers. Probably removing it from uapi headers isn't great in
case there are some weird users out there, but we should at least mark
this as having been removed, to save future readers the same goose chase
I just went on.

Link: https://lore.kernel.org/all/E1By1St-0001TS-Qj@thunk.org/
Link: https://lore.kernel.org/all/Pine.LNX.4.58.0409130937050.4094@ppc970.osdl.org/
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 include/uapi/linux/random.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h
index 2a3fe4c2cdc9..1dd047ec98a1 100644
--- a/include/uapi/linux/random.h
+++ b/include/uapi/linux/random.h
@@ -20,7 +20,7 @@
 /* Add to (or subtract from) the entropy count.  (Superuser only.) */
 #define RNDADDTOENTCNT	_IOW( 'R', 0x01, int )
 
-/* Get the contents of the entropy pool.  (Superuser only.) */
+/* Get the contents of the entropy pool.  (Superuser only.) (Removed in 2.6.9-rc2.) */
 #define RNDGETPOOL	_IOR( 'R', 0x02, int [2] )
 
 /* 
-- 
cgit v1.2.3


From 45b8ee7182d5ef8df6959297046f86dc128d6a06 Mon Sep 17 00:00:00 2001
From: Bastien Curutchet <bastien.curutchet@bootlin.com>
Date: Mon, 17 Jun 2024 14:08:18 +0200
Subject: i2c: mux: gpio: Add support for the 'settle-time-us' property

Some hardware need some time to switch from a bus to another. This can
cause the first transfers following the selection of a bus to fail.
There is no way to configure this kind of waiting time in the driver.

Add support for the 'settle-time-us' device-tree property. When set,
the i2c_mux_gpio_select() applies a delay before returning, leaving
enough time to the hardware to switch to the new bus.

Signed-off-by: Bastien Curutchet <bastien.curutchet@bootlin.com>
Reviewed-by: Andi Shyti <andi.shyti@kernel.org>
Acked-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
---
 include/linux/platform_data/i2c-mux-gpio.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/platform_data/i2c-mux-gpio.h b/include/linux/platform_data/i2c-mux-gpio.h
index 816a4cd3ccb5..96843aab4d1e 100644
--- a/include/linux/platform_data/i2c-mux-gpio.h
+++ b/include/linux/platform_data/i2c-mux-gpio.h
@@ -19,6 +19,7 @@
  *	position
  * @n_values: Number of multiplexer positions (busses to instantiate)
  * @idle: Bitmask to write to MUX when idle or GPIO_I2CMUX_NO_IDLE if not used
+ * @settle_time: Delay to wait when a new bus is selected
  */
 struct i2c_mux_gpio_platform_data {
 	int parent;
@@ -26,6 +27,7 @@ struct i2c_mux_gpio_platform_data {
 	const unsigned *values;
 	int n_values;
 	unsigned idle;
+	u32 settle_time;
 };
 
 #endif /* _LINUX_I2C_MUX_GPIO_H */
-- 
cgit v1.2.3


From d83763e44944d4d6dee38099c5d0a0984ac66385 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 16 Jul 2024 10:36:24 +0200
Subject: i2c: header: remove unneeded stuff regarding i2c_algorithm

The forward declaration is not needed anymore. The sentence about
"following structs" became obsolete when struct i2c_algorithm became a
kdoc. The paragraph about return values can go because we have this
information in kdoc already.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/i2c.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index e9cc14b1f9a1..1e34b486f604 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -30,7 +30,6 @@ extern const struct device_type i2c_client_type;
 /* --- General options ------------------------------------------------	*/
 
 struct i2c_msg;
-struct i2c_algorithm;
 struct i2c_adapter;
 struct i2c_client;
 struct i2c_driver;
@@ -533,8 +532,6 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info,
  * @reg_slave: deprecated, use @reg_target
  * @unreg_slave: deprecated, use @unreg_target
  *
- *
- * The following structs are for those who like to implement new bus drivers:
  * i2c_algorithm is the interface to a class of hardware solutions which can
  * be addressed using the same bus algorithms - i.e. bit-banging or the PCF8584
  * to name two of the most common.
@@ -550,9 +547,6 @@ struct i2c_algorithm {
 	 * to NULL. If an adapter algorithm can do SMBus access, set
 	 * smbus_xfer. If set to NULL, the SMBus protocol is simulated
 	 * using common I2C messages.
-	 *
-	 * xfer should return the number of messages successfully
-	 * processed, or a negative value on error
 	 */
 	union {
 		int (*xfer)(struct i2c_adapter *adap, struct i2c_msg *msgs,
-- 
cgit v1.2.3


From 385ac870bdd531348de123d6790626ccd7827f69 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 16 Jul 2024 10:36:25 +0200
Subject: i2c: header: improve kdoc for i2c_algorithm

Reword the explanation of @xfer, the old one was confusing and mixing up
terminology. Other than that, capitalize some words correctly and use
full line length.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
---
 include/linux/i2c.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 1e34b486f604..8caaa13834bf 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -511,16 +511,15 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info,
 #endif /* I2C_BOARDINFO */
 
 /**
- * struct i2c_algorithm - represent I2C transfer method
- * @xfer: Issue a set of i2c transactions to the given I2C adapter
- *   defined by the msgs array, with num messages available to transfer via
- *   the adapter specified by adap.
- * @xfer_atomic: same as @xfer. Yet, only using atomic context
- *   so e.g. PMICs can be accessed very late before shutdown. Optional.
- * @smbus_xfer: Issue smbus transactions to the given I2C adapter. If this
+ * struct i2c_algorithm - represent I2C transfer methods
+ * @xfer: Transfer a given number of messages defined by the msgs array via
+ *   the specified adapter.
+ * @xfer_atomic: Same as @xfer. Yet, only using atomic context so e.g. PMICs
+ *   can be accessed very late before shutdown. Optional.
+ * @smbus_xfer: Issue SMBus transactions to the given I2C adapter. If this
  *   is not present, then the bus layer will try and convert the SMBus calls
  *   into I2C transfers instead.
- * @smbus_xfer_atomic: same as @smbus_xfer. Yet, only using atomic context
+ * @smbus_xfer_atomic: Same as @smbus_xfer. Yet, only using atomic context
  *   so e.g. PMICs can be accessed very late before shutdown. Optional.
  * @functionality: Return the flags that this algorithm/adapter pair supports
  *   from the ``I2C_FUNC_*`` flags.
-- 
cgit v1.2.3


From eabd9db64ea8ba64d2a0b1d70da38e1a95dcd08b Mon Sep 17 00:00:00 2001
From: Haibo Xu <haibo1.xu@intel.com>
Date: Thu, 13 Jun 2024 16:54:33 +0800
Subject: ACPI: RISCV: Add NUMA support based on SRAT and SLIT

Add acpi_numa.c file to enable parse NUMA information from
ACPI SRAT and SLIT tables. SRAT table provide CPUs(Hart) and
memory nodes to proximity domain mapping, while SLIT table
provide the distance metrics between proximity domains.

Signed-off-by: Haibo Xu <haibo1.xu@intel.com>
Reviewed-by: Sunil V L <sunilvl@ventanamicro.com>
Reviewed-by: Hanjun Guo <guohanjun@huawei.com>
Link: https://lore.kernel.org/r/65dbad1fda08a32922c44886e4581e49b4a2fecc.1718268003.git.haibo1.xu@intel.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 include/linux/acpi.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 28c3fb2bef0d..5a2b620ccd58 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -264,6 +264,12 @@ static inline void
 acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { }
 #endif
 
+#ifdef CONFIG_RISCV
+void acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa);
+#else
+static inline void acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa) { }
+#endif
+
 #ifndef PHYS_CPUID_INVALID
 typedef u32 phys_cpuid_t;
 #define PHYS_CPUID_INVALID (phys_cpuid_t)(-1)
-- 
cgit v1.2.3


From 10a0e6f3d3db7dcfe36e578923e5f038f1d2b72a Mon Sep 17 00:00:00 2001
From: Anna-Maria Behnsen <anna-maria@linutronix.de>
Date: Wed, 17 Jul 2024 11:49:40 +0200
Subject: timers/migration: Move hierarchy setup into cpuhotplug prepare
 callback

When a CPU comes online the first time, it is possible that a new top level
group will be created. In general all propagation is done from the bottom
to top. This minimizes complexity and prevents possible races. But when a
new top level group is created, the formely top level group needs to be
connected to the new level. This is the only time, when the direction to
propagate changes is changed: the changes are propagated from top (new top
level group) to bottom (formerly top level group).

This introduces two races (see (A) and (B)) as reported by Frederic:

(A) This race happens, when marking the formely top level group as active,
but the last active CPU of the formerly top level group goes idle. Then
it's likely that formerly group is no longer active, but marked
nevertheless as active in new top level group:

		  [GRP0:0]
	       migrator = 0
	       active   = 0
	       nextevt  = KTIME_MAX
	       /         \
	      0         1 .. 7
	  active         idle

0) Hierarchy has for now only 8 CPUs and CPU 0 is the only active CPU.

			     [GRP1:0]
			migrator = TMIGR_NONE
			active   = NONE
			nextevt  = KTIME_MAX
					 \
		 [GRP0:0]                  [GRP0:1]
	      migrator = 0              migrator = TMIGR_NONE
	      active   = 0              active   = NONE
	      nextevt  = KTIME_MAX      nextevt  = KTIME_MAX
		/         \
	      0          1 .. 7                8
	  active         idle                !online

1) CPU 8 is booting and creates a new group in first level GRP0:1 and
   therefore also a new top group GRP1:0. For now the setup code proceeded
   only until the connected between GRP0:1 to the new top group. The
   connection between CPU8 and GRP0:1 is not yet established and CPU 8 is
   still !online.

			     [GRP1:0]
			migrator = TMIGR_NONE
			active   = NONE
			nextevt  = KTIME_MAX
		       /                  \
		 [GRP0:0]                  [GRP0:1]
	      migrator = 0              migrator = TMIGR_NONE
	      active   = 0              active   = NONE
	      nextevt  = KTIME_MAX      nextevt  = KTIME_MAX
		/         \
	      0          1 .. 7                8
	  active         idle                !online

2) Setup code now connects GRP0:0 to GRP1:0 and observes while in
   tmigr_connect_child_parent() that GRP0:0 is not TMIGR_NONE. So it
   prepares to call tmigr_active_up() on it. It hasn't done it yet.

			     [GRP1:0]
			migrator = TMIGR_NONE
			active   = NONE
			nextevt  = KTIME_MAX
		       /                  \
		 [GRP0:0]                  [GRP0:1]
	      migrator = TMIGR_NONE        migrator = TMIGR_NONE
	      active   = NONE              active   = NONE
	      nextevt  = KTIME_MAX         nextevt  = KTIME_MAX
		/         \
	      0          1 .. 7                8
	    idle         idle                !online

3) CPU 0 goes idle. Since GRP0:0->parent has been updated by CPU 8 with
   GRP0:0->lock held, CPU 0 observes GRP1:0 after calling
   tmigr_update_events() and it propagates the change to the top (no change
   there and no wakeup programmed since there is no timer).

			     [GRP1:0]
			migrator = GRP0:0
			active   = GRP0:0
			nextevt  = KTIME_MAX
		       /                  \
		 [GRP0:0]                  [GRP0:1]
	      migrator = TMIGR_NONE       migrator = TMIGR_NONE
	      active   = NONE             active   = NONE
	      nextevt  = KTIME_MAX        nextevt  = KTIME_MAX
		/         \
	      0          1 .. 7                8
	    idle         idle                !online

4) Now the setup code finally calls tmigr_active_up() to and sets GRP0:0
   active in GRP1:0

			     [GRP1:0]
			migrator = GRP0:0
			active   = GRP0:0, GRP0:1
			nextevt  = KTIME_MAX
		       /                  \
		 [GRP0:0]                  [GRP0:1]
	      migrator = TMIGR_NONE       migrator = 8
	      active   = NONE             active   = 8
	      nextevt  = KTIME_MAX        nextevt  = KTIME_MAX
		/         \                    |
	      0          1 .. 7                8
	    idle         idle                active

5) Now CPU 8 is connected with GRP0:1 and CPU 8 calls tmigr_active_up() out
   of tmigr_cpu_online().

			     [GRP1:0]
			migrator = GRP0:0
			active   = GRP0:0
			nextevt  = T8
		       /                  \
		 [GRP0:0]                  [GRP0:1]
	      migrator = TMIGR_NONE         migrator = TMIGR_NONE
	      active   = NONE               active   = NONE
	      nextevt  = KTIME_MAX          nextevt  = T8
		/         \                    |
	      0          1 .. 7                8
	    idle         idle                  idle

5) CPU 8 goes idle with a timer T8 and relies on GRP0:0 as the migrator.
   But it's not really active, so T8 gets ignored.

--> The update which is done in third step is not noticed by setup code. So
    a wrong migrator is set to top level group and a timer could get
    ignored.

(B) Reading group->parent and group->childmask when an hierarchy update is
ongoing and reaches the formerly top level group is racy as those values
could be inconsistent. (The notation of migrator and active now slightly
changes in contrast to the above example, as now the childmasks are used.)

			     [GRP1:0]
			migrator = TMIGR_NONE
			active   = 0x00
			nextevt  = KTIME_MAX
					 \
		 [GRP0:0]                  [GRP0:1]
	      migrator = TMIGR_NONE     migrator = TMIGR_NONE
	      active   = 0x00           active   = 0x00
	      nextevt  = KTIME_MAX      nextevt  = KTIME_MAX
	      childmask= 0		childmask= 1
	      parent   = NULL		parent   = GRP1:0
		/         \
	      0          1 .. 7                8
	  idle           idle                !online
	  childmask=1

1) Hierarchy has 8 CPUs. CPU 8 is at the moment in the process of onlining
   but did not yet connect GRP0:0 to GRP1:0.

			     [GRP1:0]
			migrator = TMIGR_NONE
			active   = 0x00
			nextevt  = KTIME_MAX
		       /                  \
		 [GRP0:0]                  [GRP0:1]
	      migrator = TMIGR_NONE     migrator = TMIGR_NONE
	      active   = 0x00           active   = 0x00
	      nextevt  = KTIME_MAX      nextevt  = KTIME_MAX
	      childmask= 0		childmask= 1
	      parent   = GRP1:0		parent   = GRP1:0
		/         \
	      0          1 .. 7                8
	  idle           idle                !online
	  childmask=1

2) Setup code (running on CPU 8) now connects GRP0:0 to GRP1:0, updates
   parent pointer of GRP0:0 and ...

			     [GRP1:0]
			migrator = TMIGR_NONE
			active   = 0x00
			nextevt  = KTIME_MAX
		       /                  \
		 [GRP0:0]                  [GRP0:1]
	      migrator = 0x01           migrator = TMIGR_NONE
	      active   = 0x01           active   = 0x00
	      nextevt  = KTIME_MAX      nextevt  = KTIME_MAX
	      childmask= 0		childmask= 1
	      parent   = GRP1:0		parent   = GRP1:0
		/         \
	      0          1 .. 7                8
	  active          idle                !online
	  childmask=1

	  tmigr_walk.childmask = 0

3) ... CPU 0 comes active in the same time. As migrator in GRP0:0 was
   TMIGR_NONE, childmask of GRP0:0 is stored in update propagation data
   structure tmigr_walk (as update of childmask is not yet
   visible/updated). And now ...

			     [GRP1:0]
			migrator = TMIGR_NONE
			active   = 0x00
			nextevt  = KTIME_MAX
		       /                  \
		 [GRP0:0]                  [GRP0:1]
	      migrator = 0x01           migrator = TMIGR_NONE
	      active   = 0x01           active   = 0x00
	      nextevt  = KTIME_MAX      nextevt  = KTIME_MAX
	      childmask= 2		childmask= 1
	      parent   = GRP1:0		parent   = GRP1:0
		/         \
	      0          1 .. 7                8
	  active          idle                !online
	  childmask=1

	  tmigr_walk.childmask = 0

4) ... childmask of GRP0:0 is updated by CPU 8 (still part of setup
   code).

			     [GRP1:0]
			migrator = 0x00
			active   = 0x00
			nextevt  = KTIME_MAX
		       /                  \
		 [GRP0:0]                  [GRP0:1]
	      migrator = 0x01           migrator = TMIGR_NONE
	      active   = 0x01           active   = 0x00
	      nextevt  = KTIME_MAX      nextevt  = KTIME_MAX
	      childmask= 2		childmask= 1
	      parent   = GRP1:0		parent   = GRP1:0
		/         \
	      0          1 .. 7                8
	  active          idle                !online
	  childmask=1

	  tmigr_walk.childmask = 0

5) CPU 0 sees the connection to GRP1:0 and now propagates active state to
   GRP1:0 but with childmask = 0 as stored in propagation data structure.

--> Now GRP1:0 always has a migrator as 0x00 != TMIGR_NONE and for all CPUs
    it looks like GRP1:0 is always active.

To prevent those races, the setup of the hierarchy is moved into the
cpuhotplug prepare callback. The prepare callback is not executed by the
CPU which will come online, it is executed by the CPU which prepares
onlining of the other CPU. This CPU is active while it is connecting the
formerly top level to the new one. This prevents from (A) to happen and it
also prevents from any further walk above the formerly top level until that
active CPU becomes inactive, releasing the new ->parent and ->childmask
updates to be visible by any subsequent walk up above the formerly top
level hierarchy. This prevents from (B) to happen. The direction for the
updates is now forced to look like "from bottom to top".

However if the active CPU prevents from tmigr_cpu_(in)active() to walk up
with the update not-or-half visible, nothing prevents walking up to the new
top with a 0 childmask in tmigr_handle_remote_up() or
tmigr_requires_handle_remote_up() if the active CPU doing the prepare is
not the migrator. But then it looks fine because:

  * tmigr_check_migrator() should just return false
  * The migrator is active and should eventually observe the new childmask
    at some point in a future tick.

Split setup functionality of online callback into the cpuhotplug prepare
callback and setup hotplug state. Change init call into early_initcall() to
make sure an already active CPU prepares everything for newly upcoming
CPUs. Reorder the code, that all prepare related functions are close to
each other and online and offline callbacks are also close together.

Fixes: 7ee988770326 ("timers: Implement the hierarchical pull model")
Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/r/20240717094940.18687-1-anna-maria@linutronix.de
---
 include/linux/cpuhotplug.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 7a5785f405b6..df59666a2a66 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -122,6 +122,7 @@ enum cpuhp_state {
 	CPUHP_KVM_PPC_BOOK3S_PREPARE,
 	CPUHP_ZCOMP_PREPARE,
 	CPUHP_TIMERS_PREPARE,
+	CPUHP_TMIGR_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
 	CPUHP_BP_PREPARE_DYN,
 	CPUHP_BP_PREPARE_DYN_END		= CPUHP_BP_PREPARE_DYN + 20,
-- 
cgit v1.2.3


From 835a9a67f54f01033417a254e53a1391f99db708 Mon Sep 17 00:00:00 2001
From: Anna-Maria Behnsen <anna-maria@linutronix.de>
Date: Tue, 16 Jul 2024 16:19:24 +0200
Subject: timers/migration: Rename childmask by groupmask to make naming more
 obvious

childmask in the group reflects the mask that is required to 'reference'
this group in the parent. When reading childmask, this might be confusing,
as this suggests, that this is the mask of the child of the group.

Clarify this by renaming childmask in the tmigr_group and tmc_group by
groupmask.

Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/r/20240716-tmigr-fixes-v4-6-757baa7803fe@linutronix.de
---
 include/trace/events/timer_migration.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/timer_migration.h b/include/trace/events/timer_migration.h
index 79f19e76a80b..47db5eaf2f9a 100644
--- a/include/trace/events/timer_migration.h
+++ b/include/trace/events/timer_migration.h
@@ -43,7 +43,7 @@ TRACE_EVENT(tmigr_connect_child_parent,
 		__field( unsigned int,	lvl		)
 		__field( unsigned int,	numa_node	)
 		__field( unsigned int,	num_children	)
-		__field( u32,		childmask	)
+		__field( u32,		groupmask	)
 	),
 
 	TP_fast_assign(
@@ -52,11 +52,11 @@ TRACE_EVENT(tmigr_connect_child_parent,
 		__entry->lvl		= child->parent->level;
 		__entry->numa_node	= child->parent->numa_node;
 		__entry->num_children	= child->parent->num_children;
-		__entry->childmask	= child->childmask;
+		__entry->groupmask	= child->groupmask;
 	),
 
-	TP_printk("group=%p childmask=%0x parent=%p lvl=%d numa=%d num_children=%d",
-		  __entry->child,  __entry->childmask, __entry->parent,
+	TP_printk("group=%p groupmask=%0x parent=%p lvl=%d numa=%d num_children=%d",
+		  __entry->child,  __entry->groupmask, __entry->parent,
 		  __entry->lvl, __entry->numa_node, __entry->num_children)
 );
 
@@ -72,7 +72,7 @@ TRACE_EVENT(tmigr_connect_cpu_parent,
 		__field( unsigned int,	lvl		)
 		__field( unsigned int,	numa_node	)
 		__field( unsigned int,	num_children	)
-		__field( u32,		childmask	)
+		__field( u32,		groupmask	)
 	),
 
 	TP_fast_assign(
@@ -81,11 +81,11 @@ TRACE_EVENT(tmigr_connect_cpu_parent,
 		__entry->lvl		= tmc->tmgroup->level;
 		__entry->numa_node	= tmc->tmgroup->numa_node;
 		__entry->num_children	= tmc->tmgroup->num_children;
-		__entry->childmask	= tmc->childmask;
+		__entry->groupmask	= tmc->groupmask;
 	),
 
-	TP_printk("cpu=%d childmask=%0x parent=%p lvl=%d numa=%d num_children=%d",
-		  __entry->cpu,	 __entry->childmask, __entry->parent,
+	TP_printk("cpu=%d groupmask=%0x parent=%p lvl=%d numa=%d num_children=%d",
+		  __entry->cpu,	 __entry->groupmask, __entry->parent,
 		  __entry->lvl, __entry->numa_node, __entry->num_children)
 );
 
-- 
cgit v1.2.3


From a2b72b81fb3ba18717fc000949ca9d45a3351130 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 24 Jul 2024 12:16:20 +0100
Subject: io_uring: kill REQ_F_CANCEL_SEQ

We removed the reliance on the flag by the cancellation path and now
it's unused.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/e57afe566bbe4fefeb44daffb08900f2a4756577.1721819383.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 3bb6198d1523..e62aa9f0629f 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -461,7 +461,6 @@ enum {
 	REQ_F_SUPPORT_NOWAIT_BIT,
 	REQ_F_ISREG_BIT,
 	REQ_F_POLL_NO_LAZY_BIT,
-	REQ_F_CANCEL_SEQ_BIT,
 	REQ_F_CAN_POLL_BIT,
 	REQ_F_BL_EMPTY_BIT,
 	REQ_F_BL_NO_RECYCLE_BIT,
@@ -536,8 +535,6 @@ enum {
 	REQ_F_HASH_LOCKED	= IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT),
 	/* don't use lazy poll wake for this request */
 	REQ_F_POLL_NO_LAZY	= IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT),
-	/* cancel sequence is set and valid */
-	REQ_F_CANCEL_SEQ	= IO_REQ_FLAG(REQ_F_CANCEL_SEQ_BIT),
 	/* file is pollable */
 	REQ_F_CAN_POLL		= IO_REQ_FLAG(REQ_F_CAN_POLL_BIT),
 	/* buffer list was empty after selection of buffer */
-- 
cgit v1.2.3


From 78eb4ea25cd5fdbdae7eb9fdf87b99195ff67508 Mon Sep 17 00:00:00 2001
From: Joel Granados <j.granados@samsung.com>
Date: Wed, 24 Jul 2024 20:59:29 +0200
Subject: sysctl: treewide: constify the ctl_table argument of proc_handlers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

const qualify the struct ctl_table argument in the proc_handler function
signatures. This is a prerequisite to moving the static ctl_table
structs into .rodata data which will ensure that proc_handler function
pointers cannot be modified.

This patch has been generated by the following coccinelle script:

```
  virtual patch

  @r1@
  identifier ctl, write, buffer, lenp, ppos;
  identifier func !~ "appldata_(timer|interval)_handler|sched_(rt|rr)_handler|rds_tcp_skbuf_handler|proc_sctp_do_(hmac_alg|rto_min|rto_max|udp_port|alpha_beta|auth|probe_interval)";
  @@

  int func(
  - struct ctl_table *ctl
  + const struct ctl_table *ctl
    ,int write, void *buffer, size_t *lenp, loff_t *ppos);

  @r2@
  identifier func, ctl, write, buffer, lenp, ppos;
  @@

  int func(
  - struct ctl_table *ctl
  + const struct ctl_table *ctl
    ,int write, void *buffer, size_t *lenp, loff_t *ppos)
  { ... }

  @r3@
  identifier func;
  @@

  int func(
  - struct ctl_table *
  + const struct ctl_table *
    ,int , void *, size_t *, loff_t *);

  @r4@
  identifier func, ctl;
  @@

  int func(
  - struct ctl_table *ctl
  + const struct ctl_table *ctl
    ,int , void *, size_t *, loff_t *);

  @r5@
  identifier func, write, buffer, lenp, ppos;
  @@

  int func(
  - struct ctl_table *
  + const struct ctl_table *
    ,int write, void *buffer, size_t *lenp, loff_t *ppos);

```

* Code formatting was adjusted in xfs_sysctl.c to comply with code
  conventions. The xfs_stats_clear_proc_handler,
  xfs_panic_mask_proc_handler and xfs_deprecated_dointvec_minmax where
  adjusted.

* The ctl_table argument in proc_watchdog_common was const qualified.
  This is called from a proc_handler itself and is calling back into
  another proc_handler, making it necessary to change it as part of the
  proc_handler migration.

Co-developed-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Co-developed-by: Joel Granados <j.granados@samsung.com>
Signed-off-by: Joel Granados <j.granados@samsung.com>
---
 include/linux/ftrace.h                    |  4 ++--
 include/linux/mm.h                        |  8 ++++----
 include/linux/perf_event.h                |  6 +++---
 include/linux/security.h                  |  2 +-
 include/linux/sysctl.h                    | 34 +++++++++++++++----------------
 include/linux/vmstat.h                    |  4 ++--
 include/linux/writeback.h                 |  2 +-
 include/net/ndisc.h                       |  2 +-
 include/net/neighbour.h                   |  6 +++---
 include/net/netfilter/nf_hooks_lwtunnel.h |  2 +-
 10 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 51575b76818e..fd5e84d0ec47 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -471,7 +471,7 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs,
 
 extern int stack_tracer_enabled;
 
-int stack_trace_sysctl(struct ctl_table *table, int write, void *buffer,
+int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer,
 		       size_t *lenp, loff_t *ppos);
 
 /* DO NOT MODIFY THIS VARIABLE DIRECTLY! */
@@ -1175,7 +1175,7 @@ extern int tracepoint_printk;
 extern void disable_trace_on_warning(void);
 extern int __disable_trace_on_warning;
 
-int tracepoint_printk_sysctl(struct ctl_table *table, int write,
+int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
 			     void *buffer, size_t *lenp, loff_t *ppos);
 
 #else /* CONFIG_TRACING */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index aa4fccb2a693..4563b560ced7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -204,11 +204,11 @@ extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
 extern unsigned long sysctl_overcommit_kbytes;
 
-int overcommit_ratio_handler(struct ctl_table *, int, void *, size_t *,
+int overcommit_ratio_handler(const struct ctl_table *, int, void *, size_t *,
 		loff_t *);
-int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
+int overcommit_kbytes_handler(const struct ctl_table *, int, void *, size_t *,
 		loff_t *);
-int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
+int overcommit_policy_handler(const struct ctl_table *, int, void *, size_t *,
 		loff_t *);
 
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
@@ -3854,7 +3854,7 @@ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm);
 
 #ifdef CONFIG_SYSCTL
 extern int sysctl_drop_caches;
-int drop_caches_sysctl_handler(struct ctl_table *, int, void *, size_t *,
+int drop_caches_sysctl_handler(const struct ctl_table *, int, void *, size_t *,
 		loff_t *);
 #endif
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 65ece0d5b4b6..1a8942277dda 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1582,11 +1582,11 @@ extern int sysctl_perf_cpu_time_max_percent;
 
 extern void perf_sample_event_took(u64 sample_len_ns);
 
-int perf_event_max_sample_rate_handler(struct ctl_table *table, int write,
+int perf_event_max_sample_rate_handler(const struct ctl_table *table, int write,
 		void *buffer, size_t *lenp, loff_t *ppos);
-int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
+int perf_cpu_time_max_percent_handler(const struct ctl_table *table, int write,
 		void *buffer, size_t *lenp, loff_t *ppos);
-int perf_event_max_stack_handler(struct ctl_table *table, int write,
+int perf_event_max_stack_handler(const struct ctl_table *table, int write,
 		void *buffer, size_t *lenp, loff_t *ppos);
 
 /* Access to perf_event_open(2) syscall. */
diff --git a/include/linux/security.h b/include/linux/security.h
index de3af33e6ff5..1390f1efb4f0 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -228,7 +228,7 @@ struct request_sock;
 #define LSM_UNSAFE_NO_NEW_PRIVS	4
 
 #ifdef CONFIG_MMU
-extern int mmap_min_addr_handler(struct ctl_table *table, int write,
+extern int mmap_min_addr_handler(const struct ctl_table *table, int write,
 				 void *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 54fbec062772..aa4c6d44aaa0 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -61,31 +61,31 @@ extern const int sysctl_vals[];
 
 extern const unsigned long sysctl_long_vals[];
 
-typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer,
+typedef int proc_handler(const struct ctl_table *ctl, int write, void *buffer,
 		size_t *lenp, loff_t *ppos);
 
-int proc_dostring(struct ctl_table *, int, void *, size_t *, loff_t *);
-int proc_dobool(struct ctl_table *table, int write, void *buffer,
+int proc_dostring(const struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_dobool(const struct ctl_table *table, int write, void *buffer,
 		size_t *lenp, loff_t *ppos);
-int proc_dointvec(struct ctl_table *, int, void *, size_t *, loff_t *);
-int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *);
-int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *);
-int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer,
+int proc_dointvec(const struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_douintvec(const struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_dointvec_minmax(const struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_douintvec_minmax(const struct ctl_table *table, int write, void *buffer,
 		size_t *lenp, loff_t *ppos);
-int proc_dou8vec_minmax(struct ctl_table *table, int write, void *buffer,
+int proc_dou8vec_minmax(const struct ctl_table *table, int write, void *buffer,
 			size_t *lenp, loff_t *ppos);
-int proc_dointvec_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *);
-int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
+int proc_dointvec_jiffies(const struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_dointvec_ms_jiffies_minmax(const struct ctl_table *table, int write,
 		void *buffer, size_t *lenp, loff_t *ppos);
-int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void *, size_t *,
+int proc_dointvec_userhz_jiffies(const struct ctl_table *, int, void *, size_t *,
 		loff_t *);
-int proc_dointvec_ms_jiffies(struct ctl_table *, int, void *, size_t *,
+int proc_dointvec_ms_jiffies(const struct ctl_table *, int, void *, size_t *,
 		loff_t *);
-int proc_doulongvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *);
-int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, void *,
+int proc_doulongvec_minmax(const struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_doulongvec_ms_jiffies_minmax(const struct ctl_table *table, int, void *,
 		size_t *, loff_t *);
-int proc_do_large_bitmap(struct ctl_table *, int, void *, size_t *, loff_t *);
-int proc_do_static_key(struct ctl_table *table, int write, void *buffer,
+int proc_do_large_bitmap(const struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_do_static_key(const struct ctl_table *table, int write, void *buffer,
 		size_t *lenp, loff_t *ppos);
 
 /*
@@ -287,7 +287,7 @@ static inline bool sysctl_is_alias(char *param)
 }
 #endif /* CONFIG_SYSCTL */
 
-int sysctl_max_threads(struct ctl_table *table, int write, void *buffer,
+int sysctl_max_threads(const struct ctl_table *table, int write, void *buffer,
 		size_t *lenp, loff_t *ppos);
 
 #endif /* _LINUX_SYSCTL_H */
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 16b0cfa80502..23cd17942036 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -17,7 +17,7 @@ extern int sysctl_stat_interval;
 #define DISABLE_NUMA_STAT   0
 extern int sysctl_vm_numa_stat;
 DECLARE_STATIC_KEY_TRUE(vm_numa_stat_key);
-int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
+int sysctl_vm_numa_stat_handler(const struct ctl_table *table, int write,
 		void *buffer, size_t *length, loff_t *ppos);
 #endif
 
@@ -301,7 +301,7 @@ void cpu_vm_stats_fold(int cpu);
 void refresh_zone_stat_thresholds(void);
 
 struct ctl_table;
-int vmstat_refresh(struct ctl_table *, int write, void *buffer, size_t *lenp,
+int vmstat_refresh(const struct ctl_table *, int write, void *buffer, size_t *lenp,
 		loff_t *ppos);
 
 void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 112d806ddbe4..1a54676d843a 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -350,7 +350,7 @@ extern unsigned int dirty_expire_interval;
 extern unsigned int dirtytime_expire_interval;
 extern int laptop_mode;
 
-int dirtytime_interval_handler(struct ctl_table *table, int write,
+int dirtytime_interval_handler(const struct ctl_table *table, int write,
 		void *buffer, size_t *lenp, loff_t *ppos);
 
 void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 9bbdf6eaa942..7a533d5b1d59 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -486,7 +486,7 @@ void igmp6_event_report(struct sk_buff *skb);
 
 
 #ifdef CONFIG_SYSCTL
-int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write,
+int ndisc_ifinfo_sysctl_change(const struct ctl_table *ctl, int write,
 			       void *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 0d28172193fa..a44f262a7384 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -412,12 +412,12 @@ void *neigh_seq_start(struct seq_file *, loff_t *, struct neigh_table *,
 void *neigh_seq_next(struct seq_file *, void *, loff_t *);
 void neigh_seq_stop(struct seq_file *, void *);
 
-int neigh_proc_dointvec(struct ctl_table *ctl, int write,
+int neigh_proc_dointvec(const struct ctl_table *ctl, int write,
 			void *buffer, size_t *lenp, loff_t *ppos);
-int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
+int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write,
 				void *buffer,
 				size_t *lenp, loff_t *ppos);
-int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
+int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write,
 				   void *buffer, size_t *lenp, loff_t *ppos);
 
 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
diff --git a/include/net/netfilter/nf_hooks_lwtunnel.h b/include/net/netfilter/nf_hooks_lwtunnel.h
index 52e27920f829..cef7a4eb8f97 100644
--- a/include/net/netfilter/nf_hooks_lwtunnel.h
+++ b/include/net/netfilter/nf_hooks_lwtunnel.h
@@ -2,6 +2,6 @@
 #include <linux/types.h>
 
 #ifdef CONFIG_SYSCTL
-int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
+int nf_hooks_lwtunnel_sysctl_handler(const struct ctl_table *table, int write,
 				     void *buffer, size_t *lenp, loff_t *ppos);
 #endif
-- 
cgit v1.2.3


From d5e726d9143c5624135f5dc9e4069799adeef734 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@fomichev.me>
Date: Fri, 12 Jul 2024 18:52:51 -0700
Subject: xsk: Require XDP_UMEM_TX_METADATA_LEN to actuate tx_metadata_len

Julian reports that commit 341ac980eab9 ("xsk: Support tx_metadata_len")
can break existing use cases which don't zero-initialize xdp_umem_reg
padding. Introduce new XDP_UMEM_TX_METADATA_LEN to make sure we
interpret the padding as tx_metadata_len only when being explicitly
asked.

Fixes: 341ac980eab9 ("xsk: Support tx_metadata_len")
Reported-by: Julian Schindel <mail@arctic-alpaca.de>
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20240713015253.121248-2-sdf@fomichev.me
---
 include/uapi/linux/if_xdp.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index d31698410410..42ec5ddaab8d 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -41,6 +41,10 @@
  */
 #define XDP_UMEM_TX_SW_CSUM		(1 << 1)
 
+/* Request to reserve tx_metadata_len bytes of per-chunk metadata.
+ */
+#define XDP_UMEM_TX_METADATA_LEN	(1 << 2)
+
 struct sockaddr_xdp {
 	__u16 sxdp_family;
 	__u16 sxdp_flags;
-- 
cgit v1.2.3


From 9722c3b66e21ff08aec570d02a97d331087fd70f Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca.ceresoli@bootlin.com>
Date: Wed, 24 Jul 2024 18:33:06 +0200
Subject: of: remove internal arguments from of_property_for_each_u32()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The of_property_for_each_u32() macro needs five parameters, two of which
are primarily meant as internal variables for the macro itself (in the
for() clause). Yet these two parameters are used by a few drivers, and this
can be considered misuse or at least bad practice.

Now that the kernel uses C11 to build, these two parameters can be avoided
by declaring them internally, thus changing this pattern:

  struct property *prop;
  const __be32 *p;
  u32 val;

  of_property_for_each_u32(np, "xyz", prop, p, val) { ... }

to this:

  u32 val;

  of_property_for_each_u32(np, "xyz", val) { ... }

However two variables cannot be declared in the for clause even with C11,
so declare one struct that contain the two variables we actually need. As
the variables inside this struct are not meant to be used by users of this
macro, give the struct instance the noticeable name "_it" so it is visible
during code reviews, helping to avoid new code to use it directly.

Most usages are trivially converted as they do not use those two
parameters, as expected. The non-trivial cases are:

 - drivers/clk/clk.c, of_clk_get_parent_name(): easily doable anyway
 - drivers/clk/clk-si5351.c, si5351_dt_parse(): this is more complex as the
   checks had to be replicated in a different way, making code more verbose
   and somewhat uglier, but I refrained from a full rework to keep as much
   of the original code untouched having no hardware to test my changes

All the changes have been build tested. The few for which I have the
hardware have been runtime-tested too.

Reviewed-by: Andre Przywara <andre.przywara@arm.com> # drivers/clk/sunxi/clk-simple-gates.c, drivers/clk/sunxi/clk-sun8i-bus-gates.c
Acked-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org> # drivers/gpio/gpio-brcmstb.c
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com> # drivers/irqchip/irq-atmel-aic-common.c
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> # drivers/iio/adc/ti_am335x_adc.c
Acked-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com> # drivers/pwm/pwm-samsung.c
Acked-by: Richard Leitner <richard.leitner@linux.dev> # drivers/usb/misc/usb251xb.c
Acked-by: Mark Brown <broonie@kernel.org> # sound/soc/codecs/arizona.c
Reviewed-by: Richard Fitzgerald <rf@opensource.cirrus.com> # sound/soc/codecs/arizona.c
Acked-by: Michael Ellerman <mpe@ellerman.id.au> # arch/powerpc/sysdev/xive/spapr.c
Acked-by: Stephen Boyd <sboyd@kernel.org> # clk
Signed-off-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
Acked-by: Lee Jones <lee@kernel.org>
Link: https://lore.kernel.org/r/20240724-of_property_for_each_u32-v3-1-bea82ce429e2@bootlin.com
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 include/linux/of.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/of.h b/include/linux/of.h
index 13cf7a43b473..85b60ac9eec5 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -430,11 +430,9 @@ extern int of_detach_node(struct device_node *);
 #define of_match_ptr(_ptr)	(_ptr)
 
 /*
- * struct property *prop;
- * const __be32 *p;
  * u32 u;
  *
- * of_property_for_each_u32(np, "propname", prop, p, u)
+ * of_property_for_each_u32(np, "propname", u)
  *         printk("U32 value: %x\n", u);
  */
 const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur,
@@ -1431,11 +1429,12 @@ static inline int of_property_read_s32(const struct device_node *np,
 	     err == 0;							\
 	     err = of_phandle_iterator_next(it))
 
-#define of_property_for_each_u32(np, propname, prop, p, u)	\
-	for (prop = of_find_property(np, propname, NULL),	\
-		p = of_prop_next_u32(prop, NULL, &u);		\
-		p;						\
-		p = of_prop_next_u32(prop, p, &u))
+#define of_property_for_each_u32(np, propname, u)			\
+	for (struct {struct property *prop; const __be32 *item; } _it =	\
+		{of_find_property(np, propname, NULL),			\
+		 of_prop_next_u32(_it.prop, NULL, &u)};			\
+	     _it.item;							\
+	     _it.item = of_prop_next_u32(_it.prop, _it.item, &u))
 
 #define of_property_for_each_string(np, propname, prop, s)	\
 	for (prop = of_find_property(np, propname, NULL),	\
-- 
cgit v1.2.3


From 63c33ca0969cf4d4574103b69fb46f58a19a182b Mon Sep 17 00:00:00 2001
From: Bhoomik Gupta <bhoomik.gupta@nxp.com>
Date: Mon, 8 Jul 2024 11:08:35 +0530
Subject: i3c: master: Enhance i3c_bus_type visibility for device searching &
 event monitoring

Improve the visibility of i3c_bus_type to facilitate searching for
i3c devices attached to the i3c bus. Enable other drivers to use
bus_register_notifier to monitor i3c bus device events.

Signed-off-by: Bhoomik Gupta <bhoomik.gupta@nxp.com>
Link: https://lore.kernel.org/r/20240708053835.3003986-1-bhoomik.gupta@nxp.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/i3c/master.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h
index 0ca27dd86956..074f632868d9 100644
--- a/include/linux/i3c/master.h
+++ b/include/linux/i3c/master.h
@@ -33,6 +33,7 @@ enum {
 struct i3c_master_controller;
 struct i3c_bus;
 struct i3c_device;
+extern const struct bus_type i3c_bus_type;
 
 /**
  * struct i3c_i2c_dev_desc - Common part of the I3C/I2C device descriptor
-- 
cgit v1.2.3


From 24168c5e6dfbdd5b414f048f47f75d64533296ca Mon Sep 17 00:00:00 2001
From: Carlos Song <carlos.song@nxp.com>
Date: Mon, 15 Jul 2024 18:53:51 -0400
Subject: dt-bindings: i3c: add header for generic I3C flags

Add header file for generic I3C flags to avoid hard code in dts file.

Signed-off-by: Carlos Song <carlos.song@nxp.com>
Reviewed-by: Frank Li <frank.li@nxp.com>
Acked-by: Jason Liu <jason.hui.liu@nxp.com>
Signed-off-by: Frank Li <Frank.Li@nxp.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20240715225351.3237284-1-Frank.Li@nxp.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/dt-bindings/i3c/i3c.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 include/dt-bindings/i3c/i3c.h

(limited to 'include')

diff --git a/include/dt-bindings/i3c/i3c.h b/include/dt-bindings/i3c/i3c.h
new file mode 100644
index 000000000000..373439218bba
--- /dev/null
+++ b/include/dt-bindings/i3c/i3c.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */
+/*
+ * Copyright 2024 NXP
+ */
+
+#ifndef _DT_BINDINGS_I3C_I3C_H
+#define _DT_BINDINGS_I3C_I3C_H
+
+#define I2C_FM      (1 << 4)
+#define I2C_FM_PLUS (0 << 4)
+
+#define I2C_FILTER  (0 << 5)
+#define I2C_NO_FILTER_HIGH_FREQUENCY    (1 << 5)
+#define I2C_NO_FILTER_LOW_FREQUENCY     (2 << 5)
+
+#endif
-- 
cgit v1.2.3


From 342b2e395d5f34c9f111a818556e617939f83a8c Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 26 Jul 2024 15:24:30 +0100
Subject: io_uring/napi: use ktime in busy polling

It's more natural to use ktime/ns instead of keeping around usec,
especially since we're comparing it against user provided timers,
so convert napi busy poll internal handling to ktime. It's also nicer
since the type (ktime_t vs unsigned long) now tells the unit of measure.

Keep everything as ktime, which we convert to/from micro seconds for
IORING_[UN]REGISTER_NAPI. The net/ busy polling works seems to work with
usec, however it's not real usec as shift by 10 is used to get it from
nsecs, see busy_loop_current_time(), so it's easy to get truncated nsec
back and we get back better precision.

Note, we can further improve it later by removing the truncation and
maybe convincing net/ to use ktime/ns instead.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/95e7ec8d095069a3ed5d40a4bc6f8b586698bc7e.1722003776.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index e62aa9f0629f..3315005df117 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -404,7 +404,7 @@ struct io_ring_ctx {
 	spinlock_t		napi_lock;	/* napi_list lock */
 
 	/* napi busy poll default timeout */
-	unsigned int		napi_busy_poll_to;
+	ktime_t			napi_busy_poll_dt;
 	bool			napi_prefer_busy_poll;
 	bool			napi_enabled;
 
-- 
cgit v1.2.3


From d659b715e94ac039803d7601505d3473393fc0be Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Mon, 15 Jul 2024 10:04:23 +1000
Subject: mm/huge_memory: avoid PMD-size page cache if needed

xarray can't support arbitrary page cache size.  the largest and supported
page cache size is defined as MAX_PAGECACHE_ORDER by commit 099d90642a71
("mm/filemap: make MAX_PAGECACHE_ORDER acceptable to xarray").  However,
it's possible to have 512MB page cache in the huge memory's collapsing
path on ARM64 system whose base page size is 64KB.  512MB page cache is
breaking the limitation and a warning is raised when the xarray entry is
split as shown in the following example.

[root@dhcp-10-26-1-207 ~]# cat /proc/1/smaps | grep KernelPageSize
KernelPageSize:       64 kB
[root@dhcp-10-26-1-207 ~]# cat /tmp/test.c
   :
int main(int argc, char **argv)
{
	const char *filename = TEST_XFS_FILENAME;
	int fd = 0;
	void *buf = (void *)-1, *p;
	int pgsize = getpagesize();
	int ret = 0;

	if (pgsize != 0x10000) {
		fprintf(stdout, "System with 64KB base page size is required!\n");
		return -EPERM;
	}

	system("echo 0 > /sys/devices/virtual/bdi/253:0/read_ahead_kb");
	system("echo 1 > /proc/sys/vm/drop_caches");

	/* Open the xfs file */
	fd = open(filename, O_RDONLY);
	assert(fd > 0);

	/* Create VMA */
	buf = mmap(NULL, TEST_MEM_SIZE, PROT_READ, MAP_SHARED, fd, 0);
	assert(buf != (void *)-1);
	fprintf(stdout, "mapped buffer at 0x%p\n", buf);

	/* Populate VMA */
	ret = madvise(buf, TEST_MEM_SIZE, MADV_NOHUGEPAGE);
	assert(ret == 0);
	ret = madvise(buf, TEST_MEM_SIZE, MADV_POPULATE_READ);
	assert(ret == 0);

	/* Collapse VMA */
	ret = madvise(buf, TEST_MEM_SIZE, MADV_HUGEPAGE);
	assert(ret == 0);
	ret = madvise(buf, TEST_MEM_SIZE, MADV_COLLAPSE);
	if (ret) {
		fprintf(stdout, "Error %d to madvise(MADV_COLLAPSE)\n", errno);
		goto out;
	}

	/* Split xarray entry. Write permission is needed */
	munmap(buf, TEST_MEM_SIZE);
	buf = (void *)-1;
	close(fd);
	fd = open(filename, O_RDWR);
	assert(fd > 0);
	fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
 		  TEST_MEM_SIZE - pgsize, pgsize);
out:
	if (buf != (void *)-1)
		munmap(buf, TEST_MEM_SIZE);
	if (fd > 0)
		close(fd);

	return ret;
}

[root@dhcp-10-26-1-207 ~]# gcc /tmp/test.c -o /tmp/test
[root@dhcp-10-26-1-207 ~]# /tmp/test
 ------------[ cut here ]------------
 WARNING: CPU: 25 PID: 7560 at lib/xarray.c:1025 xas_split_alloc+0xf8/0x128
 Modules linked in: nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib    \
 nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct      \
 nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4      \
 ip_set rfkill nf_tables nfnetlink vfat fat virtio_balloon drm fuse   \
 xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64 virtio_net  \
 sha1_ce net_failover virtio_blk virtio_console failover dimlib virtio_mmio
 CPU: 25 PID: 7560 Comm: test Kdump: loaded Not tainted 6.10.0-rc7-gavin+ #9
 Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-1.el9 05/24/2024
 pstate: 83400005 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
 pc : xas_split_alloc+0xf8/0x128
 lr : split_huge_page_to_list_to_order+0x1c4/0x780
 sp : ffff8000ac32f660
 x29: ffff8000ac32f660 x28: ffff0000e0969eb0 x27: ffff8000ac32f6c0
 x26: 0000000000000c40 x25: ffff0000e0969eb0 x24: 000000000000000d
 x23: ffff8000ac32f6c0 x22: ffffffdfc0700000 x21: 0000000000000000
 x20: 0000000000000000 x19: ffffffdfc0700000 x18: 0000000000000000
 x17: 0000000000000000 x16: ffffd5f3708ffc70 x15: 0000000000000000
 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
 x11: ffffffffffffffc0 x10: 0000000000000040 x9 : ffffd5f3708e692c
 x8 : 0000000000000003 x7 : 0000000000000000 x6 : ffff0000e0969eb8
 x5 : ffffd5f37289e378 x4 : 0000000000000000 x3 : 0000000000000c40
 x2 : 000000000000000d x1 : 000000000000000c x0 : 0000000000000000
 Call trace:
  xas_split_alloc+0xf8/0x128
  split_huge_page_to_list_to_order+0x1c4/0x780
  truncate_inode_partial_folio+0xdc/0x160
  truncate_inode_pages_range+0x1b4/0x4a8
  truncate_pagecache_range+0x84/0xa0
  xfs_flush_unmap_range+0x70/0x90 [xfs]
  xfs_file_fallocate+0xfc/0x4d8 [xfs]
  vfs_fallocate+0x124/0x2f0
  ksys_fallocate+0x4c/0xa0
  __arm64_sys_fallocate+0x24/0x38
  invoke_syscall.constprop.0+0x7c/0xd8
  do_el0_svc+0xb4/0xd0
  el0_svc+0x44/0x1d8
  el0t_64_sync_handler+0x134/0x150
  el0t_64_sync+0x17c/0x180

Fix it by correcting the supported page cache orders, different sets for
DAX and other files.  With it corrected, 512MB page cache becomes
disallowed on all non-DAX files on ARM64 system where the base page size
is 64KB.  After this patch is applied, the test program fails with error
-EINVAL returned from __thp_vma_allowable_orders() and the madvise()
system call to collapse the page caches.

Link: https://lkml.kernel.org/r/20240715000423.316491-1-gshan@redhat.com
Fixes: 6b24ca4a1a8d ("mm: Use multi-index entries in the page cache")
Signed-off-by: Gavin Shan <gshan@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Don Dutile <ddutile@redhat.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: William Kucharski <william.kucharski@oracle.com>
Cc: <stable@vger.kernel.org>	[5.17+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index cff002be83eb..e25d9ebfdf89 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -74,14 +74,20 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr;
 #define THP_ORDERS_ALL_ANON	((BIT(PMD_ORDER + 1) - 1) & ~(BIT(0) | BIT(1)))
 
 /*
- * Mask of all large folio orders supported for file THP.
+ * Mask of all large folio orders supported for file THP. Folios in a DAX
+ * file is never split and the MAX_PAGECACHE_ORDER limit does not apply to
+ * it.
  */
-#define THP_ORDERS_ALL_FILE	(BIT(PMD_ORDER) | BIT(PUD_ORDER))
+#define THP_ORDERS_ALL_FILE_DAX		\
+	(BIT(PMD_ORDER) | BIT(PUD_ORDER))
+#define THP_ORDERS_ALL_FILE_DEFAULT	\
+	((BIT(MAX_PAGECACHE_ORDER + 1) - 1) & ~BIT(0))
 
 /*
  * Mask of all large folio orders supported for THP.
  */
-#define THP_ORDERS_ALL		(THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE)
+#define THP_ORDERS_ALL	\
+	(THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DAX | THP_ORDERS_ALL_FILE_DEFAULT)
 
 #define TVA_SMAPS		(1 << 0)	/* Will be used for procfs */
 #define TVA_IN_PF		(1 << 1)	/* Page fault handler */
-- 
cgit v1.2.3


From b3bebe44306e23827397d0d774d206e3fa374041 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Wed, 17 Jul 2024 14:28:44 -0700
Subject: alloc_tag: outline and export free_reserved_page()

Outline and export free_reserved_page() because modules use it and it in
turn uses page_ext_{get|put} which should not be exported.  The same
result could be obtained by outlining {get|put}_page_tag_ref() but that
would have higher performance impact as these functions are used in more
performance critical paths.

Link: https://lkml.kernel.org/r/20240717212844.2749975-1-surenb@google.com
Fixes: dcfe378c81f7 ("lib: introduce support for page allocation tagging")
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202407080044.DWMC9N9I-lkp@intel.com/
Suggested-by: Christoph Hellwig <hch@infradead.org>
Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Sourav Panda <souravpanda@google.com>
Cc: <stable@vger.kernel.org>	[6.10]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4563b560ced7..c4b238a20b76 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3137,21 +3137,7 @@ extern void reserve_bootmem_region(phys_addr_t start,
 				   phys_addr_t end, int nid);
 
 /* Free the reserved page into the buddy system, so it gets managed. */
-static inline void free_reserved_page(struct page *page)
-{
-	if (mem_alloc_profiling_enabled()) {
-		union codetag_ref *ref = get_page_tag_ref(page);
-
-		if (ref) {
-			set_codetag_empty(ref);
-			put_page_tag_ref(ref);
-		}
-	}
-	ClearPageReserved(page);
-	init_page_count(page);
-	__free_page(page);
-	adjust_managed_page_count(page, 1);
-}
+void free_reserved_page(struct page *page);
 #define free_highmem_page(page) free_reserved_page(page)
 
 static inline void mark_page_reserved(struct page *page)
-- 
cgit v1.2.3


From f59adcf5933271ab7247c4c8938c67be8905b725 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Tue, 23 Jul 2024 17:12:44 +0000
Subject: mm: memcg: add cacheline padding after lruvec in mem_cgroup_per_node

Oliver Sand reported a performance regression caused by commit
98c9daf5ae6b ("mm: memcg: guard memcg1-specific members of struct
mem_cgroup_per_node"), which puts some fields of the mem_cgroup_per_node
structure under the CONFIG_MEMCG_V1 config option.  Apparently it causes a
false cache sharing between lruvec and lru_zone_size members of the
structure.  Fix it by adding an explicit padding after the lruvec member.

Even though the padding is not required with CONFIG_MEMCG_V1 set, it seems
like the introduced memory overhead is not significant enough to warrant
another divergence in the mem_cgroup_per_node layout, so the padding is
added unconditionally.

Link: https://lkml.kernel.org/r/20240723171244.747521-1-roman.gushchin@linux.dev
Fixes: 98c9daf5ae6b ("mm: memcg: guard memcg1-specific members of struct mem_cgroup_per_node")
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202407121335.31a10cb6-oliver.sang@intel.com
Tested-by: Oliver Sang <oliver.sang@intel.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7e2eb091049a..0e5bf25d324f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -109,6 +109,7 @@ struct mem_cgroup_per_node {
 
 	/* Fields which get updated often at the end. */
 	struct lruvec		lruvec;
+	CACHELINE_PADDING(_pad2_);
 	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 	struct mem_cgroup_reclaim_iter	iter;
 };
-- 
cgit v1.2.3


From 3a7e02c040b130b5545e4b115aada7bacd80a2b6 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 26 Jul 2024 15:32:27 -0700
Subject: minmax: avoid overly complicated constant expressions in VM code

The minmax infrastructure is overkill for simple constants, and can
cause huge expansions because those simple constants are then used by
other things.

For example, 'pageblock_order' is a core VM constant, but because it was
implemented using 'min_t()' and all the type-checking that involves, it
actually expanded to something like 2.5kB of preprocessor noise.

And when that simple constant was then used inside other expansions:

  #define pageblock_nr_pages      (1UL << pageblock_order)
  #define pageblock_start_pfn(pfn)  ALIGN_DOWN((pfn), pageblock_nr_pages)

and we then use that inside a 'max()' macro:

	case ISOLATE_SUCCESS:
		update_cached = false;
		last_migrated_pfn = max(cc->zone->zone_start_pfn,
			pageblock_start_pfn(cc->migrate_pfn - 1));

the end result was that one statement expanding to 253kB in size.

There are probably other cases of this, but this one case certainly
stood out.

I've added 'MIN_T()' and 'MAX_T()' macros for this kind of "core simple
constant with specific type" use.  These macros skip the type checking,
and as such need to be very sparingly used only for obvious cases that
have active issues like this.

Reported-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Link: https://lore.kernel.org/all/36aa2cad-1db1-4abf-8dd2-fb20484aabc3@lucifer.local/
Cc: David Laight <David.Laight@aculab.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/minmax.h          | 7 +++++++
 include/linux/pageblock-flags.h | 4 ++--
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/minmax.h b/include/linux/minmax.h
index 2ec559284a9f..a7ef65f78933 100644
--- a/include/linux/minmax.h
+++ b/include/linux/minmax.h
@@ -270,4 +270,11 @@ static inline bool in_range32(u32 val, u32 start, u32 len)
 #define swap(a, b) \
 	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
 
+/*
+ * Use these carefully: no type checking, and uses the arguments
+ * multiple times. Use for obvious constants only.
+ */
+#define MIN_T(type,a,b) __cmp(min,(type)(a),(type)(b))
+#define MAX_T(type,a,b) __cmp(max,(type)(a),(type)(b))
+
 #endif	/* _LINUX_MINMAX_H */
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 547e82cdc89a..fc6b9c87cb0a 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -41,13 +41,13 @@ extern unsigned int pageblock_order;
  * Huge pages are a constant size, but don't exceed the maximum allocation
  * granularity.
  */
-#define pageblock_order		min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER)
+#define pageblock_order		MIN_T(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER)
 
 #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 
 #elif defined(CONFIG_TRANSPARENT_HUGEPAGE)
 
-#define pageblock_order		min_t(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER)
+#define pageblock_order		MIN_T(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER)
 
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-- 
cgit v1.2.3


From 00e3913b0416fe69d28745c0a2a340e2f76c219c Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Fri, 26 Jul 2024 01:16:48 +0900
Subject: Revert "firewire: Annotate struct fw_iso_packet with __counted_by()"

This reverts commit d3155742db89df3b3c96da383c400e6ff4d23c25.

The header_length field is byte unit, thus it can not express the number of
elements in header field. It seems that the argument for counted_by
attribute can have no arithmetic expression, therefore this commit just
reverts the issued commit.

Suggested-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/r/20240725161648.130404-1-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 include/linux/firewire.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 00abe0e5d602..1cca14cf5652 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -462,9 +462,8 @@ struct fw_iso_packet {
 				/* rx: Sync bit, wait for matching sy	*/
 	u32 tag:2;		/* tx: Tag in packet header		*/
 	u32 sy:4;		/* tx: Sy in packet header		*/
-	u32 header_length:8;	/* Length of immediate header		*/
-				/* tx: Top of 1394 isoch. data_block    */
-	u32 header[] __counted_by(header_length);
+	u32 header_length:8;	/* Size of immediate header		*/
+	u32 header[];		/* tx: Top of 1394 isoch. data_block	*/
 };
 
 #define FW_ISO_CONTEXT_TRANSMIT			0
-- 
cgit v1.2.3


From 017fa3e89187848fd056af757769c9e66ac3e93d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 28 Jul 2024 13:50:01 -0700
Subject: minmax: simplify and clarify min_t()/max_t() implementation

This simplifies the min_t() and max_t() macros by no longer making them
work in the context of a C constant expression.

That means that you can no longer use them for static initializers or
for array sizes in type definitions, but there were only a couple of
such uses, and all of them were converted (famous last words) to use
MIN_T/MAX_T instead.

Cc: David Laight <David.Laight@aculab.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/minmax.h | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/minmax.h b/include/linux/minmax.h
index a7ef65f78933..9c2848abc804 100644
--- a/include/linux/minmax.h
+++ b/include/linux/minmax.h
@@ -45,17 +45,20 @@
 
 #define __cmp(op, x, y)	((x) __cmp_op_##op (y) ? (x) : (y))
 
-#define __cmp_once(op, x, y, unique_x, unique_y) ({	\
-	typeof(x) unique_x = (x);			\
-	typeof(y) unique_y = (y);			\
+#define __cmp_once_unique(op, type, x, y, ux, uy) \
+	({ type ux = (x); type uy = (y); __cmp(op, ux, uy); })
+
+#define __cmp_once(op, type, x, y) \
+	__cmp_once_unique(op, type, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_))
+
+#define __careful_cmp_once(op, x, y) ({			\
 	static_assert(__types_ok(x, y),			\
 		#op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \
-	__cmp(op, unique_x, unique_y); })
+	__cmp_once(op, __auto_type, x, y); })
 
 #define __careful_cmp(op, x, y)					\
 	__builtin_choose_expr(__is_constexpr((x) - (y)),	\
-		__cmp(op, x, y),				\
-		__cmp_once(op, x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y)))
+		__cmp(op, x, y), __careful_cmp_once(op, x, y))
 
 #define __clamp(val, lo, hi)	\
 	((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val)))
@@ -158,7 +161,7 @@
  * @x: first value
  * @y: second value
  */
-#define min_t(type, x, y)	__careful_cmp(min, (type)(x), (type)(y))
+#define min_t(type, x, y) __cmp_once(min, type, x, y)
 
 /**
  * max_t - return maximum of two values, using the specified type
@@ -166,7 +169,7 @@
  * @x: first value
  * @y: second value
  */
-#define max_t(type, x, y)	__careful_cmp(max, (type)(x), (type)(y))
+#define max_t(type, x, y) __cmp_once(max, type, x, y)
 
 /*
  * Do not check the array parameter using __must_be_array().
-- 
cgit v1.2.3