From ef246da8e63c486780dca4d9b4d79589cbebf5e5 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sat, 24 Jan 2026 21:14:13 +0200
Subject: dma-buf: Rename .move_notify() callback to a clearer identifier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename the .move_notify() callback to .invalidate_mappings() to make its
purpose explicit and highlight that it is responsible for invalidating
existing mappings.

Suggested-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Link: https://lore.kernel.org/r/20260124-dmabuf-revoke-v5-1-f98fca917e96@nvidia.com
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 include/linux/dma-buf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 91f4939db89b..d9ee4499b37d 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -407,7 +407,7 @@ struct dma_buf {
 	 *   through the device.
 	 *
 	 * - Dynamic importers should set fences for any access that they can't
-	 *   disable immediately from their &dma_buf_attach_ops.move_notify
+	 *   disable immediately from their &dma_buf_attach_ops.invalidate_mappings
 	 *   callback.
 	 *
 	 * IMPORTANT:
@@ -446,7 +446,7 @@ struct dma_buf_attach_ops {
 	bool allow_peer2peer;
 
 	/**
-	 * @move_notify: [optional] notification that the DMA-buf is moving
+	 * @invalidate_mappings: [optional] notification that the DMA-buf is moving
 	 *
 	 * If this callback is provided the framework can avoid pinning the
 	 * backing store while mappings exists.
@@ -463,7 +463,7 @@ struct dma_buf_attach_ops {
 	 * New mappings can be created after this callback returns, and will
 	 * point to the new location of the DMA-buf.
 	 */
-	void (*move_notify)(struct dma_buf_attachment *attach);
+	void (*invalidate_mappings)(struct dma_buf_attachment *attach);
 };
 
 /**
-- 
cgit v1.2.3


From 95308225e5baeaae1e313816059c59a0036ab6b2 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sat, 24 Jan 2026 21:14:14 +0200
Subject: dma-buf: Rename dma_buf_move_notify() to
 dma_buf_invalidate_mappings()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Along with renaming the .move_notify() callback, rename the corresponding
dma-buf core function. This makes the expected behavior clear to exporters
calling this function.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://lore.kernel.org/r/20260124-dmabuf-revoke-v5-2-f98fca917e96@nvidia.com
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 include/linux/dma-buf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index d9ee4499b37d..d0470af8887e 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -588,7 +588,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
 					enum dma_data_direction);
 void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
 				enum dma_data_direction);
-void dma_buf_move_notify(struct dma_buf *dma_buf);
+void dma_buf_invalidate_mappings(struct dma_buf *dma_buf);
 int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 			     enum dma_data_direction dir);
 int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
-- 
cgit v1.2.3


From 2bcbc706dfa02ae50118173a6f6d8a12e735480c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Fri, 19 Dec 2025 11:41:54 +0100
Subject: dma-buf: add dma_fence_was_initialized function v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some driver use fence->ops to test if a fence was initialized or not.
The problem is that this utilizes internal behavior of the dma_fence
implementation.

So better abstract that into a function.

v2: use a flag instead of testing fence->ops, rename the function, move
    to the beginning of the patch set.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Link: https://lore.kernel.org/r/20260120105655.7134-2-christian.koenig@amd.com
---
 include/linux/dma-fence.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index d4c92fd35092..9c4d25289239 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -48,6 +48,7 @@ struct seq_file;
  * atomic ops (bit_*), so taking the spinlock will not be needed most
  * of the time.
  *
+ * DMA_FENCE_FLAG_INITIALIZED_BIT - fence was initialized
  * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
  * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling
  * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called
@@ -98,6 +99,7 @@ struct dma_fence {
 };
 
 enum dma_fence_flag_bits {
+	DMA_FENCE_FLAG_INITIALIZED_BIT,
 	DMA_FENCE_FLAG_SEQNO64_BIT,
 	DMA_FENCE_FLAG_SIGNALED_BIT,
 	DMA_FENCE_FLAG_TIMESTAMP_BIT,
@@ -263,6 +265,19 @@ void dma_fence_release(struct kref *kref);
 void dma_fence_free(struct dma_fence *fence);
 void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq);
 
+/**
+ * dma_fence_was_initialized - test if fence was initialized
+ * @fence: fence to test
+ *
+ * Return: True if fence was ever initialized, false otherwise. Works correctly
+ * only when memory backing the fence structure is zero initialized on
+ * allocation.
+ */
+static inline bool dma_fence_was_initialized(struct dma_fence *fence)
+{
+	return fence && test_bit(DMA_FENCE_FLAG_INITIALIZED_BIT, &fence->flags);
+}
+
 /**
  * dma_fence_put - decreases refcount of the fence
  * @fence: fence to reduce refcount of
-- 
cgit v1.2.3


From 4a9671a03f2be13acde0cb15c5208767a9cc56e4 Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelagnelf@nvidia.com>
Date: Fri, 6 Feb 2026 08:52:38 +1000
Subject: gpu: Move DRM buddy allocator one level up (part one)

Move the DRM buddy allocator one level up so that it can be used by GPU
drivers (example, nova-core) that have usecases other than DRM (such as
VFIO vGPU support). Modify the API, structures and Kconfigs to use
"gpu_buddy" terminology. Adapt the drivers and tests to use the new API.

The commit cannot be split due to bisectability, however no functional
change is intended. Verified by running K-UNIT tests and build tested
various configurations.

Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
[airlied: I've split this into two so git can find copies easier.
I've also just nuked drm_random library, that stuff needs to be done
elsewhere and only the buddy tests seem to be using it].
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/gpu_buddy.h | 171 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 171 insertions(+)
 create mode 100644 include/linux/gpu_buddy.h

(limited to 'include/linux')

diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
new file mode 100644
index 000000000000..b909fa8f810a
--- /dev/null
+++ b/include/linux/gpu_buddy.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __DRM_BUDDY_H__
+#define __DRM_BUDDY_H__
+
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/rbtree.h>
+
+struct drm_printer;
+
+#define DRM_BUDDY_RANGE_ALLOCATION		BIT(0)
+#define DRM_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
+#define DRM_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
+#define DRM_BUDDY_CLEAR_ALLOCATION		BIT(3)
+#define DRM_BUDDY_CLEARED			BIT(4)
+#define DRM_BUDDY_TRIM_DISABLE			BIT(5)
+
+struct drm_buddy_block {
+#define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
+#define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
+#define   DRM_BUDDY_ALLOCATED	   (1 << 10)
+#define   DRM_BUDDY_FREE	   (2 << 10)
+#define   DRM_BUDDY_SPLIT	   (3 << 10)
+#define DRM_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
+/* Free to be used, if needed in the future */
+#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
+#define DRM_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
+	u64 header;
+
+	struct drm_buddy_block *left;
+	struct drm_buddy_block *right;
+	struct drm_buddy_block *parent;
+
+	void *private; /* owned by creator */
+
+	/*
+	 * While the block is allocated by the user through drm_buddy_alloc*,
+	 * the user has ownership of the link, for example to maintain within
+	 * a list, if so desired. As soon as the block is freed with
+	 * drm_buddy_free* ownership is given back to the mm.
+	 */
+	union {
+		struct rb_node rb;
+		struct list_head link;
+	};
+
+	struct list_head tmp_link;
+};
+
+/* Order-zero must be at least SZ_4K */
+#define DRM_BUDDY_MAX_ORDER (63 - 12)
+
+/*
+ * Binary Buddy System.
+ *
+ * Locking should be handled by the user, a simple mutex around
+ * drm_buddy_alloc* and drm_buddy_free* should suffice.
+ */
+struct drm_buddy {
+	/* Maintain a free list for each order. */
+	struct rb_root **free_trees;
+
+	/*
+	 * Maintain explicit binary tree(s) to track the allocation of the
+	 * address space. This gives us a simple way of finding a buddy block
+	 * and performing the potentially recursive merge step when freeing a
+	 * block.  Nodes are either allocated or free, in which case they will
+	 * also exist on the respective free list.
+	 */
+	struct drm_buddy_block **roots;
+
+	/*
+	 * Anything from here is public, and remains static for the lifetime of
+	 * the mm. Everything above is considered do-not-touch.
+	 */
+	unsigned int n_roots;
+	unsigned int max_order;
+
+	/* Must be at least SZ_4K */
+	u64 chunk_size;
+	u64 size;
+	u64 avail;
+	u64 clear_avail;
+};
+
+static inline u64
+drm_buddy_block_offset(const struct drm_buddy_block *block)
+{
+	return block->header & DRM_BUDDY_HEADER_OFFSET;
+}
+
+static inline unsigned int
+drm_buddy_block_order(struct drm_buddy_block *block)
+{
+	return block->header & DRM_BUDDY_HEADER_ORDER;
+}
+
+static inline unsigned int
+drm_buddy_block_state(struct drm_buddy_block *block)
+{
+	return block->header & DRM_BUDDY_HEADER_STATE;
+}
+
+static inline bool
+drm_buddy_block_is_allocated(struct drm_buddy_block *block)
+{
+	return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
+}
+
+static inline bool
+drm_buddy_block_is_clear(struct drm_buddy_block *block)
+{
+	return block->header & DRM_BUDDY_HEADER_CLEAR;
+}
+
+static inline bool
+drm_buddy_block_is_free(struct drm_buddy_block *block)
+{
+	return drm_buddy_block_state(block) == DRM_BUDDY_FREE;
+}
+
+static inline bool
+drm_buddy_block_is_split(struct drm_buddy_block *block)
+{
+	return drm_buddy_block_state(block) == DRM_BUDDY_SPLIT;
+}
+
+static inline u64
+drm_buddy_block_size(struct drm_buddy *mm,
+		     struct drm_buddy_block *block)
+{
+	return mm->chunk_size << drm_buddy_block_order(block);
+}
+
+int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size);
+
+void drm_buddy_fini(struct drm_buddy *mm);
+
+struct drm_buddy_block *
+drm_get_buddy(struct drm_buddy_block *block);
+
+int drm_buddy_alloc_blocks(struct drm_buddy *mm,
+			   u64 start, u64 end, u64 size,
+			   u64 min_page_size,
+			   struct list_head *blocks,
+			   unsigned long flags);
+
+int drm_buddy_block_trim(struct drm_buddy *mm,
+			 u64 *start,
+			 u64 new_size,
+			 struct list_head *blocks);
+
+void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear);
+
+void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
+
+void drm_buddy_free_list(struct drm_buddy *mm,
+			 struct list_head *objects,
+			 unsigned int flags);
+
+void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
+void drm_buddy_block_print(struct drm_buddy *mm,
+			   struct drm_buddy_block *block,
+			   struct drm_printer *p);
+#endif
-- 
cgit v1.2.3


From ba110db8e1bc206c13fd7d985e79b033f53bfdea Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelagnelf@nvidia.com>
Date: Fri, 6 Feb 2026 08:52:38 +1000
Subject: gpu: Move DRM buddy allocator one level up (part two)

Move the DRM buddy allocator one level up so that it can be used by GPU
drivers (example, nova-core) that have usecases other than DRM (such as
VFIO vGPU support). Modify the API, structures and Kconfigs to use
"gpu_buddy" terminology. Adapt the drivers and tests to use the new API.

The commit cannot be split due to bisectability, however no functional
change is intended. Verified by running K-UNIT tests and build tested
various configurations.

Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
[airlied: I've split this into two so git can find copies easier.
I've also just nuked drm_random library, that stuff needs to be done
elsewhere and only the buddy tests seem to be using it].
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/gpu_buddy.h | 124 ++++++++++++++++++++++++----------------------
 1 file changed, 65 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
index b909fa8f810a..07ac65db6d2e 100644
--- a/include/linux/gpu_buddy.h
+++ b/include/linux/gpu_buddy.h
@@ -3,8 +3,8 @@
  * Copyright © 2021 Intel Corporation
  */
 
-#ifndef __DRM_BUDDY_H__
-#define __DRM_BUDDY_H__
+#ifndef __GPU_BUDDY_H__
+#define __GPU_BUDDY_H__
 
 #include <linux/bitops.h>
 #include <linux/list.h>
@@ -12,38 +12,45 @@
 #include <linux/sched.h>
 #include <linux/rbtree.h>
 
-struct drm_printer;
-
-#define DRM_BUDDY_RANGE_ALLOCATION		BIT(0)
-#define DRM_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
-#define DRM_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
-#define DRM_BUDDY_CLEAR_ALLOCATION		BIT(3)
-#define DRM_BUDDY_CLEARED			BIT(4)
-#define DRM_BUDDY_TRIM_DISABLE			BIT(5)
-
-struct drm_buddy_block {
-#define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
-#define DRM_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
-#define   DRM_BUDDY_ALLOCATED	   (1 << 10)
-#define   DRM_BUDDY_FREE	   (2 << 10)
-#define   DRM_BUDDY_SPLIT	   (3 << 10)
-#define DRM_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
+#define GPU_BUDDY_RANGE_ALLOCATION		BIT(0)
+#define GPU_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
+#define GPU_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
+#define GPU_BUDDY_CLEAR_ALLOCATION		BIT(3)
+#define GPU_BUDDY_CLEARED			BIT(4)
+#define GPU_BUDDY_TRIM_DISABLE			BIT(5)
+
+enum gpu_buddy_free_tree {
+	GPU_BUDDY_CLEAR_TREE = 0,
+	GPU_BUDDY_DIRTY_TREE,
+	GPU_BUDDY_MAX_FREE_TREES,
+};
+
+#define for_each_free_tree(tree) \
+	for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++)
+
+struct gpu_buddy_block {
+#define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
+#define GPU_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
+#define   GPU_BUDDY_ALLOCATED	   (1 << 10)
+#define   GPU_BUDDY_FREE	   (2 << 10)
+#define   GPU_BUDDY_SPLIT	   (3 << 10)
+#define GPU_BUDDY_HEADER_CLEAR  GENMASK_ULL(9, 9)
 /* Free to be used, if needed in the future */
-#define DRM_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
-#define DRM_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
+#define GPU_BUDDY_HEADER_UNUSED GENMASK_ULL(8, 6)
+#define GPU_BUDDY_HEADER_ORDER  GENMASK_ULL(5, 0)
 	u64 header;
 
-	struct drm_buddy_block *left;
-	struct drm_buddy_block *right;
-	struct drm_buddy_block *parent;
+	struct gpu_buddy_block *left;
+	struct gpu_buddy_block *right;
+	struct gpu_buddy_block *parent;
 
 	void *private; /* owned by creator */
 
 	/*
-	 * While the block is allocated by the user through drm_buddy_alloc*,
+	 * While the block is allocated by the user through gpu_buddy_alloc*,
 	 * the user has ownership of the link, for example to maintain within
 	 * a list, if so desired. As soon as the block is freed with
-	 * drm_buddy_free* ownership is given back to the mm.
+	 * gpu_buddy_free* ownership is given back to the mm.
 	 */
 	union {
 		struct rb_node rb;
@@ -54,15 +61,15 @@ struct drm_buddy_block {
 };
 
 /* Order-zero must be at least SZ_4K */
-#define DRM_BUDDY_MAX_ORDER (63 - 12)
+#define GPU_BUDDY_MAX_ORDER (63 - 12)
 
 /*
  * Binary Buddy System.
  *
  * Locking should be handled by the user, a simple mutex around
- * drm_buddy_alloc* and drm_buddy_free* should suffice.
+ * gpu_buddy_alloc* and gpu_buddy_free* should suffice.
  */
-struct drm_buddy {
+struct gpu_buddy {
 	/* Maintain a free list for each order. */
 	struct rb_root **free_trees;
 
@@ -73,7 +80,7 @@ struct drm_buddy {
 	 * block.  Nodes are either allocated or free, in which case they will
 	 * also exist on the respective free list.
 	 */
-	struct drm_buddy_block **roots;
+	struct gpu_buddy_block **roots;
 
 	/*
 	 * Anything from here is public, and remains static for the lifetime of
@@ -90,82 +97,81 @@ struct drm_buddy {
 };
 
 static inline u64
-drm_buddy_block_offset(const struct drm_buddy_block *block)
+gpu_buddy_block_offset(const struct gpu_buddy_block *block)
 {
-	return block->header & DRM_BUDDY_HEADER_OFFSET;
+	return block->header & GPU_BUDDY_HEADER_OFFSET;
 }
 
 static inline unsigned int
-drm_buddy_block_order(struct drm_buddy_block *block)
+gpu_buddy_block_order(struct gpu_buddy_block *block)
 {
-	return block->header & DRM_BUDDY_HEADER_ORDER;
+	return block->header & GPU_BUDDY_HEADER_ORDER;
 }
 
 static inline unsigned int
-drm_buddy_block_state(struct drm_buddy_block *block)
+gpu_buddy_block_state(struct gpu_buddy_block *block)
 {
-	return block->header & DRM_BUDDY_HEADER_STATE;
+	return block->header & GPU_BUDDY_HEADER_STATE;
 }
 
 static inline bool
-drm_buddy_block_is_allocated(struct drm_buddy_block *block)
+gpu_buddy_block_is_allocated(struct gpu_buddy_block *block)
 {
-	return drm_buddy_block_state(block) == DRM_BUDDY_ALLOCATED;
+	return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED;
 }
 
 static inline bool
-drm_buddy_block_is_clear(struct drm_buddy_block *block)
+gpu_buddy_block_is_clear(struct gpu_buddy_block *block)
 {
-	return block->header & DRM_BUDDY_HEADER_CLEAR;
+	return block->header & GPU_BUDDY_HEADER_CLEAR;
 }
 
 static inline bool
-drm_buddy_block_is_free(struct drm_buddy_block *block)
+gpu_buddy_block_is_free(struct gpu_buddy_block *block)
 {
-	return drm_buddy_block_state(block) == DRM_BUDDY_FREE;
+	return gpu_buddy_block_state(block) == GPU_BUDDY_FREE;
 }
 
 static inline bool
-drm_buddy_block_is_split(struct drm_buddy_block *block)
+gpu_buddy_block_is_split(struct gpu_buddy_block *block)
 {
-	return drm_buddy_block_state(block) == DRM_BUDDY_SPLIT;
+	return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
 }
 
 static inline u64
-drm_buddy_block_size(struct drm_buddy *mm,
-		     struct drm_buddy_block *block)
+gpu_buddy_block_size(struct gpu_buddy *mm,
+		     struct gpu_buddy_block *block)
 {
-	return mm->chunk_size << drm_buddy_block_order(block);
+	return mm->chunk_size << gpu_buddy_block_order(block);
 }
 
-int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size);
+int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size);
 
-void drm_buddy_fini(struct drm_buddy *mm);
+void gpu_buddy_fini(struct gpu_buddy *mm);
 
-struct drm_buddy_block *
-drm_get_buddy(struct drm_buddy_block *block);
+struct gpu_buddy_block *
+gpu_get_buddy(struct gpu_buddy_block *block);
 
-int drm_buddy_alloc_blocks(struct drm_buddy *mm,
+int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
 			   u64 start, u64 end, u64 size,
 			   u64 min_page_size,
 			   struct list_head *blocks,
 			   unsigned long flags);
 
-int drm_buddy_block_trim(struct drm_buddy *mm,
+int gpu_buddy_block_trim(struct gpu_buddy *mm,
 			 u64 *start,
 			 u64 new_size,
 			 struct list_head *blocks);
 
-void drm_buddy_reset_clear(struct drm_buddy *mm, bool is_clear);
+void gpu_buddy_reset_clear(struct gpu_buddy *mm, bool is_clear);
 
-void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
+void gpu_buddy_free_block(struct gpu_buddy *mm, struct gpu_buddy_block *block);
 
-void drm_buddy_free_list(struct drm_buddy *mm,
+void gpu_buddy_free_list(struct gpu_buddy *mm,
 			 struct list_head *objects,
 			 unsigned int flags);
 
-void drm_buddy_print(struct drm_buddy *mm, struct drm_printer *p);
-void drm_buddy_block_print(struct drm_buddy *mm,
-			   struct drm_buddy_block *block,
-			   struct drm_printer *p);
+void gpu_buddy_print(struct gpu_buddy *mm);
+void gpu_buddy_block_print(struct gpu_buddy *mm,
+			   struct gpu_buddy_block *block);
 #endif
-- 
cgit v1.2.3


From 95cef38e70250234a254e6228eb7342b6deaaffa Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 17 Feb 2026 16:56:18 +0100
Subject: firmware: google: Export coreboot table entries

Move types for coreboot table entries to <linux/coreboot.h>. Allows
drivers in other subsystems to use these structures.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Tzung-Bi Shih <tzungbi@kernel.org>
Acked-by: Julius Werner <jwerner@chromium.org>
Link: https://patch.msgid.link/20260217155836.96267-9-tzimmermann@suse.de
---
 include/linux/coreboot.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 include/linux/coreboot.h

(limited to 'include/linux')

diff --git a/include/linux/coreboot.h b/include/linux/coreboot.h
new file mode 100644
index 000000000000..48705b439c6e
--- /dev/null
+++ b/include/linux/coreboot.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * coreboot.h
+ *
+ * Coreboot device and driver interfaces.
+ *
+ * Copyright 2014 Gerd Hoffmann <kraxel@redhat.com>
+ * Copyright 2017 Google Inc.
+ * Copyright 2017 Samuel Holland <samuel@sholland.org>
+ */
+
+#ifndef _LINUX_COREBOOT_H
+#define _LINUX_COREBOOT_H
+
+#include <linux/types.h>
+
+/* List of coreboot entry structures that is used */
+
+#define CB_TAG_FRAMEBUFFER 0x12
+#define LB_TAG_CBMEM_ENTRY 0x31
+
+/* Generic */
+struct coreboot_table_entry {
+	u32 tag;
+	u32 size;
+};
+
+/* Points to a CBMEM entry */
+struct lb_cbmem_ref {
+	u32 tag;
+	u32 size;
+
+	u64 cbmem_addr;
+};
+
+/* Corresponds to LB_TAG_CBMEM_ENTRY */
+struct lb_cbmem_entry {
+	u32 tag;
+	u32 size;
+
+	u64 address;
+	u32 entry_size;
+	u32 id;
+};
+
+/* Describes framebuffer setup by coreboot */
+struct lb_framebuffer {
+	u32 tag;
+	u32 size;
+
+	u64 physical_address;
+	u32 x_resolution;
+	u32 y_resolution;
+	u32 bytes_per_line;
+	u8  bits_per_pixel;
+	u8  red_mask_pos;
+	u8  red_mask_size;
+	u8  green_mask_pos;
+	u8  green_mask_size;
+	u8  blue_mask_pos;
+	u8  blue_mask_size;
+	u8  reserved_mask_pos;
+	u8  reserved_mask_size;
+};
+
+#endif /* _LINUX_COREBOOT_H */
-- 
cgit v1.2.3


From 27fc52b5505a3acca96b884a4bf1345344e5a566 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 17 Feb 2026 16:56:19 +0100
Subject: firmware: google: Pack structures for coreboot table entries

Pack the fields in the coreboot table entries. These entries are part of
the coreboot ABI, so they don't follow regular calling conventions. Fields
of type u64 are aligned to boundaries of 4 bytes instead of 8. [1]

So far this has not been a problem. In the future, padding bytes should
be added where explicit alignment is required.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://github.com/coreboot/coreboot/blob/main/payloads/libpayload/include/coreboot_tables.h#L96 # [1]
Suggested-by: Julius Werner <jwerner@chromium.org>
Acked-by: Julius Werner <jwerner@chromium.org>
Acked-by: Tzung-Bi Shih <tzungbi@kernel.org>
Link: https://patch.msgid.link/20260217155836.96267-10-tzimmermann@suse.de
---
 include/linux/coreboot.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/coreboot.h b/include/linux/coreboot.h
index 48705b439c6e..5746b99a070d 100644
--- a/include/linux/coreboot.h
+++ b/include/linux/coreboot.h
@@ -12,8 +12,11 @@
 #ifndef _LINUX_COREBOOT_H
 #define _LINUX_COREBOOT_H
 
+#include <linux/compiler_attributes.h>
 #include <linux/types.h>
 
+typedef __aligned(4) u64 cb_u64;
+
 /* List of coreboot entry structures that is used */
 
 #define CB_TAG_FRAMEBUFFER 0x12
@@ -30,7 +33,7 @@ struct lb_cbmem_ref {
 	u32 tag;
 	u32 size;
 
-	u64 cbmem_addr;
+	cb_u64 cbmem_addr;
 };
 
 /* Corresponds to LB_TAG_CBMEM_ENTRY */
@@ -38,7 +41,7 @@ struct lb_cbmem_entry {
 	u32 tag;
 	u32 size;
 
-	u64 address;
+	cb_u64 address;
 	u32 entry_size;
 	u32 id;
 };
@@ -48,7 +51,7 @@ struct lb_framebuffer {
 	u32 tag;
 	u32 size;
 
-	u64 physical_address;
+	cb_u64 physical_address;
 	u32 x_resolution;
 	u32 y_resolution;
 	u32 bytes_per_line;
-- 
cgit v1.2.3


From a29a1f0ec8d69ee917a9d4c84b844df0decff0ef Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 17 Feb 2026 16:56:21 +0100
Subject: drm/sysfb: corebootdrm: Add DRM driver for coreboot framebuffers

Add corebootdrm, a DRM driver for coreboot framebuffers. The driver
supports a pre-initialized framebuffer with various packed RGB formats.
The driver code is fairly small and uses the same logic as the other
sysfb drivers. Most of the implementation comes from existing sysfb
helpers.

Until now, coreboot relied on simpledrm or simplefb for boot-up graphics
output. Initialize the platform device for corebootdrm in the same place
in framebuffer_probe(). With a later commit, the simple-framebuffer should
be removed.

v4:
- sort include statements (Tzung-Bi)
v3:
- comment on _HAS_LFB semantics (Tzung-Bi)
- fix typo in commit description (Tzung-Bi)
- comment on simple-framebuffer being obsolete for coreboot
v2:
- reimplement as platform driver
- limit resources and mappings to known framebuffer memory; no
  page alignment
- create corebootdrm device from coreboot framebuffer code

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Julius Werner <jwerner@chromium.org>
Acked-by: Tzung-Bi Shih <tzungbi@kernel.org> # coreboot
Link: https://patch.msgid.link/20260217155836.96267-12-tzimmermann@suse.de
---
 include/linux/coreboot.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/coreboot.h b/include/linux/coreboot.h
index 5746b99a070d..885da106fee3 100644
--- a/include/linux/coreboot.h
+++ b/include/linux/coreboot.h
@@ -13,6 +13,7 @@
 #define _LINUX_COREBOOT_H
 
 #include <linux/compiler_attributes.h>
+#include <linux/stddef.h>
 #include <linux/types.h>
 
 typedef __aligned(4) u64 cb_u64;
@@ -66,4 +67,11 @@ struct lb_framebuffer {
 	u8  reserved_mask_size;
 };
 
+/*
+ * True if the coreboot-provided data is large enough to hold information
+ * on the linear framebuffer. False otherwise.
+ */
+#define LB_FRAMEBUFFER_HAS_LFB(__fb) \
+	((__fb)->size >= offsetofend(struct lb_framebuffer, reserved_mask_size))
+
 #endif /* _LINUX_COREBOOT_H */
-- 
cgit v1.2.3


From 058fc04b8587ad07a86dfa8f99d8d99db0a55443 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 17 Feb 2026 16:56:22 +0100
Subject: drm/sysfb: corebootdrm: Support panel orientation

Add fields and constants for coreboot framebuffer orientation. Set
corebootdrm's DRM connector state from the values. Not all firmware
provides orientation, so make it optional. Systems without, continue
to use unknown orientation.

v3:
- comment on _HAS_ORIENTATION semantics (Tzung-Bi)

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Julius Werner <jwerner@chromium.org>
Acked-by: Tzung-Bi Shih <tzungbi@kernel.org> # coreboot
Link: https://patch.msgid.link/20260217155836.96267-13-tzimmermann@suse.de
---
 include/linux/coreboot.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/coreboot.h b/include/linux/coreboot.h
index 885da106fee3..5d40ca7a1d89 100644
--- a/include/linux/coreboot.h
+++ b/include/linux/coreboot.h
@@ -47,6 +47,11 @@ struct lb_cbmem_entry {
 	u32 id;
 };
 
+#define LB_FRAMEBUFFER_ORIENTATION_NORMAL	0
+#define LB_FRAMEBUFFER_ORIENTATION_BOTTOM_UP	1
+#define LB_FRAMEBUFFER_ORIENTATION_LEFT_UP	2
+#define LB_FRAMEBUFFER_ORIENTATION_RIGHT_UP	3
+
 /* Describes framebuffer setup by coreboot */
 struct lb_framebuffer {
 	u32 tag;
@@ -65,6 +70,7 @@ struct lb_framebuffer {
 	u8  blue_mask_size;
 	u8  reserved_mask_pos;
 	u8  reserved_mask_size;
+	u8  orientation;
 };
 
 /*
@@ -74,4 +80,11 @@ struct lb_framebuffer {
 #define LB_FRAMEBUFFER_HAS_LFB(__fb) \
 	((__fb)->size >= offsetofend(struct lb_framebuffer, reserved_mask_size))
 
+/*
+ * True if the coreboot-provided data is large enough to hold information
+ * on the display orientation. False otherwise.
+ */
+#define LB_FRAMEBUFFER_HAS_ORIENTATION(__fb) \
+	((__fb)->size >= offsetofend(struct lb_framebuffer, orientation))
+
 #endif /* _LINUX_COREBOOT_H */
-- 
cgit v1.2.3


From 5cab6d386bd30c3bb4efceb05b25842a6f144693 Mon Sep 17 00:00:00 2001
From: Sanjay Yadav <sanjay.kumar.yadav@intel.com>
Date: Thu, 12 Feb 2026 14:55:29 +0530
Subject: drm/buddy: Add kernel-doc for allocator structures and flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add missing kernel-doc for GPU buddy allocator flags,
gpu_buddy_block, and gpu_buddy. The documentation covers block
header fields, allocator roots, free trees, and allocation flags
such as RANGE, TOPDOWN, CONTIGUOUS, CLEAR, and TRIM_DISABLE.
Private members are marked with kernel-doc private markers
and documented with regular comments.

No functional changes.

v2:
- Corrected GPU_BUDDY_CLEAR_TREE and GPU_BUDDY_DIRTY_TREE index
  values (Arun)
- Rebased after DRM buddy allocator moved to drivers/gpu/
- Updated commit message

v3:
- Document reserved bits 8:6 in header layout (Arun)
- Fix checkpatch warning

Cc: Christian König <christian.koenig@amd.com>
Cc: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Sanjay Yadav <sanjay.kumar.yadav@intel.com>
Reviewed-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Link: https://patch.msgid.link/20260212092527.718455-5-sanjay.kumar.yadav@intel.com
---
 include/linux/gpu_buddy.h | 123 ++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 103 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
index 07ac65db6d2e..bf2a42256536 100644
--- a/include/linux/gpu_buddy.h
+++ b/include/linux/gpu_buddy.h
@@ -12,11 +12,58 @@
 #include <linux/sched.h>
 #include <linux/rbtree.h>
 
+/**
+ * GPU_BUDDY_RANGE_ALLOCATION - Allocate within a specific address range
+ *
+ * When set, allocation is restricted to the range [start, end) specified
+ * in gpu_buddy_alloc_blocks(). Without this flag, start/end are ignored
+ * and allocation can use any free space.
+ */
 #define GPU_BUDDY_RANGE_ALLOCATION		BIT(0)
+
+/**
+ * GPU_BUDDY_TOPDOWN_ALLOCATION - Allocate from top of address space
+ *
+ * Allocate starting from high addresses and working down. Useful for
+ * separating different allocation types (e.g., kernel vs userspace)
+ * to reduce fragmentation.
+ */
 #define GPU_BUDDY_TOPDOWN_ALLOCATION		BIT(1)
+
+/**
+ * GPU_BUDDY_CONTIGUOUS_ALLOCATION - Require physically contiguous blocks
+ *
+ * The allocation must be satisfied with a single contiguous block.
+ * If the requested size cannot be allocated contiguously, the
+ * allocation fails with -ENOSPC.
+ */
 #define GPU_BUDDY_CONTIGUOUS_ALLOCATION		BIT(2)
+
+/**
+ * GPU_BUDDY_CLEAR_ALLOCATION - Prefer pre-cleared (zeroed) memory
+ *
+ * Attempt to allocate from the clear tree first. If insufficient clear
+ * memory is available, falls back to dirty memory. Useful when the
+ * caller needs zeroed memory and wants to avoid GPU clear operations.
+ */
 #define GPU_BUDDY_CLEAR_ALLOCATION		BIT(3)
+
+/**
+ * GPU_BUDDY_CLEARED - Mark returned blocks as cleared
+ *
+ * Used with gpu_buddy_free_list() to indicate that the memory being
+ * freed has been cleared (zeroed). The blocks will be placed in the
+ * clear tree for future GPU_BUDDY_CLEAR_ALLOCATION requests.
+ */
 #define GPU_BUDDY_CLEARED			BIT(4)
+
+/**
+ * GPU_BUDDY_TRIM_DISABLE - Disable automatic block trimming
+ *
+ * By default, if an allocation is smaller than the allocated block,
+ * excess memory is trimmed and returned to the free pool. This flag
+ * disables trimming, keeping the full power-of-two block size.
+ */
 #define GPU_BUDDY_TRIM_DISABLE			BIT(5)
 
 enum gpu_buddy_free_tree {
@@ -28,7 +75,28 @@ enum gpu_buddy_free_tree {
 #define for_each_free_tree(tree) \
 	for ((tree) = 0; (tree) < GPU_BUDDY_MAX_FREE_TREES; (tree)++)
 
+/**
+ * struct gpu_buddy_block - Block within a buddy allocator
+ *
+ * Each block in the buddy allocator is represented by this structure.
+ * Blocks are organized in a binary tree where each parent block can be
+ * split into two children (left and right buddies). The allocator manages
+ * blocks at various orders (power-of-2 sizes) from chunk_size up to the
+ * largest contiguous region.
+ *
+ * @private: Private data owned by the allocator user (e.g., driver-specific data)
+ * @link: List node for user ownership while block is allocated
+ */
 struct gpu_buddy_block {
+/* private: */
+	/*
+	 * Header bit layout:
+	 * - Bits 63:12: block offset within the address space
+	 * - Bits 11:10: state (ALLOCATED, FREE, or SPLIT)
+	 * - Bit 9: clear bit (1 if memory is zeroed)
+	 * - Bits 8:6: reserved
+	 * - Bits 5:0: order (log2 of size relative to chunk_size)
+	 */
 #define GPU_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
 #define GPU_BUDDY_HEADER_STATE  GENMASK_ULL(11, 10)
 #define   GPU_BUDDY_ALLOCATED	   (1 << 10)
@@ -43,7 +111,7 @@ struct gpu_buddy_block {
 	struct gpu_buddy_block *left;
 	struct gpu_buddy_block *right;
 	struct gpu_buddy_block *parent;
-
+/* public: */
 	void *private; /* owned by creator */
 
 	/*
@@ -53,43 +121,58 @@ struct gpu_buddy_block {
 	 * gpu_buddy_free* ownership is given back to the mm.
 	 */
 	union {
+/* private: */
 		struct rb_node rb;
+/* public: */
 		struct list_head link;
 	};
-
+/* private: */
 	struct list_head tmp_link;
 };
 
 /* Order-zero must be at least SZ_4K */
 #define GPU_BUDDY_MAX_ORDER (63 - 12)
 
-/*
- * Binary Buddy System.
+/**
+ * struct gpu_buddy - GPU binary buddy allocator
+ *
+ * The buddy allocator provides efficient power-of-two memory allocation
+ * with fast allocation and free operations. It is commonly used for GPU
+ * memory management where allocations can be split into power-of-two
+ * block sizes.
  *
- * Locking should be handled by the user, a simple mutex around
- * gpu_buddy_alloc* and gpu_buddy_free* should suffice.
+ * Locking should be handled by the user; a simple mutex around
+ * gpu_buddy_alloc_blocks() and gpu_buddy_free_block()/gpu_buddy_free_list()
+ * should suffice.
+ *
+ * @n_roots: Number of root blocks in the roots array.
+ * @max_order: Maximum block order (log2 of largest block size / chunk_size).
+ * @chunk_size: Minimum allocation granularity in bytes. Must be at least SZ_4K.
+ * @size: Total size of the address space managed by this allocator in bytes.
+ * @avail: Total free space currently available for allocation in bytes.
+ * @clear_avail: Free space available in the clear tree (zeroed memory) in bytes.
+ *               This is a subset of @avail.
  */
 struct gpu_buddy {
-	/* Maintain a free list for each order. */
-	struct rb_root **free_trees;
-
+/* private: */
 	/*
-	 * Maintain explicit binary tree(s) to track the allocation of the
-	 * address space. This gives us a simple way of finding a buddy block
-	 * and performing the potentially recursive merge step when freeing a
-	 * block.  Nodes are either allocated or free, in which case they will
-	 * also exist on the respective free list.
+	 * Array of red-black trees for free block management.
+	 * Indexed as free_trees[clear/dirty][order] where:
+	 * - Index 0 (GPU_BUDDY_CLEAR_TREE): blocks with zeroed content
+	 * - Index 1 (GPU_BUDDY_DIRTY_TREE): blocks with unknown content
+	 * Each tree holds free blocks of the corresponding order.
 	 */
-	struct gpu_buddy_block **roots;
-
+	struct rb_root **free_trees;
 	/*
-	 * Anything from here is public, and remains static for the lifetime of
-	 * the mm. Everything above is considered do-not-touch.
+	 * Array of root blocks representing the top-level blocks of the
+	 * binary tree(s). Multiple roots exist when the total size is not
+	 * a power of two, with each root being the largest power-of-two
+	 * that fits in the remaining space.
 	 */
+	struct gpu_buddy_block **roots;
+/* public: */
 	unsigned int n_roots;
 	unsigned int max_order;
-
-	/* Must be at least SZ_4K */
 	u64 chunk_size;
 	u64 size;
 	u64 avail;
-- 
cgit v1.2.3


From df8c7892e06efa5df2aa780a338f33a4f666370b Mon Sep 17 00:00:00 2001
From: Sanjay Yadav <sanjay.kumar.yadav@intel.com>
Date: Thu, 12 Feb 2026 14:55:30 +0530
Subject: drm/buddy: Move internal helpers to buddy.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move gpu_buddy_block_state(), gpu_buddy_block_is_allocated(),
and gpu_buddy_block_is_split() from gpu_buddy.h to gpu_buddy.c
as static functions since they have no external callers.

Remove gpu_get_buddy() as it was an unused exported wrapper
around the internal __get_buddy().

No functional changes.

v2:
- Rebased after DRM buddy allocator moved to drivers/gpu/
- Keep gpu_buddy_block_is_free() in header since it's now
  used by drm_buddy.c
- Updated commit message

Cc: Christian König <christian.koenig@amd.com>
Cc: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Suggested-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Sanjay Yadav <sanjay.kumar.yadav@intel.com>
Reviewed-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Link: https://patch.msgid.link/20260212092527.718455-6-sanjay.kumar.yadav@intel.com
---
 include/linux/gpu_buddy.h | 25 ++-----------------------
 1 file changed, 2 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpu_buddy.h b/include/linux/gpu_buddy.h
index bf2a42256536..f1fb6eff604a 100644
--- a/include/linux/gpu_buddy.h
+++ b/include/linux/gpu_buddy.h
@@ -191,16 +191,10 @@ gpu_buddy_block_order(struct gpu_buddy_block *block)
 	return block->header & GPU_BUDDY_HEADER_ORDER;
 }
 
-static inline unsigned int
-gpu_buddy_block_state(struct gpu_buddy_block *block)
-{
-	return block->header & GPU_BUDDY_HEADER_STATE;
-}
-
 static inline bool
-gpu_buddy_block_is_allocated(struct gpu_buddy_block *block)
+gpu_buddy_block_is_free(struct gpu_buddy_block *block)
 {
-	return gpu_buddy_block_state(block) == GPU_BUDDY_ALLOCATED;
+	return (block->header & GPU_BUDDY_HEADER_STATE) == GPU_BUDDY_FREE;
 }
 
 static inline bool
@@ -209,18 +203,6 @@ gpu_buddy_block_is_clear(struct gpu_buddy_block *block)
 	return block->header & GPU_BUDDY_HEADER_CLEAR;
 }
 
-static inline bool
-gpu_buddy_block_is_free(struct gpu_buddy_block *block)
-{
-	return gpu_buddy_block_state(block) == GPU_BUDDY_FREE;
-}
-
-static inline bool
-gpu_buddy_block_is_split(struct gpu_buddy_block *block)
-{
-	return gpu_buddy_block_state(block) == GPU_BUDDY_SPLIT;
-}
-
 static inline u64
 gpu_buddy_block_size(struct gpu_buddy *mm,
 		     struct gpu_buddy_block *block)
@@ -232,9 +214,6 @@ int gpu_buddy_init(struct gpu_buddy *mm, u64 size, u64 chunk_size);
 
 void gpu_buddy_fini(struct gpu_buddy *mm);
 
-struct gpu_buddy_block *
-gpu_get_buddy(struct gpu_buddy_block *block);
-
 int gpu_buddy_alloc_blocks(struct gpu_buddy *mm,
 			   u64 start, u64 end, u64 size,
 			   u64 min_page_size,
-- 
cgit v1.2.3


From f4cc3ab824d6772a48ca9d9c74ac623b3309985d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Tue, 7 Oct 2025 14:06:05 +0200
Subject: dma-buf: protected fence ops by RCU v8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The fence ops of a dma_fence currently need to life as long as the
dma_fence is alive. This means that the module which originally issued
a dma_fence can't unload unless all fences are freed up.

As first step to solve this issue protect the fence ops by RCU.

While it is counter intuitive to protect a constant function pointer table
by RCU it allows modules to wait for an RCU grace period before they
unload, to make sure that nobody is executing their functions any more.

This patch has not much functional change, but only adds the RCU
handling for the static checker to test.

v2: make one the now duplicated lockdep warnings a comment instead.
v3: Add more documentation to ->wait and ->release callback.
v4: fix typo in documentation
v5: rebased on drm-tip
v6: improve code comments
v7: improve commit message and code comments
v8: fix sparse rcu warnings

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://lore.kernel.org/r/20260219160822.1529-2-christian.koenig@amd.com
---
 include/linux/dma-fence.h | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 9c4d25289239..fa3cfe3e98ac 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -67,7 +67,7 @@ struct seq_file;
  */
 struct dma_fence {
 	spinlock_t *lock;
-	const struct dma_fence_ops *ops;
+	const struct dma_fence_ops __rcu *ops;
 	/*
 	 * We clear the callback list on kref_put so that by the time we
 	 * release the fence it is unused. No one should be adding to the
@@ -220,6 +220,10 @@ struct dma_fence_ops {
 	 * timed out. Can also return other error values on custom implementations,
 	 * which should be treated as if the fence is signaled. For example a hardware
 	 * lockup could be reported like that.
+	 *
+	 * Implementing this callback prevents the fence from detaching after
+	 * signaling and so it is necessary for the module providing the
+	 * dma_fence_ops to stay loaded as long as the dma_fence exists.
 	 */
 	signed long (*wait)(struct dma_fence *fence,
 			    bool intr, signed long timeout);
@@ -231,6 +235,13 @@ struct dma_fence_ops {
 	 * Can be called from irq context.  This callback is optional. If it is
 	 * NULL, then dma_fence_free() is instead called as the default
 	 * implementation.
+	 *
+	 * Implementing this callback prevents the fence from detaching after
+	 * signaling and so it is necessary for the module providing the
+	 * dma_fence_ops to stay loaded as long as the dma_fence exists.
+	 *
+	 * If the callback is implemented the memory backing the dma_fence
+	 * object must be freed RCU safe.
 	 */
 	void (*release)(struct dma_fence *fence);
 
@@ -454,13 +465,19 @@ dma_fence_test_signaled_flag(struct dma_fence *fence)
 static inline bool
 dma_fence_is_signaled_locked(struct dma_fence *fence)
 {
+	const struct dma_fence_ops *ops;
+
 	if (dma_fence_test_signaled_flag(fence))
 		return true;
 
-	if (fence->ops->signaled && fence->ops->signaled(fence)) {
+	rcu_read_lock();
+	ops = rcu_dereference(fence->ops);
+	if (ops->signaled && ops->signaled(fence)) {
+		rcu_read_unlock();
 		dma_fence_signal_locked(fence);
 		return true;
 	}
+	rcu_read_unlock();
 
 	return false;
 }
@@ -484,13 +501,19 @@ dma_fence_is_signaled_locked(struct dma_fence *fence)
 static inline bool
 dma_fence_is_signaled(struct dma_fence *fence)
 {
+	const struct dma_fence_ops *ops;
+
 	if (dma_fence_test_signaled_flag(fence))
 		return true;
 
-	if (fence->ops->signaled && fence->ops->signaled(fence)) {
+	rcu_read_lock();
+	ops = rcu_dereference(fence->ops);
+	if (ops->signaled && ops->signaled(fence)) {
+		rcu_read_unlock();
 		dma_fence_signal(fence);
 		return true;
 	}
+	rcu_read_unlock();
 
 	return false;
 }
@@ -695,7 +718,7 @@ extern const struct dma_fence_ops dma_fence_chain_ops;
  */
 static inline bool dma_fence_is_array(struct dma_fence *fence)
 {
-	return fence->ops == &dma_fence_array_ops;
+	return rcu_access_pointer(fence->ops) == &dma_fence_array_ops;
 }
 
 /**
@@ -706,7 +729,7 @@ static inline bool dma_fence_is_array(struct dma_fence *fence)
  */
 static inline bool dma_fence_is_chain(struct dma_fence *fence)
 {
-	return fence->ops == &dma_fence_chain_ops;
+	return rcu_access_pointer(fence->ops) == &dma_fence_chain_ops;
 }
 
 /**
-- 
cgit v1.2.3


From 541c8f2468b933acc5d129e84bd264923675a66e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Wed, 8 Oct 2025 18:12:46 +0200
Subject: dma-buf: detach fence ops on signal v3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When neither a release nor a wait backend ops is specified it is possible
to let the dma_fence live on independently of the module who issued it.

This makes it possible to unload drivers and only wait for all their
fences to signal.

v2: fix typo in comment
v3: fix sparse rcu warnings

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Reviewed-by: Philipp Stanner <phasta@kernel.org>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://lore.kernel.org/r/20260219160822.1529-3-christian.koenig@amd.com
---
 include/linux/dma-fence.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index fa3cfe3e98ac..9ff2c4a09cdc 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -472,7 +472,7 @@ dma_fence_is_signaled_locked(struct dma_fence *fence)
 
 	rcu_read_lock();
 	ops = rcu_dereference(fence->ops);
-	if (ops->signaled && ops->signaled(fence)) {
+	if (ops && ops->signaled && ops->signaled(fence)) {
 		rcu_read_unlock();
 		dma_fence_signal_locked(fence);
 		return true;
@@ -508,7 +508,7 @@ dma_fence_is_signaled(struct dma_fence *fence)
 
 	rcu_read_lock();
 	ops = rcu_dereference(fence->ops);
-	if (ops->signaled && ops->signaled(fence)) {
+	if (ops && ops->signaled && ops->signaled(fence)) {
 		rcu_read_unlock();
 		dma_fence_signal(fence);
 		return true;
-- 
cgit v1.2.3


From 3e5067931b5df667f5350fafe4410554e228e53e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Thu, 9 Oct 2025 10:40:06 +0200
Subject: dma-buf: abstract fence locking v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add dma_fence_lock_irqsafe() and dma_fence_unlock_irqrestore() wrappers
and mechanically apply them everywhere.

Just a pre-requisite cleanup for a follow up patch.

v2: add some missing i915 bits, add abstraction for lockdep assertion as
    well
v3: one more suggestion by Tvrtko

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Link: https://lore.kernel.org/r/20260219160822.1529-4-christian.koenig@amd.com
---
 include/linux/dma-fence.h | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 9ff2c4a09cdc..85d6eac9fa85 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -377,6 +377,44 @@ dma_fence_get_rcu_safe(struct dma_fence __rcu **fencep)
 	} while (1);
 }
 
+/**
+ * dma_fence_spinlock - return pointer to the spinlock protecting the fence
+ * @fence: the fence to get the lock from
+ *
+ * Return the pointer to the extern lock.
+ */
+static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence)
+{
+	return fence->lock;
+}
+
+/**
+ * dma_fence_lock_irqsave - irqsave lock the fence
+ * @fence: the fence to lock
+ * @flags: where to store the CPU flags.
+ *
+ * Lock the fence, preventing it from changing to the signaled state.
+ */
+#define dma_fence_lock_irqsave(fence, flags)	\
+	spin_lock_irqsave(fence->lock, flags)
+
+/**
+ * dma_fence_unlock_irqrestore - unlock the fence and irqrestore
+ * @fence: the fence to unlock
+ * @flags the CPU flags to restore
+ *
+ * Unlock the fence, allowing it to change it's state to signaled again.
+ */
+#define dma_fence_unlock_irqrestore(fence, flags)	\
+	spin_unlock_irqrestore(fence->lock, flags)
+
+/**
+ * dma_fence_assert_held - lockdep assertion that fence is locked
+ * @fence: the fence which should be locked
+ */
+#define dma_fence_assert_held(fence)	\
+	lockdep_assert_held(dma_fence_spinlock(fence));
+
 #ifdef CONFIG_LOCKDEP
 bool dma_fence_begin_signalling(void);
 void dma_fence_end_signalling(bool cookie);
-- 
cgit v1.2.3


From 1f32f310a13c9fb67a9993ab67f596b3f960206f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Thu, 9 Oct 2025 10:40:06 +0200
Subject: dma-buf: inline spinlock for fence protection v5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement per-fence spinlocks, allowing implementations to not give an
external spinlock to protect the fence internal state. Instead a spinlock
embedded into the fence structure itself is used in this case.

Shared spinlocks have the problem that implementations need to guarantee
that the lock lives at least as long all fences referencing them.

Using a per-fence spinlock allows completely decoupling spinlock producer
and consumer life times, simplifying the handling in most use cases.

v2: improve naming, coverage and function documentation
v3: fix one additional locking in the selftests
v4: separate out some changes to make the patch smaller,
    fix one amdgpu crash found by CI systems
v5: improve comments

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://lore.kernel.org/r/20260219160822.1529-5-christian.koenig@amd.com
---
 include/linux/dma-fence.h | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 85d6eac9fa85..3dc93f068bf6 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -34,7 +34,8 @@ struct seq_file;
  * @ops: dma_fence_ops associated with this fence
  * @rcu: used for releasing fence with kfree_rcu
  * @cb_list: list of all callbacks to call
- * @lock: spin_lock_irqsave used for locking
+ * @extern_lock: external spin_lock_irqsave used for locking (deprecated)
+ * @inline_lock: alternative internal spin_lock_irqsave used for locking
  * @context: execution context this fence belongs to, returned by
  *           dma_fence_context_alloc()
  * @seqno: the sequence number of this fence inside the execution context,
@@ -49,6 +50,7 @@ struct seq_file;
  * of the time.
  *
  * DMA_FENCE_FLAG_INITIALIZED_BIT - fence was initialized
+ * DMA_FENCE_FLAG_INLINE_LOCK_BIT - use inline spinlock instead of external one
  * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled
  * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling
  * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called
@@ -66,7 +68,10 @@ struct seq_file;
  * been completed, or never called at all.
  */
 struct dma_fence {
-	spinlock_t *lock;
+	union {
+		spinlock_t *extern_lock;
+		spinlock_t inline_lock;
+	};
 	const struct dma_fence_ops __rcu *ops;
 	/*
 	 * We clear the callback list on kref_put so that by the time we
@@ -100,6 +105,7 @@ struct dma_fence {
 
 enum dma_fence_flag_bits {
 	DMA_FENCE_FLAG_INITIALIZED_BIT,
+	DMA_FENCE_FLAG_INLINE_LOCK_BIT,
 	DMA_FENCE_FLAG_SEQNO64_BIT,
 	DMA_FENCE_FLAG_SIGNALED_BIT,
 	DMA_FENCE_FLAG_TIMESTAMP_BIT,
@@ -381,11 +387,12 @@ dma_fence_get_rcu_safe(struct dma_fence __rcu **fencep)
  * dma_fence_spinlock - return pointer to the spinlock protecting the fence
  * @fence: the fence to get the lock from
  *
- * Return the pointer to the extern lock.
+ * Return either the pointer to the embedded or the external spin lock.
  */
 static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence)
 {
-	return fence->lock;
+	return test_bit(DMA_FENCE_FLAG_INLINE_LOCK_BIT, &fence->flags) ?
+		&fence->inline_lock : fence->extern_lock;
 }
 
 /**
@@ -396,7 +403,7 @@ static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence)
  * Lock the fence, preventing it from changing to the signaled state.
  */
 #define dma_fence_lock_irqsave(fence, flags)	\
-	spin_lock_irqsave(fence->lock, flags)
+	spin_lock_irqsave(dma_fence_spinlock(fence), flags)
 
 /**
  * dma_fence_unlock_irqrestore - unlock the fence and irqrestore
@@ -406,7 +413,7 @@ static inline spinlock_t *dma_fence_spinlock(struct dma_fence *fence)
  * Unlock the fence, allowing it to change it's state to signaled again.
  */
 #define dma_fence_unlock_irqrestore(fence, flags)	\
-	spin_unlock_irqrestore(fence->lock, flags)
+	spin_unlock_irqrestore(dma_fence_spinlock(fence), flags)
 
 /**
  * dma_fence_assert_held - lockdep assertion that fence is locked
-- 
cgit v1.2.3


From 5943243914b9fed8e26edcb9d45421721a5e3576 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Thu, 9 Oct 2025 16:18:53 +0200
Subject: dma-buf: use inline lock for the dma-fence-array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using the inline lock is now the recommended way for dma_fence
implementations.

So use this approach for the framework's internal fences as well.

Also saves about 4 bytes for the external spinlock.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Reviewed-by: Philipp Stanner <phasta@kernel.org>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://lore.kernel.org/r/20260219160822.1529-8-christian.koenig@amd.com
---
 include/linux/dma-fence-array.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h
index 079b3dec0a16..370b3d2bba37 100644
--- a/include/linux/dma-fence-array.h
+++ b/include/linux/dma-fence-array.h
@@ -38,7 +38,6 @@ struct dma_fence_array_cb {
 struct dma_fence_array {
 	struct dma_fence base;
 
-	spinlock_t lock;
 	unsigned num_fences;
 	atomic_t num_pending;
 	struct dma_fence **fences;
-- 
cgit v1.2.3


From a408c0ca0c411ca1ead995bdae3112a806c87556 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Thu, 9 Oct 2025 16:32:33 +0200
Subject: dma-buf: use inline lock for the dma-fence-chain
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using the inline lock is now the recommended way for dma_fence
implementations.

So use this approach for the framework's internal fences as well.

Also saves about 4 bytes for the external spinlock.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Reviewed-by: Philipp Stanner <phasta@kernel.org>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://lore.kernel.org/r/20260219160822.1529-9-christian.koenig@amd.com
---
 include/linux/dma-fence-chain.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h
index 5cd3ba53b4a1..df3beadf1515 100644
--- a/include/linux/dma-fence-chain.h
+++ b/include/linux/dma-fence-chain.h
@@ -46,7 +46,6 @@ struct dma_fence_chain {
 		 */
 		struct irq_work work;
 	};
-	spinlock_t lock;
 };
 
 
-- 
cgit v1.2.3


From be6d4c9e9d714ebbf358be41332726a0f94b9ffa Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sat, 31 Jan 2026 07:34:16 +0200
Subject: dma-buf: Add dma_buf_attach_revocable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some exporters need a flow to synchronously revoke access to the DMA-buf
by importers. Once revoke is completed the importer is not permitted to
touch the memory otherwise they may get IOMMU faults, AERs, or worse.

DMA-buf today defines a revoke flow, for both pinned and dynamic
importers, which is broadly:

	dma_resv_lock(dmabuf->resv, NULL);
	// Prevent new mappings from being established
	priv->revoked = true;

	// Tell all importers to eventually unmap
	dma_buf_invalidate_mappings(dmabuf);

	// Wait for any inprogress fences on the old mapping
	dma_resv_wait_timeout(dmabuf->resv,
			      DMA_RESV_USAGE_BOOKKEEP, false,
			      MAX_SCHEDULE_TIMEOUT);
	dma_resv_unlock(dmabuf->resv, NULL);

	// Wait for all importers to complete unmap
	wait_for_completion(&priv->unmapped_comp);

This works well, and an importer that continues to access the DMA-buf
after unmapping it is very buggy.

However, the final wait for unmap is effectively unbounded. Several
importers do not support invalidate_mappings() at all and won't unmap
until userspace triggers it.

This unbounded wait is not suitable for exporters like VFIO and RDMA tha
need to issue revoke as part of their normal operations.

Add dma_buf_attach_revocable() to allow exporters to determine the
difference between importers that can complete the above in bounded time,
and those that can't. It can be called inside the exporter's attach op to
reject incompatible importers.

Document these details about how dma_buf_invalidate_mappings() works and
what the required sequence is to achieve a full revocation.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Link: https://lore.kernel.org/r/20260131-dmabuf-revoke-v7-6-463d956bd527@nvidia.com
---
 include/linux/dma-buf.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index e744b8f9bfad..166933b82e27 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -456,12 +456,8 @@ struct dma_buf_attach_ops {
 	 * called with this lock held as well. This makes sure that no mapping
 	 * is created concurrently with an ongoing move operation.
 	 *
-	 * Mappings stay valid and are not directly affected by this callback.
-	 * But the DMA-buf can now be in a different physical location, so all
-	 * mappings should be destroyed and re-created as soon as possible.
-	 *
-	 * New mappings can be created after this callback returns, and will
-	 * point to the new location of the DMA-buf.
+	 * See the kdoc for dma_buf_invalidate_mappings() for details on the
+	 * required behavior.
 	 */
 	void (*invalidate_mappings)(struct dma_buf_attachment *attach);
 };
@@ -579,6 +575,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
 void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
 				enum dma_data_direction);
 void dma_buf_invalidate_mappings(struct dma_buf *dma_buf);
+bool dma_buf_attach_revocable(struct dma_buf_attachment *attach);
 int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 			     enum dma_data_direction dir);
 int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
-- 
cgit v1.2.3