summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/xe/Makefile1
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h2
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c8
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.c23
-rw-r--r--drivers/gpu/drm/xe/xe_eu_stall.c4
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c9
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.c2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c33
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c2
-rw-r--r--drivers/gpu/drm/xe/xe_mem_pool.c403
-rw-r--r--drivers/gpu/drm/xe/xe_mem_pool.h35
-rw-r--r--drivers/gpu/drm/xe/xe_mem_pool_types.h21
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c56
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c1
-rw-r--r--drivers/gpu/drm/xe/xe_reg_whitelist.c2
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs.c54
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h5
-rw-r--r--drivers/gpu/drm/xe/xe_tuning.c2
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c5
-rw-r--r--drivers/gpu/drm/xe/xe_vm_madvise.c47
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c8
22 files changed, 621 insertions, 105 deletions
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 49de1c22a469..03242e8b3d87 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -88,6 +88,7 @@ xe-y += xe_bb.o \
xe_irq.o \
xe_late_bind_fw.o \
xe_lrc.o \
+ xe_mem_pool.o \
xe_migrate.o \
xe_mmio.o \
xe_mmio_gem.o \
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 4ebaa0888a43..9c88ca3ce768 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -583,7 +583,7 @@
#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32)
#define LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE REG_BIT(35 - 32)
-#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0)
+#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0, XE_REG_OPTION_MASKED)
#define CPSS_AWARE_DIS REG_BIT(3)
#define SARB_CHICKEN1 XE_REG_MCR(0xe90c)
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index a7c2dc7f224c..4075edf97421 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -2322,8 +2322,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
}
/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */
- if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
+ if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) {
+ xe_bo_free(bo);
return ERR_PTR(-EINVAL);
+ }
if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
!(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
@@ -2342,8 +2344,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
alignment = SZ_4K >> PAGE_SHIFT;
}
- if (type == ttm_bo_type_device && aligned_size != size)
+ if (type == ttm_bo_type_device && aligned_size != size) {
+ xe_bo_free(bo);
return ERR_PTR(-EINVAL);
+ }
if (!bo) {
bo = xe_bo_alloc();
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index ff8317bfc1ae..9d19940b8fc0 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -18,6 +18,7 @@
#include "xe_ggtt_types.h"
struct xe_device;
+struct xe_mem_pool_node;
struct xe_vm;
#define XE_BO_MAX_PLACEMENTS 3
@@ -88,7 +89,7 @@ struct xe_bo {
bool ccs_cleared;
/** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */
- struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
+ struct xe_mem_pool_node *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
/**
* @cpu_caching: CPU caching mode. Currently only used for userspace
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 7f9602b3363d..b9828da15897 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -258,6 +258,13 @@ out_unlock:
return ERR_PTR(ret);
}
+/*
+ * Takes ownership of @storage: on success it is transferred to the returned
+ * drm_gem_object; on failure it is freed before returning the error.
+ * This matches the contract of xe_bo_init_locked() which frees @storage on
+ * its error paths, so callers need not (and must not) free @storage after
+ * this call.
+ */
static struct drm_gem_object *
xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
struct dma_buf *dma_buf)
@@ -271,8 +278,10 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
int ret = 0;
dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
- if (!dummy_obj)
+ if (!dummy_obj) {
+ xe_bo_free(storage);
return ERR_PTR(-ENOMEM);
+ }
dummy_obj->resv = resv;
xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) {
@@ -281,6 +290,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
if (ret)
break;
+ /* xe_bo_init_locked() frees storage on error */
bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
0, /* Will require 1way or 2way for vm_bind */
ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec);
@@ -368,12 +378,15 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
goto out_err;
}
- /* Errors here will take care of freeing the bo. */
+ /*
+ * xe_dma_buf_init_obj() takes ownership of bo on both success
+ * and failure, so we must not touch bo after this call.
+ */
obj = xe_dma_buf_init_obj(dev, bo, dma_buf);
- if (IS_ERR(obj))
+ if (IS_ERR(obj)) {
+ dma_buf_detach(dma_buf, attach);
return obj;
-
-
+ }
get_dma_buf(dma_buf);
obj->import_attach = attach;
return obj;
diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
index c34408cfd292..dddcdd0bb7a3 100644
--- a/drivers/gpu/drm/xe/xe_eu_stall.c
+++ b/drivers/gpu/drm/xe/xe_eu_stall.c
@@ -869,14 +869,14 @@ static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
struct xe_eu_stall_data_stream *stream = file->private_data;
struct xe_gt *gt = stream->gt;
- drm_dev_put(&gt->tile->xe->drm);
-
mutex_lock(&gt->eu_stall->stream_lock);
xe_eu_stall_disable_locked(stream);
xe_eu_stall_data_buf_destroy(stream);
xe_eu_stall_stream_free(stream);
mutex_unlock(&gt->eu_stall->stream_lock);
+ drm_dev_put(&gt->tile->xe->drm);
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index b287d0e0e60a..071b8c41df43 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -1405,7 +1405,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
if (q->vm && q->hwe->hw_engine_group) {
err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
if (err)
- goto put_exec_queue;
+ goto kill_exec_queue;
}
}
@@ -1416,12 +1416,15 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
/* user id alloc must always be last in ioctl to prevent UAF */
err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
if (err)
- goto kill_exec_queue;
+ goto del_hw_engine_group;
args->exec_queue_id = id;
return 0;
+del_hw_engine_group:
+ if (q->vm && q->hwe && q->hwe->hw_engine_group)
+ xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
kill_exec_queue:
xe_exec_queue_kill(q);
delete_queue_group:
@@ -1760,7 +1763,7 @@ void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
unsigned int type)
{
- xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ xe_assert(gt_to_xe(q->gt), type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
dma_fence_put(q->tlb_inval[type].last_fence);
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index e5c234f3d795..0d13e357fb43 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -166,7 +166,7 @@ static int query_compatibility_version(struct xe_gsc *gsc)
&rd_offset);
if (err) {
xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err);
- return err;
+ goto out_bo;
}
compat->major = version_query_rd(xe, &bo->vmap, rd_offset, proj_major);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index a145234f662b..10556156eaad 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -261,22 +261,10 @@ static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
static void guc_submit_fini(void *arg)
{
struct xe_guc *guc = arg;
-
- /* Forcefully kill any remaining exec queues */
- xe_guc_ct_stop(&guc->ct);
- guc_submit_reset_prepare(guc);
- xe_guc_softreset(guc);
- xe_guc_submit_stop(guc);
- xe_uc_fw_sanitize(&guc->fw);
- xe_guc_submit_pause_abort(guc);
-}
-
-static void guc_submit_wedged_fini(void *arg)
-{
- struct xe_guc *guc = arg;
struct xe_exec_queue *q;
unsigned long index;
+ /* Drop any wedged queue refs */
mutex_lock(&guc->submission_state.lock);
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
if (exec_queue_wedged(q)) {
@@ -286,6 +274,14 @@ static void guc_submit_wedged_fini(void *arg)
}
}
mutex_unlock(&guc->submission_state.lock);
+
+ /* Forcefully kill any remaining exec queues */
+ xe_guc_ct_stop(&guc->ct);
+ guc_submit_reset_prepare(guc);
+ xe_guc_softreset(guc);
+ xe_guc_submit_stop(guc);
+ xe_uc_fw_sanitize(&guc->fw);
+ xe_guc_submit_pause_abort(guc);
}
static const struct xe_exec_queue_ops guc_exec_queue_ops;
@@ -1320,10 +1316,8 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
void xe_guc_submit_wedge(struct xe_guc *guc)
{
struct xe_device *xe = guc_to_xe(guc);
- struct xe_gt *gt = guc_to_gt(guc);
struct xe_exec_queue *q;
unsigned long index;
- int err;
xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
@@ -1335,15 +1329,6 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
return;
if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) {
- err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
- guc_submit_wedged_fini, guc);
- if (err) {
- xe_gt_err(gt, "Failed to register clean-up on wedged.mode=%s; "
- "Although device is wedged.\n",
- xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
- return;
- }
-
mutex_lock(&guc->submission_state.lock);
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
if (xe_exec_queue_get_unless_zero(q))
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 9d12a0d2f0b5..c725cde4508d 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -1214,7 +1214,7 @@ static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc,
if (xe_gt_WARN_ON(lrc->gt, max_len < 3))
return -ENOSPC;
- *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
+ *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_LRM_CS_MMIO | MI_LRI_NUM_REGS(1);
*cmd++ = CS_DEBUG_MODE2(0).addr;
*cmd++ = REG_MASKED_FIELD_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
diff --git a/drivers/gpu/drm/xe/xe_mem_pool.c b/drivers/gpu/drm/xe/xe_mem_pool.c
new file mode 100644
index 000000000000..d5e24d6aa88d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mem_pool.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include <linux/kernel.h>
+
+#include <drm/drm_managed.h>
+
+#include "instructions/xe_mi_commands.h"
+#include "xe_bo.h"
+#include "xe_device_types.h"
+#include "xe_map.h"
+#include "xe_mem_pool.h"
+#include "xe_mem_pool_types.h"
+#include "xe_tile_printk.h"
+
+/**
+ * struct xe_mem_pool - DRM MM pool for sub-allocating memory from a BO on an
+ * XE tile.
+ *
+ * The XE memory pool is a DRM MM manager that provides sub-allocation of memory
+ * from a backing buffer object (BO) on a specific XE tile. It is designed to
+ * manage memory for GPU workloads, allowing for efficient allocation and
+ * deallocation of memory regions within the BO.
+ *
+ * The memory pool maintains a primary BO that is pinned in the GGTT and mapped
+ * into the CPU address space for direct access. Optionally, it can also maintain
+ * a shadow BO that can be used for atomic updates to the primary BO's contents.
+ *
+ * The API provided by the memory pool allows clients to allocate and free memory
+ * regions, retrieve GPU and CPU addresses, and synchronize data between the
+ * primary and shadow BOs as needed.
+ */
+struct xe_mem_pool {
+ /** @base: Range allocator over [0, @size) in bytes */
+ struct drm_mm base;
+ /** @bo: Active pool BO (GGTT-pinned, CPU-mapped). */
+ struct xe_bo *bo;
+ /** @shadow: Shadow BO for atomic command updates. */
+ struct xe_bo *shadow;
+ /** @swap_guard: Timeline guard updating @bo and @shadow */
+ struct mutex swap_guard;
+ /** @cpu_addr: CPU virtual address of the active BO. */
+ void *cpu_addr;
+ /** @is_iomem: Indicates if the BO mapping is I/O memory. */
+ bool is_iomem;
+};
+
+static struct xe_mem_pool *node_to_pool(struct xe_mem_pool_node *node)
+{
+ return container_of(node->sa_node.mm, struct xe_mem_pool, base);
+}
+
+static struct xe_tile *pool_to_tile(struct xe_mem_pool *pool)
+{
+ return pool->bo->tile;
+}
+
+static void fini_pool_action(struct drm_device *drm, void *arg)
+{
+ struct xe_mem_pool *pool = arg;
+
+ if (pool->is_iomem)
+ kvfree(pool->cpu_addr);
+
+ drm_mm_takedown(&pool->base);
+}
+
+static int pool_shadow_init(struct xe_mem_pool *pool)
+{
+ struct xe_tile *tile = pool->bo->tile;
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_bo *shadow;
+ int ret;
+
+ xe_assert(xe, !pool->shadow);
+
+ ret = drmm_mutex_init(&xe->drm, &pool->swap_guard);
+ if (ret)
+ return ret;
+
+ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&pool->swap_guard);
+ fs_reclaim_release(GFP_KERNEL);
+ }
+ shadow = xe_managed_bo_create_pin_map(xe, tile,
+ xe_bo_size(pool->bo),
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_PINNED_NORESTORE);
+ if (IS_ERR(shadow))
+ return PTR_ERR(shadow);
+
+ pool->shadow = shadow;
+
+ return 0;
+}
+
+/**
+ * xe_mem_pool_init() - Initialize memory pool.
+ * @tile: the &xe_tile where allocate.
+ * @size: number of bytes to allocate.
+ * @guard: the size of the guard region at the end of the BO that is not
+ * sub-allocated, in bytes.
+ * @flags: flags to use to create shadow pool.
+ *
+ * Initializes a memory pool for sub-allocating memory from a backing BO on the
+ * specified XE tile. The backing BO is pinned in the GGTT and mapped into
+ * the CPU address space for direct access. Optionally, a shadow BO can also be
+ * initialized for atomic updates to the primary BO's contents.
+ *
+ * Returns: a pointer to the &xe_mem_pool, or an error pointer on failure.
+ */
+struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size,
+ u32 guard, int flags)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_mem_pool *pool;
+ struct xe_bo *bo;
+ u32 managed_size;
+ int ret;
+
+ xe_tile_assert(tile, size > guard);
+ managed_size = size - guard;
+
+ pool = drmm_kzalloc(&xe->drm, sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ return ERR_PTR(-ENOMEM);
+
+ bo = xe_managed_bo_create_pin_map(xe, tile, size,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_PINNED_NORESTORE);
+ if (IS_ERR(bo)) {
+ xe_tile_err(tile, "Failed to prepare %uKiB BO for mem pool (%pe)\n",
+ size / SZ_1K, bo);
+ return ERR_CAST(bo);
+ }
+ pool->bo = bo;
+ pool->is_iomem = bo->vmap.is_iomem;
+
+ if (pool->is_iomem) {
+ pool->cpu_addr = kvzalloc(size, GFP_KERNEL);
+ if (!pool->cpu_addr)
+ return ERR_PTR(-ENOMEM);
+ } else {
+ pool->cpu_addr = bo->vmap.vaddr;
+ }
+
+ if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY) {
+ ret = pool_shadow_init(pool);
+
+ if (ret)
+ goto out_err;
+ }
+
+ drm_mm_init(&pool->base, 0, managed_size);
+ ret = drmm_add_action_or_reset(&xe->drm, fini_pool_action, pool);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return pool;
+
+out_err:
+ if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY)
+ xe_tile_err(tile,
+ "Failed to initialize shadow BO for mem pool (%d)\n", ret);
+ if (bo->vmap.is_iomem)
+ kvfree(pool->cpu_addr);
+ return ERR_PTR(ret);
+}
+
+/**
+ * xe_mem_pool_sync() - Copy the entire contents of the main pool to shadow pool.
+ * @pool: the memory pool containing the primary and shadow BOs.
+ *
+ * Copies the entire contents of the primary pool to the shadow pool. This must
+ * be done after xe_mem_pool_init() with the XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY
+ * flag to ensure that the shadow pool has the same initial contents as the primary
+ * pool. After this initial synchronization, clients can choose to synchronize the
+ * shadow pool with the primary pool on a node basis using
+ * xe_mem_pool_sync_shadow_locked() as needed.
+ *
+ * Return: None.
+ */
+void xe_mem_pool_sync(struct xe_mem_pool *pool)
+{
+ struct xe_tile *tile = pool_to_tile(pool);
+ struct xe_device *xe = tile_to_xe(tile);
+
+ xe_tile_assert(tile, pool->shadow);
+
+ xe_map_memcpy_to(xe, &pool->shadow->vmap, 0,
+ pool->cpu_addr, xe_bo_size(pool->bo));
+}
+
+/**
+ * xe_mem_pool_swap_shadow_locked() - Swap the primary BO with the shadow BO.
+ * @pool: the memory pool containing the primary and shadow BOs.
+ *
+ * Swaps the primary buffer object with the shadow buffer object in the mem
+ * pool. This allows for atomic updates to the contents of the primary BO
+ * by first writing to the shadow BO and then swapping it with the primary BO.
+ * Swap_guard must be held to ensure synchronization with any concurrent swap
+ * operations.
+ *
+ * Return: None.
+ */
+void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool)
+{
+ struct xe_tile *tile = pool_to_tile(pool);
+
+ xe_tile_assert(tile, pool->shadow);
+ lockdep_assert_held(&pool->swap_guard);
+
+ swap(pool->bo, pool->shadow);
+ if (!pool->bo->vmap.is_iomem)
+ pool->cpu_addr = pool->bo->vmap.vaddr;
+}
+
+/**
+ * xe_mem_pool_sync_shadow_locked() - Copy node from primary pool to shadow pool.
+ * @node: the node allocated in the memory pool.
+ *
+ * Copies the specified batch buffer from the primary pool to the shadow pool.
+ * Swap_guard must be held to ensure synchronization with any concurrent swap
+ * operations.
+ *
+ * Return: None.
+ */
+void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node)
+{
+ struct xe_mem_pool *pool = node_to_pool(node);
+ struct xe_tile *tile = pool_to_tile(pool);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct drm_mm_node *sa_node = &node->sa_node;
+
+ xe_tile_assert(tile, pool->shadow);
+ lockdep_assert_held(&pool->swap_guard);
+
+ xe_map_memcpy_to(xe, &pool->shadow->vmap,
+ sa_node->start,
+ pool->cpu_addr + sa_node->start,
+ sa_node->size);
+}
+
+/**
+ * xe_mem_pool_gpu_addr() - Retrieve GPU address of memory pool.
+ * @pool: the memory pool
+ *
+ * Returns: GGTT address of the memory pool.
+ */
+u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool)
+{
+ return xe_bo_ggtt_addr(pool->bo);
+}
+
+/**
+ * xe_mem_pool_cpu_addr() - Retrieve CPU address of manager pool.
+ * @pool: the memory pool
+ *
+ * Returns: CPU virtual address of memory pool.
+ */
+void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool)
+{
+ return pool->cpu_addr;
+}
+
+/**
+ * xe_mem_pool_bo_swap_guard() - Retrieve the mutex used to guard swap
+ * operations on a memory pool.
+ * @pool: the memory pool
+ *
+ * Returns: Swap guard mutex or NULL if shadow pool is not created.
+ */
+struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool)
+{
+ if (!pool->shadow)
+ return NULL;
+
+ return &pool->swap_guard;
+}
+
+/**
+ * xe_mem_pool_bo_flush_write() - Copy the data from the sub-allocation
+ * to the GPU memory.
+ * @node: the node allocated in the memory pool to flush.
+ */
+void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node)
+{
+ struct xe_mem_pool *pool = node_to_pool(node);
+ struct xe_tile *tile = pool_to_tile(pool);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct drm_mm_node *sa_node = &node->sa_node;
+
+ if (!pool->bo->vmap.is_iomem)
+ return;
+
+ xe_map_memcpy_to(xe, &pool->bo->vmap, sa_node->start,
+ pool->cpu_addr + sa_node->start,
+ sa_node->size);
+}
+
+/**
+ * xe_mem_pool_bo_sync_read() - Copy the data from GPU memory to the
+ * sub-allocation.
+ * @node: the node allocated in the memory pool to read back.
+ */
+void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node)
+{
+ struct xe_mem_pool *pool = node_to_pool(node);
+ struct xe_tile *tile = pool_to_tile(pool);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct drm_mm_node *sa_node = &node->sa_node;
+
+ if (!pool->bo->vmap.is_iomem)
+ return;
+
+ xe_map_memcpy_from(xe, pool->cpu_addr + sa_node->start,
+ &pool->bo->vmap, sa_node->start, sa_node->size);
+}
+
+/**
+ * xe_mem_pool_alloc_node() - Allocate a new node for use with xe_mem_pool.
+ *
+ * Returns: node structure or an ERR_PTR(-ENOMEM).
+ */
+struct xe_mem_pool_node *xe_mem_pool_alloc_node(void)
+{
+ struct xe_mem_pool_node *node = kzalloc_obj(*node);
+
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ return node;
+}
+
+/**
+ * xe_mem_pool_insert_node() - Insert a node into the memory pool.
+ * @pool: the memory pool to insert into
+ * @node: the node to insert
+ * @size: the size of the node to be allocated in bytes.
+ *
+ * Inserts a node into the specified memory pool using drm_mm for
+ * allocation.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int xe_mem_pool_insert_node(struct xe_mem_pool *pool,
+ struct xe_mem_pool_node *node, u32 size)
+{
+ if (!pool)
+ return -EINVAL;
+
+ return drm_mm_insert_node(&pool->base, &node->sa_node, size);
+}
+
+/**
+ * xe_mem_pool_free_node() - Free a node allocated from the memory pool.
+ * @node: the node to free
+ *
+ * Returns: None.
+ */
+void xe_mem_pool_free_node(struct xe_mem_pool_node *node)
+{
+ if (!node)
+ return;
+
+ drm_mm_remove_node(&node->sa_node);
+ kfree(node);
+}
+
+/**
+ * xe_mem_pool_node_cpu_addr() - Retrieve CPU address of the node.
+ * @node: the node allocated in the memory pool
+ *
+ * Returns: CPU virtual address of the node.
+ */
+void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node)
+{
+ struct xe_mem_pool *pool = node_to_pool(node);
+
+ return xe_mem_pool_cpu_addr(pool) + node->sa_node.start;
+}
+
+/**
+ * xe_mem_pool_dump() - Dump the state of the DRM MM manager for debugging.
+ * @pool: the memory pool info be dumped.
+ * @p: The DRM printer to use for output.
+ *
+ * Only the drm managed region is dumped, not the state of the BOs or any other
+ * pool information.
+ *
+ * Returns: None.
+ */
+void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p)
+{
+ drm_mm_print(&pool->base, p);
+}
diff --git a/drivers/gpu/drm/xe/xe_mem_pool.h b/drivers/gpu/drm/xe/xe_mem_pool.h
new file mode 100644
index 000000000000..89cd2555fe91
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mem_pool.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+#ifndef _XE_MEM_POOL_H_
+#define _XE_MEM_POOL_H_
+
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+#include <drm/drm_mm.h>
+#include "xe_mem_pool_types.h"
+
+struct drm_printer;
+struct xe_mem_pool;
+struct xe_tile;
+
+struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size,
+ u32 guard, int flags);
+void xe_mem_pool_sync(struct xe_mem_pool *pool);
+void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool);
+void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node);
+u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool);
+void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool);
+struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool);
+void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node);
+void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node);
+struct xe_mem_pool_node *xe_mem_pool_alloc_node(void);
+int xe_mem_pool_insert_node(struct xe_mem_pool *pool,
+ struct xe_mem_pool_node *node, u32 size);
+void xe_mem_pool_free_node(struct xe_mem_pool_node *node);
+void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node);
+void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mem_pool_types.h b/drivers/gpu/drm/xe/xe_mem_pool_types.h
new file mode 100644
index 000000000000..d5e926c93351
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mem_pool_types.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef _XE_MEM_POOL_TYPES_H_
+#define _XE_MEM_POOL_TYPES_H_
+
+#include <drm/drm_mm.h>
+
+#define XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY BIT(0)
+
+/**
+ * struct xe_mem_pool_node - Sub-range allocations from mem pool.
+ */
+struct xe_mem_pool_node {
+ /** @sa_node: drm_mm_node for this allocation. */
+ struct drm_mm_node sa_node;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index fc918b4fba54..5fdc89ed5256 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -29,6 +29,7 @@
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_map.h"
+#include "xe_mem_pool.h"
#include "xe_mocs.h"
#include "xe_printk.h"
#include "xe_pt.h"
@@ -1166,11 +1167,12 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
u32 batch_size, batch_size_allocated;
struct xe_device *xe = gt_to_xe(gt);
struct xe_res_cursor src_it, ccs_it;
+ struct xe_mem_pool *bb_pool;
struct xe_sriov_vf_ccs_ctx *ctx;
- struct xe_sa_manager *bb_pool;
u64 size = xe_bo_size(src_bo);
- struct xe_bb *bb = NULL;
+ struct xe_mem_pool_node *bb;
u64 src_L0, src_L0_ofs;
+ struct xe_bb xe_bb_tmp;
u32 src_L0_pt;
int err;
@@ -1208,18 +1210,18 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
size -= src_L0;
}
- bb = xe_bb_alloc(gt);
+ bb = xe_mem_pool_alloc_node();
if (IS_ERR(bb))
return PTR_ERR(bb);
bb_pool = ctx->mem.ccs_bb_pool;
- scoped_guard(mutex, xe_sa_bo_swap_guard(bb_pool)) {
- xe_sa_bo_swap_shadow(bb_pool);
+ scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
+ xe_mem_pool_swap_shadow_locked(bb_pool);
- err = xe_bb_init(bb, bb_pool, batch_size);
+ err = xe_mem_pool_insert_node(bb_pool, bb, batch_size * sizeof(u32));
if (err) {
xe_gt_err(gt, "BB allocation failed.\n");
- xe_bb_free(bb, NULL);
+ kfree(bb);
return err;
}
@@ -1227,6 +1229,7 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
size = xe_bo_size(src_bo);
batch_size = 0;
+ xe_bb_tmp = (struct xe_bb){ .cs = xe_mem_pool_node_cpu_addr(bb), .len = 0 };
/*
* Emit PTE and copy commands here.
* The CCS copy command can only support limited size. If the size to be
@@ -1255,24 +1258,27 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
batch_size += EMIT_COPY_CCS_DW;
- emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src);
+ emit_pte(m, &xe_bb_tmp, src_L0_pt, false, true, &src_it, src_L0, src);
- emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
+ emit_pte(m, &xe_bb_tmp, ccs_pt, false, false, &ccs_it, ccs_size, src);
- bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
- flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt,
+ xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
+ flush_flags);
+ flush_flags = xe_migrate_ccs_copy(m, &xe_bb_tmp, src_L0_ofs, src_is_pltt,
src_L0_ofs, dst_is_pltt,
src_L0, ccs_ofs, true);
- bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
+ xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
+ flush_flags);
size -= src_L0;
}
- xe_assert(xe, (batch_size_allocated == bb->len));
+ xe_assert(xe, (batch_size_allocated == xe_bb_tmp.len));
+ xe_assert(xe, bb->sa_node.size == xe_bb_tmp.len * sizeof(u32));
src_bo->bb_ccs[read_write] = bb;
xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
- xe_sa_bo_sync_shadow(bb->bo);
+ xe_mem_pool_sync_shadow_locked(bb);
}
return 0;
@@ -1297,10 +1303,10 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
enum xe_sriov_vf_ccs_rw_ctxs read_write)
{
- struct xe_bb *bb = src_bo->bb_ccs[read_write];
+ struct xe_mem_pool_node *bb = src_bo->bb_ccs[read_write];
struct xe_device *xe = xe_bo_device(src_bo);
+ struct xe_mem_pool *bb_pool;
struct xe_sriov_vf_ccs_ctx *ctx;
- struct xe_sa_manager *bb_pool;
u32 *cs;
xe_assert(xe, IS_SRIOV_VF(xe));
@@ -1308,17 +1314,17 @@ void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
ctx = &xe->sriov.vf.ccs.contexts[read_write];
bb_pool = ctx->mem.ccs_bb_pool;
- guard(mutex) (xe_sa_bo_swap_guard(bb_pool));
- xe_sa_bo_swap_shadow(bb_pool);
-
- cs = xe_sa_bo_cpu_addr(bb->bo);
- memset(cs, MI_NOOP, bb->len * sizeof(u32));
- xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
+ scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
+ xe_mem_pool_swap_shadow_locked(bb_pool);
- xe_sa_bo_sync_shadow(bb->bo);
+ cs = xe_mem_pool_node_cpu_addr(bb);
+ memset(cs, MI_NOOP, bb->sa_node.size);
+ xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
- xe_bb_free(bb, NULL);
- src_bo->bb_ccs[read_write] = NULL;
+ xe_mem_pool_sync_shadow_locked(bb);
+ xe_mem_pool_free_node(bb);
+ src_bo->bb_ccs[read_write] = NULL;
+ }
}
/**
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 01673d2b2464..9f98d0334164 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -118,6 +118,7 @@ static const struct xe_graphics_desc graphics_xe2 = {
static const struct xe_graphics_desc graphics_xe3p_lpg = {
XE2_GFX_FEATURES,
+ .has_indirect_ring_state = 1,
.multi_queue_engine_class_mask = BIT(XE_ENGINE_CLASS_COPY) | BIT(XE_ENGINE_CLASS_COMPUTE),
.num_geometry_xecore_fuse_regs = 3,
.num_compute_xecore_fuse_regs = 3,
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 80577e4b7437..8cc313182968 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -226,7 +226,7 @@ void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
}
range_start = reg & REG_GENMASK(25, range_bit);
- range_end = range_start | REG_GENMASK(range_bit, 0);
+ range_end = range_start | REG_GENMASK(range_bit - 1, 0);
switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) {
case RING_FORCE_TO_NONPRIV_ACCESS_RW:
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
index db023fb66a27..09b99fb2608b 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
@@ -14,9 +14,9 @@
#include "xe_guc.h"
#include "xe_guc_submit.h"
#include "xe_lrc.h"
+#include "xe_mem_pool.h"
#include "xe_migrate.h"
#include "xe_pm.h"
-#include "xe_sa.h"
#include "xe_sriov_printk.h"
#include "xe_sriov_vf.h"
#include "xe_sriov_vf_ccs.h"
@@ -141,43 +141,47 @@ static u64 get_ccs_bb_pool_size(struct xe_device *xe)
static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
{
+ struct xe_mem_pool *pool;
struct xe_device *xe = tile_to_xe(tile);
- struct xe_sa_manager *sa_manager;
+ u32 *pool_cpu_addr, *last_dw_addr;
u64 bb_pool_size;
- int offset, err;
+ int err;
bb_pool_size = get_ccs_bb_pool_size(xe);
xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
- sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16,
- XE_SA_BO_MANAGER_FLAG_SHADOW);
-
- if (IS_ERR(sa_manager)) {
- xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
- sa_manager);
- err = PTR_ERR(sa_manager);
+ pool = xe_mem_pool_init(tile, bb_pool_size, sizeof(u32),
+ XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY);
+ if (IS_ERR(pool)) {
+ xe_sriov_err(xe, "xe_mem_pool_init failed with error: %pe\n",
+ pool);
+ err = PTR_ERR(pool);
return err;
}
- offset = 0;
- xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
- bb_pool_size);
- xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
- bb_pool_size);
+ pool_cpu_addr = xe_mem_pool_cpu_addr(pool);
+ memset(pool_cpu_addr, 0, bb_pool_size);
- offset = bb_pool_size - sizeof(u32);
- xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
- xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
+ last_dw_addr = pool_cpu_addr + (bb_pool_size / sizeof(u32)) - 1;
+ *last_dw_addr = MI_BATCH_BUFFER_END;
- ctx->mem.ccs_bb_pool = sa_manager;
+ /**
+ * Sync the main copy and shadow copy so that the shadow copy is
+ * replica of main copy. We sync only BBs after init part. So, we
+ * need to make sure the main pool and shadow copy are in sync after
+ * this point. This is needed as GuC may read the BB commands from
+ * shadow copy.
+ */
+ xe_mem_pool_sync(pool);
+ ctx->mem.ccs_bb_pool = pool;
return 0;
}
static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
{
- u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
+ u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
u32 dw[10], i = 0;
@@ -388,7 +392,7 @@ err_ret:
#define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32))
void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
{
- u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
+ u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);
@@ -412,8 +416,8 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
struct xe_device *xe = xe_bo_device(bo);
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
struct xe_sriov_vf_ccs_ctx *ctx;
+ struct xe_mem_pool_node *bb;
struct xe_tile *tile;
- struct xe_bb *bb;
int err = 0;
xe_assert(xe, IS_VF_CCS_READY(xe));
@@ -445,7 +449,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
{
struct xe_device *xe = xe_bo_device(bo);
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
- struct xe_bb *bb;
+ struct xe_mem_pool_node *bb;
xe_assert(xe, IS_VF_CCS_READY(xe));
@@ -471,8 +475,8 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
*/
void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
{
- struct xe_sa_manager *bb_pool;
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+ struct xe_mem_pool *bb_pool;
if (!IS_VF_CCS_READY(xe))
return;
@@ -485,7 +489,7 @@ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
drm_printf(p, "-------------------------\n");
- drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
+ xe_mem_pool_dump(bb_pool, p);
drm_puts(p, "\n");
}
}
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
index 22c499943d2a..6fc8f97ef3f4 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
@@ -17,9 +17,6 @@ enum xe_sriov_vf_ccs_rw_ctxs {
XE_SRIOV_VF_CCS_CTX_COUNT
};
-struct xe_migrate;
-struct xe_sa_manager;
-
/**
* struct xe_sriov_vf_ccs_ctx - VF CCS migration context data.
*/
@@ -33,7 +30,7 @@ struct xe_sriov_vf_ccs_ctx {
/** @mem: memory data */
struct {
/** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */
- struct xe_sa_manager *ccs_bb_pool;
+ struct xe_mem_pool *ccs_bb_pool;
} mem;
};
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index f8de6a4bf189..0b78ec2bc6a4 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -97,7 +97,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
{ XE_RTP_NAME("Tuning: Set STLB Bank Hash Mode to 4KB"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, XE_RTP_END_VERSION_UNDEFINED),
IS_INTEGRATED),
- XE_RTP_ACTIONS(FIELD_SET(XEHP_GAMSTLB_CTRL, BANK_HASH_MODE,
+ XE_RTP_ACTIONS(FIELD_SET(GAMSTLB_CTRL, BANK_HASH_MODE,
BANK_HASH_4KB_MODE))
},
};
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 56e2db50bb36..a717a2b8dea3 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3658,6 +3658,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+ XE_IOCTL_DBG(xe, !IS_DGFX(xe) && coh_mode == XE_COH_NONE &&
+ is_cpu_addr_mirror) ||
XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) &&
(op == DRM_XE_VM_BIND_OP_MAP_USERPTR ||
is_cpu_addr_mirror) &&
@@ -4156,7 +4158,8 @@ int xe_vm_get_property_ioctl(struct drm_device *drm, void *data,
int ret = 0;
if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] ||
- args->reserved[2])))
+ args->reserved[2] || args->extensions ||
+ args->pad)))
return -EINVAL;
vm = xe_vm_lookup(xef, args->vm_id);
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 66f00d3f5c07..c78906dea82b 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -621,6 +621,45 @@ static int xe_madvise_purgeable_retained_to_user(const struct xe_madvise_details
return 0;
}
+static bool check_pat_args_are_sane(struct xe_device *xe,
+ struct xe_vmas_in_madvise_range *madvise_range,
+ u16 pat_index)
+{
+ u16 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+ int i;
+
+ /*
+ * Using coh_none with CPU cached buffers is not allowed on iGPU.
+ * On iGPU the GPU shares the LLC with the CPU, so with coh_none
+ * the GPU bypasses CPU caches and reads directly from DRAM,
+ * potentially seeing stale sensitive data from previously freed
+ * pages. On dGPU this restriction does not apply, because the
+ * platform does not provide a non-coherent system memory access
+ * path that would violate the DMA coherency contract.
+ */
+ if (coh_mode != XE_COH_NONE || IS_DGFX(xe))
+ return true;
+
+ for (i = 0; i < madvise_range->num_vmas; i++) {
+ struct xe_vma *vma = madvise_range->vmas[i];
+ struct xe_bo *bo = xe_vma_bo(vma);
+
+ if (bo) {
+ /* BO with WB caching + COH_NONE is not allowed */
+ if (XE_IOCTL_DBG(xe, bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
+ return false;
+ /* Imported dma-buf without caching info, assume cached */
+ if (XE_IOCTL_DBG(xe, !bo->cpu_caching))
+ return false;
+ } else if (XE_IOCTL_DBG(xe, xe_vma_is_cpu_addr_mirror(vma) ||
+ xe_vma_is_userptr(vma)))
+ /* System memory (userptr/SVM) is always CPU cached */
+ return false;
+ }
+
+ return true;
+}
+
static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
int num_vmas, u32 atomic_val)
{
@@ -750,6 +789,14 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
}
}
+ if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
+ if (!check_pat_args_are_sane(xe, &madvise_range,
+ args->pat_index.val)) {
+ err = -EINVAL;
+ goto free_vmas;
+ }
+ }
+
if (madvise_range.has_bo_vmas) {
if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
if (!check_bo_args_are_sane(vm, madvise_range.vmas,
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 546296f0220b..4b1cbced06be 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -743,14 +743,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
},
- { XE_RTP_NAME("14019988906"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
- },
- { XE_RTP_NAME("14019877138"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
- },
{ XE_RTP_NAME("14021490052"),
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(FF_MODE,