 drivers/gpu/drm/xe/Makefile                |   1
 drivers/gpu/drm/xe/regs/xe_gt_regs.h       |   2
 drivers/gpu/drm/xe/xe_bo.c                 |   8
 drivers/gpu/drm/xe/xe_bo_types.h           |   3
 drivers/gpu/drm/xe/xe_dma_buf.c            |  23
 drivers/gpu/drm/xe/xe_eu_stall.c           |   4
 drivers/gpu/drm/xe/xe_exec_queue.c         |   9
 drivers/gpu/drm/xe/xe_gsc.c                |   2
 drivers/gpu/drm/xe/xe_guc_submit.c         |  33
 drivers/gpu/drm/xe/xe_lrc.c                |   2
 drivers/gpu/drm/xe/xe_mem_pool.c           | 403
 drivers/gpu/drm/xe/xe_mem_pool.h           |  35
 drivers/gpu/drm/xe/xe_mem_pool_types.h     |  21
 drivers/gpu/drm/xe/xe_migrate.c            |  56
 drivers/gpu/drm/xe/xe_pci.c                |   1
 drivers/gpu/drm/xe/xe_reg_whitelist.c      |   2
 drivers/gpu/drm/xe/xe_sriov_vf_ccs.c       |  54
 drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h |   5
 drivers/gpu/drm/xe/xe_tuning.c             |   2
 drivers/gpu/drm/xe/xe_vm.c                 |   5
 drivers/gpu/drm/xe/xe_vm_madvise.c         |  47
 drivers/gpu/drm/xe/xe_wa.c                 |   8
 22 files changed, 621 insertions(+), 105 deletions(-)
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 49de1c22a469..03242e8b3d87 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -88,6 +88,7 @@ xe-y += xe_bb.o \
 	xe_irq.o \
 	xe_late_bind_fw.o \
 	xe_lrc.o \
+	xe_mem_pool.o \
 	xe_migrate.o \
 	xe_mmio.o \
 	xe_mmio_gem.o \
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 4ebaa0888a43..9c88ca3ce768 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -583,7 +583,7 @@
 #define   DISABLE_128B_EVICTION_COMMAND_UDW	REG_BIT(36 - 32)
 #define   LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE	REG_BIT(35 - 32)
 
-#define ROW_CHICKEN5				XE_REG_MCR(0xe7f0)
+#define ROW_CHICKEN5				XE_REG_MCR(0xe7f0, XE_REG_OPTION_MASKED)
 #define   CPSS_AWARE_DIS			REG_BIT(3)
 
 #define SARB_CHICKEN1				XE_REG_MCR(0xe90c)
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index a7c2dc7f224c..4075edf97421 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -2322,8 +2322,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
 	}
 
 	/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */
-	if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
+	if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) {
+		xe_bo_free(bo);
 		return ERR_PTR(-EINVAL);
+	}
 
 	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
 	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
@@ -2342,8 +2344,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
 		alignment = SZ_4K >> PAGE_SHIFT;
 	}
 
-	if (type == ttm_bo_type_device && aligned_size != size)
+	if (type == ttm_bo_type_device && aligned_size != size) {
+		xe_bo_free(bo);
 		return ERR_PTR(-EINVAL);
+	}
 
 	if (!bo) {
 		bo = xe_bo_alloc();
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index ff8317bfc1ae..9d19940b8fc0 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -18,6 +18,7 @@
 #include "xe_ggtt_types.h"
 
 struct xe_device;
+struct xe_mem_pool_node;
 struct xe_vm;
 
 #define XE_BO_MAX_PLACEMENTS	3
@@ -88,7 +89,7 @@ struct xe_bo {
 	bool ccs_cleared;
 
 	/** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */
-	struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
+	struct xe_mem_pool_node *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
 
 	/**
 	 * @cpu_caching: CPU caching mode. Currently only used for userspace
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 7f9602b3363d..b9828da15897 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -258,6 +258,13 @@ out_unlock:
 	return ERR_PTR(ret);
 }
 
+/*
+ * Takes ownership of @storage: on success it is transferred to the returned
+ * drm_gem_object; on failure it is freed before returning the error.
+ * This matches the contract of xe_bo_init_locked() which frees @storage on
+ * its error paths, so callers need not (and must not) free @storage after
+ * this call.
+ */
 static struct drm_gem_object *
 xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
 		    struct dma_buf *dma_buf)
@@ -271,8 +278,10 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
 	int ret = 0;
 
 	dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
-	if (!dummy_obj)
+	if (!dummy_obj) {
+		xe_bo_free(storage);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	dummy_obj->resv = resv;
 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) {
@@ -281,6 +290,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
 		if (ret)
 			break;
 
+		/* xe_bo_init_locked() frees storage on error */
 		bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
 				       0, /* Will require 1way or 2way for vm_bind */
 				       ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec);
@@ -368,12 +378,15 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
 		goto out_err;
 	}
 
-	/* Errors here will take care of freeing the bo. */
+	/*
+	 * xe_dma_buf_init_obj() takes ownership of bo on both success
+	 * and failure, so we must not touch bo after this call.
+	 */
 	obj = xe_dma_buf_init_obj(dev, bo, dma_buf);
-	if (IS_ERR(obj))
+	if (IS_ERR(obj)) {
+		dma_buf_detach(dma_buf, attach);
 		return obj;
-
-
+	}
 	get_dma_buf(dma_buf);
 	obj->import_attach = attach;
 	return obj;
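The ownership comment above is worth restating from the caller's side. Below is a minimal sketch of the calling convention; my_obj_init() is a hypothetical stand-in for any init function that, like xe_dma_buf_init_obj() and xe_bo_init_locked(), consumes its storage argument on every path:

    /*
     * Hypothetical consuming-init caller. After my_obj_init() returns,
     * this function no longer owns storage: on success it lives inside
     * the returned object, on failure my_obj_init() already freed it,
     * so calling xe_bo_free(storage) here would be a double free.
     */
    static struct drm_gem_object *import_sketch(struct drm_device *dev,
                                                struct dma_buf *dma_buf)
    {
            struct xe_bo *storage = xe_bo_alloc();

            if (IS_ERR(storage))
                    return ERR_CAST(storage);

            return my_obj_init(dev, storage, dma_buf); /* consumes storage */
    }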
diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
index c34408cfd292..dddcdd0bb7a3 100644
--- a/drivers/gpu/drm/xe/xe_eu_stall.c
+++ b/drivers/gpu/drm/xe/xe_eu_stall.c
@@ -869,14 +869,14 @@ static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
 	struct xe_eu_stall_data_stream *stream = file->private_data;
 	struct xe_gt *gt = stream->gt;
 
-	drm_dev_put(&gt->tile->xe->drm);
-
 	mutex_lock(&gt->eu_stall->stream_lock);
 	xe_eu_stall_disable_locked(stream);
 	xe_eu_stall_data_buf_destroy(stream);
 	xe_eu_stall_stream_free(stream);
 	mutex_unlock(&gt->eu_stall->stream_lock);
 
+	drm_dev_put(&gt->tile->xe->drm);
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index b287d0e0e60a..071b8c41df43 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -1405,7 +1405,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 		if (q->vm && q->hwe->hw_engine_group) {
 			err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
 			if (err)
-				goto put_exec_queue;
+				goto kill_exec_queue;
 		}
 	}
 
@@ -1416,12 +1416,15 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 	/* user id alloc must always be last in ioctl to prevent UAF */
 	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
 	if (err)
-		goto kill_exec_queue;
+		goto del_hw_engine_group;
 
 	args->exec_queue_id = id;
 
 	return 0;
 
+del_hw_engine_group:
+	if (q->vm && q->hwe && q->hwe->hw_engine_group)
+		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
 kill_exec_queue:
 	xe_exec_queue_kill(q);
 delete_queue_group:
@@ -1760,7 +1763,7 @@ void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
 void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
 						     unsigned int type)
 {
-	xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+	xe_assert(gt_to_xe(q->gt), type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
 		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
 
 	dma_fence_put(q->tlb_inval[type].last_fence);
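The xe_exec_queue.c fix restores the usual ioctl unwind convention: each goto label undoes exactly the steps that succeeded before the failure, in reverse order, with the user-visible id published last. A stripped-down sketch; add_to_group/publish_id and their undo counterparts are hypothetical helpers, not real xe functions:

    static int create_sketch(struct xe_exec_queue *q)
    {
            int err;

            err = add_to_group(q);          /* hypothetical setup step */
            if (err)
                    goto kill;

            err = publish_id(q);            /* last: the id makes q user-reachable */
            if (err)
                    goto del_group;         /* unwind the group add, then kill */

            return 0;

    del_group:
            del_from_group(q);
    kill:
            kill_queue(q);
            return err;
    }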
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index e5c234f3d795..0d13e357fb43 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -166,7 +166,7 @@ static int query_compatibility_version(struct xe_gsc *gsc)
 			      &rd_offset);
 	if (err) {
 		xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err);
-		return err;
+		goto out_bo;
 	}
 
 	compat->major = version_query_rd(xe, &bo->vmap, rd_offset, proj_major);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index a145234f662b..10556156eaad 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -261,22 +261,10 @@ static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
 static void guc_submit_fini(void *arg)
 {
 	struct xe_guc *guc = arg;
-
-	/* Forcefully kill any remaining exec queues */
-	xe_guc_ct_stop(&guc->ct);
-	guc_submit_reset_prepare(guc);
-	xe_guc_softreset(guc);
-	xe_guc_submit_stop(guc);
-	xe_uc_fw_sanitize(&guc->fw);
-	xe_guc_submit_pause_abort(guc);
-}
-
-static void guc_submit_wedged_fini(void *arg)
-{
-	struct xe_guc *guc = arg;
 	struct xe_exec_queue *q;
 	unsigned long index;
 
+	/* Drop any wedged queue refs */
 	mutex_lock(&guc->submission_state.lock);
 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
 		if (exec_queue_wedged(q)) {
@@ -286,6 +274,14 @@ static void guc_submit_wedged_fini(void *arg)
 		}
 	}
 	mutex_unlock(&guc->submission_state.lock);
+
+	/* Forcefully kill any remaining exec queues */
+	xe_guc_ct_stop(&guc->ct);
+	guc_submit_reset_prepare(guc);
+	xe_guc_softreset(guc);
+	xe_guc_submit_stop(guc);
+	xe_uc_fw_sanitize(&guc->fw);
+	xe_guc_submit_pause_abort(guc);
 }
 
 static const struct xe_exec_queue_ops guc_exec_queue_ops;
@@ -1320,10 +1316,8 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
 void xe_guc_submit_wedge(struct xe_guc *guc)
 {
 	struct xe_device *xe = guc_to_xe(guc);
-	struct xe_gt *gt = guc_to_gt(guc);
 	struct xe_exec_queue *q;
 	unsigned long index;
-	int err;
 
 	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
 
@@ -1335,15 +1329,6 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
 		return;
 
 	if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) {
-		err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
-					       guc_submit_wedged_fini, guc);
-		if (err) {
-			xe_gt_err(gt, "Failed to register clean-up on wedged.mode=%s; "
-				  "Although device is wedged.\n",
-				  xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
-			return;
-		}
-
 		mutex_lock(&guc->submission_state.lock);
 		xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
 			if (xe_exec_queue_get_unless_zero(q))
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 9d12a0d2f0b5..c725cde4508d 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -1214,7 +1214,7 @@ static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc,
 	if (xe_gt_WARN_ON(lrc->gt, max_len < 3))
 		return -ENOSPC;
 
-	*cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
+	*cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_LRM_CS_MMIO | MI_LRI_NUM_REGS(1);
 	*cmd++ = CS_DEBUG_MODE2(0).addr;
 	*cmd++ = REG_MASKED_FIELD_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
diff --git a/drivers/gpu/drm/xe/xe_mem_pool.c b/drivers/gpu/drm/xe/xe_mem_pool.c
new file mode 100644
index 000000000000..d5e24d6aa88d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mem_pool.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include <linux/kernel.h>
+
+#include <drm/drm_managed.h>
+
+#include "instructions/xe_mi_commands.h"
+#include "xe_bo.h"
+#include "xe_device_types.h"
+#include "xe_map.h"
+#include "xe_mem_pool.h"
+#include "xe_mem_pool_types.h"
+#include "xe_tile_printk.h"
+
+/**
+ * struct xe_mem_pool - DRM MM pool for sub-allocating memory from a BO on an
+ * XE tile.
+ *
+ * The XE memory pool is a DRM MM manager that provides sub-allocation of memory
+ * from a backing buffer object (BO) on a specific XE tile. It is designed to
+ * manage memory for GPU workloads, allowing for efficient allocation and
+ * deallocation of memory regions within the BO.
+ *
+ * The memory pool maintains a primary BO that is pinned in the GGTT and mapped
+ * into the CPU address space for direct access. Optionally, it can also maintain
+ * a shadow BO that can be used for atomic updates to the primary BO's contents.
+ *
+ * The API provided by the memory pool allows clients to allocate and free memory
+ * regions, retrieve GPU and CPU addresses, and synchronize data between the
+ * primary and shadow BOs as needed.
+ */
+struct xe_mem_pool {
+	/** @base: Range allocator over [0, @size) in bytes */
+	struct drm_mm base;
+	/** @bo: Active pool BO (GGTT-pinned, CPU-mapped). */
+	struct xe_bo *bo;
+	/** @shadow: Shadow BO for atomic command updates. */
+	struct xe_bo *shadow;
+	/** @swap_guard: Timeline guard updating @bo and @shadow */
+	struct mutex swap_guard;
+	/** @cpu_addr: CPU virtual address of the active BO. */
+	void *cpu_addr;
+	/** @is_iomem: Indicates if the BO mapping is I/O memory. */
+	bool is_iomem;
+};
+
+static struct xe_mem_pool *node_to_pool(struct xe_mem_pool_node *node)
+{
+	return container_of(node->sa_node.mm, struct xe_mem_pool, base);
+}
+
+static struct xe_tile *pool_to_tile(struct xe_mem_pool *pool)
+{
+	return pool->bo->tile;
+}
+
+static void fini_pool_action(struct drm_device *drm, void *arg)
+{
+	struct xe_mem_pool *pool = arg;
+
+	if (pool->is_iomem)
+		kvfree(pool->cpu_addr);
+
+	drm_mm_takedown(&pool->base);
+}
+
+static int pool_shadow_init(struct xe_mem_pool *pool)
+{
+	struct xe_tile *tile = pool->bo->tile;
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_bo *shadow;
+	int ret;
+
+	xe_assert(xe, !pool->shadow);
+
+	ret = drmm_mutex_init(&xe->drm, &pool->swap_guard);
+	if (ret)
+		return ret;
+
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+		fs_reclaim_acquire(GFP_KERNEL);
+		might_lock(&pool->swap_guard);
+		fs_reclaim_release(GFP_KERNEL);
+	}
+
+	shadow = xe_managed_bo_create_pin_map(xe, tile,
+					      xe_bo_size(pool->bo),
+					      XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+					      XE_BO_FLAG_GGTT |
+					      XE_BO_FLAG_GGTT_INVALIDATE |
+					      XE_BO_FLAG_PINNED_NORESTORE);
+	if (IS_ERR(shadow))
+		return PTR_ERR(shadow);
+
+	pool->shadow = shadow;
+
+	return 0;
+}
+
+/**
+ * xe_mem_pool_init() - Initialize memory pool.
+ * @tile: the &xe_tile on which to allocate.
+ * @size: number of bytes to allocate.
+ * @guard: the size of the guard region at the end of the BO that is not
+ * sub-allocated, in bytes.
+ * @flags: flags to use to create shadow pool.
+ *
+ * Initializes a memory pool for sub-allocating memory from a backing BO on the
+ * specified XE tile. The backing BO is pinned in the GGTT and mapped into
+ * the CPU address space for direct access. Optionally, a shadow BO can also be
+ * initialized for atomic updates to the primary BO's contents.
+ *
+ * Returns: a pointer to the &xe_mem_pool, or an error pointer on failure.
+ */
+struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size,
+				     u32 guard, int flags)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_mem_pool *pool;
+	struct xe_bo *bo;
+	u32 managed_size;
+	int ret;
+
+	xe_tile_assert(tile, size > guard);
+	managed_size = size - guard;
+
+	pool = drmm_kzalloc(&xe->drm, sizeof(*pool), GFP_KERNEL);
+	if (!pool)
+		return ERR_PTR(-ENOMEM);
+
+	bo = xe_managed_bo_create_pin_map(xe, tile, size,
+					  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+					  XE_BO_FLAG_GGTT |
+					  XE_BO_FLAG_GGTT_INVALIDATE |
+					  XE_BO_FLAG_PINNED_NORESTORE);
+	if (IS_ERR(bo)) {
+		xe_tile_err(tile, "Failed to prepare %uKiB BO for mem pool (%pe)\n",
+			    size / SZ_1K, bo);
+		return ERR_CAST(bo);
+	}
+
+	pool->bo = bo;
+	pool->is_iomem = bo->vmap.is_iomem;
+
+	if (pool->is_iomem) {
+		pool->cpu_addr = kvzalloc(size, GFP_KERNEL);
+		if (!pool->cpu_addr)
+			return ERR_PTR(-ENOMEM);
+	} else {
+		pool->cpu_addr = bo->vmap.vaddr;
+	}
+
+	if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY) {
+		ret = pool_shadow_init(pool);
+		if (ret)
+			goto out_err;
+	}
+
+	drm_mm_init(&pool->base, 0, managed_size);
+
+	ret = drmm_add_action_or_reset(&xe->drm, fini_pool_action, pool);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return pool;
+
+out_err:
+	if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY)
+		xe_tile_err(tile,
+			    "Failed to initialize shadow BO for mem pool (%d)\n", ret);
+	if (bo->vmap.is_iomem)
+		kvfree(pool->cpu_addr);
+	return ERR_PTR(ret);
+}
+
+/**
+ * xe_mem_pool_sync() - Copy the entire contents of the main pool to shadow pool.
+ * @pool: the memory pool containing the primary and shadow BOs.
+ *
+ * Copies the entire contents of the primary pool to the shadow pool. This must
+ * be done after xe_mem_pool_init() with the XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY
+ * flag to ensure that the shadow pool has the same initial contents as the
+ * primary pool. After this initial synchronization, clients can synchronize the
+ * shadow pool with the primary pool on a per-node basis using
+ * xe_mem_pool_sync_shadow_locked() as needed.
+ *
+ * Return: None.
+ */
+void xe_mem_pool_sync(struct xe_mem_pool *pool)
+{
+	struct xe_tile *tile = pool_to_tile(pool);
+	struct xe_device *xe = tile_to_xe(tile);
+
+	xe_tile_assert(tile, pool->shadow);
+
+	xe_map_memcpy_to(xe, &pool->shadow->vmap, 0,
+			 pool->cpu_addr, xe_bo_size(pool->bo));
+}
+
+/**
+ * xe_mem_pool_swap_shadow_locked() - Swap the primary BO with the shadow BO.
+ * @pool: the memory pool containing the primary and shadow BOs.
+ *
+ * Swaps the primary buffer object with the shadow buffer object in the mem
+ * pool. This allows for atomic updates to the contents of the primary BO
+ * by first writing to the shadow BO and then swapping it with the primary BO.
+ * The swap_guard mutex must be held to serialize against any concurrent swap
+ * operations.
+ *
+ * Return: None.
+ */
+void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool)
+{
+	struct xe_tile *tile = pool_to_tile(pool);
+
+	xe_tile_assert(tile, pool->shadow);
+	lockdep_assert_held(&pool->swap_guard);
+
+	swap(pool->bo, pool->shadow);
+	if (!pool->bo->vmap.is_iomem)
+		pool->cpu_addr = pool->bo->vmap.vaddr;
+}
+
+/**
+ * xe_mem_pool_sync_shadow_locked() - Copy node from primary pool to shadow pool.
+ * @node: the node allocated in the memory pool.
+ *
+ * Copies the specified batch buffer from the primary pool to the shadow pool.
+ * The swap_guard mutex must be held to serialize against any concurrent swap
+ * operations.
+ *
+ * Return: None.
+ */
+void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node)
+{
+	struct xe_mem_pool *pool = node_to_pool(node);
+	struct xe_tile *tile = pool_to_tile(pool);
+	struct xe_device *xe = tile_to_xe(tile);
+	struct drm_mm_node *sa_node = &node->sa_node;
+
+	xe_tile_assert(tile, pool->shadow);
+	lockdep_assert_held(&pool->swap_guard);
+
+	xe_map_memcpy_to(xe, &pool->shadow->vmap,
+			 sa_node->start,
+			 pool->cpu_addr + sa_node->start,
+			 sa_node->size);
+}
+
+/**
+ * xe_mem_pool_gpu_addr() - Retrieve GPU address of memory pool.
+ * @pool: the memory pool
+ *
+ * Returns: GGTT address of the memory pool.
+ */
+u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool)
+{
+	return xe_bo_ggtt_addr(pool->bo);
+}
+
+/**
+ * xe_mem_pool_cpu_addr() - Retrieve CPU address of the memory pool.
+ * @pool: the memory pool
+ *
+ * Returns: CPU virtual address of memory pool.
+ */
+void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool)
+{
+	return pool->cpu_addr;
+}
+
+/**
+ * xe_mem_pool_bo_swap_guard() - Retrieve the mutex used to guard swap
+ * operations on a memory pool.
+ * @pool: the memory pool
+ *
+ * Returns: Swap guard mutex or NULL if shadow pool is not created.
+ */
+struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool)
+{
+	if (!pool->shadow)
+		return NULL;
+
+	return &pool->swap_guard;
+}
+
+/**
+ * xe_mem_pool_bo_flush_write() - Copy the data from the sub-allocation
+ * to the GPU memory.
+ * @node: the node allocated in the memory pool to flush.
+ */
+void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node)
+{
+	struct xe_mem_pool *pool = node_to_pool(node);
+	struct xe_tile *tile = pool_to_tile(pool);
+	struct xe_device *xe = tile_to_xe(tile);
+	struct drm_mm_node *sa_node = &node->sa_node;
+
+	if (!pool->bo->vmap.is_iomem)
+		return;
+
+	xe_map_memcpy_to(xe, &pool->bo->vmap, sa_node->start,
+			 pool->cpu_addr + sa_node->start,
+			 sa_node->size);
+}
+
+/**
+ * xe_mem_pool_bo_sync_read() - Copy the data from GPU memory to the
+ * sub-allocation.
+ * @node: the node allocated in the memory pool to read back.
+ */
+void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node)
+{
+	struct xe_mem_pool *pool = node_to_pool(node);
+	struct xe_tile *tile = pool_to_tile(pool);
+	struct xe_device *xe = tile_to_xe(tile);
+	struct drm_mm_node *sa_node = &node->sa_node;
+
+	if (!pool->bo->vmap.is_iomem)
+		return;
+
+	xe_map_memcpy_from(xe, pool->cpu_addr + sa_node->start,
+			   &pool->bo->vmap, sa_node->start, sa_node->size);
+}
+
+/**
+ * xe_mem_pool_alloc_node() - Allocate a new node for use with xe_mem_pool.
+ *
+ * Returns: node structure or an ERR_PTR(-ENOMEM).
+ */
+struct xe_mem_pool_node *xe_mem_pool_alloc_node(void)
+{
+	struct xe_mem_pool_node *node = kzalloc_obj(*node);
+
+	if (!node)
+		return ERR_PTR(-ENOMEM);
+
+	return node;
+}
+
+/**
+ * xe_mem_pool_insert_node() - Insert a node into the memory pool.
+ * @pool: the memory pool to insert into
+ * @node: the node to insert
+ * @size: the size of the node to be allocated in bytes.
+ *
+ * Inserts a node into the specified memory pool using drm_mm for
+ * allocation.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int xe_mem_pool_insert_node(struct xe_mem_pool *pool,
+			    struct xe_mem_pool_node *node, u32 size)
+{
+	if (!pool)
+		return -EINVAL;
+
+	return drm_mm_insert_node(&pool->base, &node->sa_node, size);
+}
+
+/**
+ * xe_mem_pool_free_node() - Free a node allocated from the memory pool.
+ * @node: the node to free
+ *
+ * Returns: None.
+ */
+void xe_mem_pool_free_node(struct xe_mem_pool_node *node)
+{
+	if (!node)
+		return;
+
+	drm_mm_remove_node(&node->sa_node);
+	kfree(node);
+}
+
+/**
+ * xe_mem_pool_node_cpu_addr() - Retrieve CPU address of the node.
+ * @node: the node allocated in the memory pool
+ *
+ * Returns: CPU virtual address of the node.
+ */
+void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node)
+{
+	struct xe_mem_pool *pool = node_to_pool(node);
+
+	return xe_mem_pool_cpu_addr(pool) + node->sa_node.start;
+}
+
+/**
+ * xe_mem_pool_dump() - Dump the state of the DRM MM manager for debugging.
+ * @pool: the memory pool to be dumped.
+ * @p: The DRM printer to use for output.
+ *
+ * Only the drm managed region is dumped, not the state of the BOs or any other
+ * pool information.
+ *
+ * Returns: None.
+ */
+void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p)
+{
+	drm_mm_print(&pool->base, p);
+}
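For reference, a minimal end-to-end use of the API added above (error handling trimmed, no shadow copy; the SZ_2M/SZ_4K sizes are arbitrary). Note the asymmetric cleanup: a node that failed xe_mem_pool_insert_node() is plain kfree()d, since xe_mem_pool_free_node() would call drm_mm_remove_node() on a node that was never inserted, and the pool itself is drmm-managed, so there is no explicit fini call:

    static int pool_usage_sketch(struct xe_tile *tile)
    {
            struct xe_mem_pool *pool;
            struct xe_mem_pool_node *node;
            u32 *cs;
            int err;

            pool = xe_mem_pool_init(tile, SZ_2M, 0, 0); /* no guard, no shadow */
            if (IS_ERR(pool))
                    return PTR_ERR(pool);

            node = xe_mem_pool_alloc_node();
            if (IS_ERR(node))
                    return PTR_ERR(node);

            err = xe_mem_pool_insert_node(pool, node, SZ_4K);
            if (err) {
                    kfree(node);    /* not yet inserted: no free_node() */
                    return err;
            }

            /* write through the CPU-visible copy, then push it to the BO */
            cs = xe_mem_pool_node_cpu_addr(node);
            cs[0] = MI_BATCH_BUFFER_END;
            xe_mem_pool_bo_flush_write(node);   /* no-op unless BO is iomem */

            /* the GPU sees the node at pool base + node offset:
             * u64 addr = xe_mem_pool_gpu_addr(pool) + node->sa_node.start;
             */

            xe_mem_pool_free_node(node);
            return 0;
    }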
diff --git a/drivers/gpu/drm/xe/xe_mem_pool.h b/drivers/gpu/drm/xe/xe_mem_pool.h
new file mode 100644
index 000000000000..89cd2555fe91
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mem_pool.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef _XE_MEM_POOL_H_
+#define _XE_MEM_POOL_H_
+
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+#include <drm/drm_mm.h>
+
+#include "xe_mem_pool_types.h"
+
+struct drm_printer;
+struct xe_mem_pool;
+struct xe_tile;
+
+struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size,
+				     u32 guard, int flags);
+void xe_mem_pool_sync(struct xe_mem_pool *pool);
+void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool);
+void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node);
+u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool);
+void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool);
+struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool);
+void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node);
+void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node);
+struct xe_mem_pool_node *xe_mem_pool_alloc_node(void);
+int xe_mem_pool_insert_node(struct xe_mem_pool *pool,
+			    struct xe_mem_pool_node *node, u32 size);
+void xe_mem_pool_free_node(struct xe_mem_pool_node *node);
+void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node);
+void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mem_pool_types.h b/drivers/gpu/drm/xe/xe_mem_pool_types.h
new file mode 100644
index 000000000000..d5e926c93351
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mem_pool_types.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef _XE_MEM_POOL_TYPES_H_
+#define _XE_MEM_POOL_TYPES_H_
+
+#include <drm/drm_mm.h>
+
+#define XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY	BIT(0)
+
+/**
+ * struct xe_mem_pool_node - Sub-range allocations from mem pool.
+ */
+struct xe_mem_pool_node {
+	/** @sa_node: drm_mm_node for this allocation. */
+	struct drm_mm_node sa_node;
+};
+
+#endif
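The shadow copy exists so the CPU can rewrite pool contents the GPU may be reading concurrently. A sketch of the double-buffered update pattern, mirroring how xe_migrate.c below uses it; it assumes the pool was created with XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY and seeded once with xe_mem_pool_sync():

    static void atomic_update_sketch(struct xe_mem_pool *pool,
                                     struct xe_mem_pool_node *node, u32 dw)
    {
            u32 *cs;

            scoped_guard(mutex, xe_mem_pool_bo_swap_guard(pool)) {
                    /* retire the live BO to the shadow role; the GPU can
                     * keep reading it while we write the other copy */
                    xe_mem_pool_swap_shadow_locked(pool);

                    cs = xe_mem_pool_node_cpu_addr(node);
                    cs[0] = dw;
                    xe_mem_pool_bo_flush_write(node);

                    /* ... point the GPU at the newly active BO here, e.g.
                     * by updating the ring's BB start address ... */

                    /* bring the now-shadow copy back in sync for the
                     * next swap */
                    xe_mem_pool_sync_shadow_locked(node);
            }
    }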
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index fc918b4fba54..5fdc89ed5256 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -29,6 +29,7 @@
 #include "xe_hw_engine.h"
 #include "xe_lrc.h"
 #include "xe_map.h"
+#include "xe_mem_pool.h"
 #include "xe_mocs.h"
 #include "xe_printk.h"
 #include "xe_pt.h"
@@ -1166,11 +1167,12 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
 	u32 batch_size, batch_size_allocated;
 	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_res_cursor src_it, ccs_it;
+	struct xe_mem_pool *bb_pool;
 	struct xe_sriov_vf_ccs_ctx *ctx;
-	struct xe_sa_manager *bb_pool;
 	u64 size = xe_bo_size(src_bo);
-	struct xe_bb *bb = NULL;
+	struct xe_mem_pool_node *bb;
 	u64 src_L0, src_L0_ofs;
+	struct xe_bb xe_bb_tmp;
 	u32 src_L0_pt;
 	int err;
 
@@ -1208,18 +1210,18 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
 		size -= src_L0;
 	}
 
-	bb = xe_bb_alloc(gt);
+	bb = xe_mem_pool_alloc_node();
 	if (IS_ERR(bb))
 		return PTR_ERR(bb);
 
 	bb_pool = ctx->mem.ccs_bb_pool;
-	scoped_guard(mutex, xe_sa_bo_swap_guard(bb_pool)) {
-		xe_sa_bo_swap_shadow(bb_pool);
+	scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
+		xe_mem_pool_swap_shadow_locked(bb_pool);
 
-		err = xe_bb_init(bb, bb_pool, batch_size);
+		err = xe_mem_pool_insert_node(bb_pool, bb, batch_size * sizeof(u32));
 		if (err) {
 			xe_gt_err(gt, "BB allocation failed.\n");
-			xe_bb_free(bb, NULL);
+			kfree(bb);
 			return err;
 		}
 
@@ -1227,6 +1229,7 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
 		size = xe_bo_size(src_bo);
 		batch_size = 0;
 
+		xe_bb_tmp = (struct xe_bb){ .cs = xe_mem_pool_node_cpu_addr(bb), .len = 0 };
 		/*
 		 * Emit PTE and copy commands here.
 		 * The CCS copy command can only support limited size. If the size to be
@@ -1255,24 +1258,27 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
 			xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
 			batch_size += EMIT_COPY_CCS_DW;
 
-			emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src);
+			emit_pte(m, &xe_bb_tmp, src_L0_pt, false, true, &src_it, src_L0, src);
 
-			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
+			emit_pte(m, &xe_bb_tmp, ccs_pt, false, false, &ccs_it, ccs_size, src);
 
-			bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
-			flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt,
+			xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
+							      flush_flags);
+			flush_flags = xe_migrate_ccs_copy(m, &xe_bb_tmp, src_L0_ofs, src_is_pltt,
 							  src_L0_ofs, dst_is_pltt,
 							  src_L0, ccs_ofs, true);
-			bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
+			xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
+							      flush_flags);
 
 			size -= src_L0;
 		}
 
-		xe_assert(xe, (batch_size_allocated == bb->len));
+		xe_assert(xe, (batch_size_allocated == xe_bb_tmp.len));
+		xe_assert(xe, bb->sa_node.size == xe_bb_tmp.len * sizeof(u32));
 		src_bo->bb_ccs[read_write] = bb;
 
 		xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
-		xe_sa_bo_sync_shadow(bb->bo);
+		xe_mem_pool_sync_shadow_locked(bb);
 	}
 
 	return 0;
@@ -1297,10 +1303,10 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
 void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
 				  enum xe_sriov_vf_ccs_rw_ctxs read_write)
 {
-	struct xe_bb *bb = src_bo->bb_ccs[read_write];
+	struct xe_mem_pool_node *bb = src_bo->bb_ccs[read_write];
 	struct xe_device *xe = xe_bo_device(src_bo);
+	struct xe_mem_pool *bb_pool;
 	struct xe_sriov_vf_ccs_ctx *ctx;
-	struct xe_sa_manager *bb_pool;
 	u32 *cs;
 
 	xe_assert(xe, IS_SRIOV_VF(xe));
@@ -1308,17 +1314,17 @@ void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
 	ctx = &xe->sriov.vf.ccs.contexts[read_write];
 	bb_pool = ctx->mem.ccs_bb_pool;
 
-	guard(mutex) (xe_sa_bo_swap_guard(bb_pool));
-	xe_sa_bo_swap_shadow(bb_pool);
-
-	cs = xe_sa_bo_cpu_addr(bb->bo);
-	memset(cs, MI_NOOP, bb->len * sizeof(u32));
-	xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
+	scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
+		xe_mem_pool_swap_shadow_locked(bb_pool);
 
-	xe_sa_bo_sync_shadow(bb->bo);
+		cs = xe_mem_pool_node_cpu_addr(bb);
+		memset(cs, MI_NOOP, bb->sa_node.size);
+		xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
 
-	xe_bb_free(bb, NULL);
-	src_bo->bb_ccs[read_write] = NULL;
+		xe_mem_pool_sync_shadow_locked(bb);
+		xe_mem_pool_free_node(bb);
+		src_bo->bb_ccs[read_write] = NULL;
+	}
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 01673d2b2464..9f98d0334164 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -118,6 +118,7 @@ static const struct xe_graphics_desc graphics_xe2 = {
 
 static const struct xe_graphics_desc graphics_xe3p_lpg = {
 	XE2_GFX_FEATURES,
+	.has_indirect_ring_state = 1,
 	.multi_queue_engine_class_mask = BIT(XE_ENGINE_CLASS_COPY) |
 					 BIT(XE_ENGINE_CLASS_COMPUTE),
 	.num_geometry_xecore_fuse_regs = 3,
 	.num_compute_xecore_fuse_regs = 3,
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 80577e4b7437..8cc313182968 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -226,7 +226,7 @@ void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
 	}
 
 	range_start = reg & REG_GENMASK(25, range_bit);
-	range_end = range_start | REG_GENMASK(range_bit, 0);
+	range_end = range_start | REG_GENMASK(range_bit - 1, 0);
 
 	switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) {
 	case RING_FORCE_TO_NONPRIV_ACCESS_RW:
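A worked example of the off-by-one being fixed here, assuming range_bit = 4 and a range base of 0x2000 (values chosen only for illustration):

    /*
     * REG_GENMASK(h, l) sets bits h..l inclusive, so the old low mask
     * REG_GENMASK(4, 0) overlapped bit 4, which range_start already
     * keeps, and reported a range twice its real size:
     *
     *   range_start = reg & REG_GENMASK(25, 4)       = 0x2000
     *   old: range_end = 0x2000 | REG_GENMASK(4, 0)  = 0x201f  (32 bytes)
     *   new: range_end = 0x2000 | REG_GENMASK(3, 0)  = 0x200f  (16 bytes)
     */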
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
index db023fb66a27..09b99fb2608b 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
@@ -14,9 +14,9 @@
 #include "xe_guc.h"
 #include "xe_guc_submit.h"
 #include "xe_lrc.h"
+#include "xe_mem_pool.h"
 #include "xe_migrate.h"
 #include "xe_pm.h"
-#include "xe_sa.h"
 #include "xe_sriov_printk.h"
 #include "xe_sriov_vf.h"
 #include "xe_sriov_vf_ccs.h"
@@ -141,43 +141,47 @@ static u64 get_ccs_bb_pool_size(struct xe_device *xe)
 
 static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
 {
+	struct xe_mem_pool *pool;
 	struct xe_device *xe = tile_to_xe(tile);
-	struct xe_sa_manager *sa_manager;
+	u32 *pool_cpu_addr, *last_dw_addr;
 	u64 bb_pool_size;
-	int offset, err;
+	int err;
 
 	bb_pool_size = get_ccs_bb_pool_size(xe);
 
 	xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
 		      ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
 
-	sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16,
-					     XE_SA_BO_MANAGER_FLAG_SHADOW);
-
-	if (IS_ERR(sa_manager)) {
-		xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
-			     sa_manager);
-		err = PTR_ERR(sa_manager);
+	pool = xe_mem_pool_init(tile, bb_pool_size, sizeof(u32),
+				XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY);
+	if (IS_ERR(pool)) {
+		xe_sriov_err(xe, "xe_mem_pool_init failed with error: %pe\n",
+			     pool);
+		err = PTR_ERR(pool);
 		return err;
 	}
 
-	offset = 0;
-	xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
-		      bb_pool_size);
-	xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
-		      bb_pool_size);
+	pool_cpu_addr = xe_mem_pool_cpu_addr(pool);
+	memset(pool_cpu_addr, 0, bb_pool_size);
 
-	offset = bb_pool_size - sizeof(u32);
-	xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
-	xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
+	last_dw_addr = pool_cpu_addr + (bb_pool_size / sizeof(u32)) - 1;
+	*last_dw_addr = MI_BATCH_BUFFER_END;
 
-	ctx->mem.ccs_bb_pool = sa_manager;
+	/*
+	 * Sync the main copy and the shadow copy so that the shadow starts
+	 * out as a replica of the main copy. After init, only individual BBs
+	 * are synced, so the main pool and the shadow copy must already match
+	 * at this point. This is needed because the GuC may read the BB
+	 * commands from the shadow copy.
+	 */
+	xe_mem_pool_sync(pool);
+	ctx->mem.ccs_bb_pool = pool;
 
 	return 0;
 }
 
 static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
 {
-	u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
+	u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
 	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
 	u32 dw[10], i = 0;
 
@@ -388,7 +392,7 @@ err_ret:
 #define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32))
 void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
 {
-	u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
+	u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
 	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
 	struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);
 
@@ -412,8 +416,8 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
 	struct xe_device *xe = xe_bo_device(bo);
 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
 	struct xe_sriov_vf_ccs_ctx *ctx;
+	struct xe_mem_pool_node *bb;
 	struct xe_tile *tile;
-	struct xe_bb *bb;
 	int err = 0;
 
 	xe_assert(xe, IS_VF_CCS_READY(xe));
@@ -445,7 +449,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
 {
 	struct xe_device *xe = xe_bo_device(bo);
 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
-	struct xe_bb *bb;
+	struct xe_mem_pool_node *bb;
 
 	xe_assert(xe, IS_VF_CCS_READY(xe));
 
@@ -471,8 +475,8 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
  */
 void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
 {
-	struct xe_sa_manager *bb_pool;
 	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+	struct xe_mem_pool *bb_pool;
 
 	if (!IS_VF_CCS_READY(xe))
 		return;
@@ -485,7 +489,7 @@ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
 		drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
 		drm_printf(p, "-------------------------\n");
 
-		drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
+		xe_mem_pool_dump(bb_pool, p);
 		drm_puts(p, "\n");
 	}
 }
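For reference, the resulting CCS BB pool layout after alloc_bb_pool(). The guard argument (sizeof(u32)) keeps the final dword outside the drm_mm range, so no sub-allocation can ever overwrite the terminator:

    /*
     *  0                        bb_pool_size - 4      bb_pool_size
     *  +------------------------+---------------------+
     *  | sub-allocatable BBs    | MI_BATCH_BUFFER_END |
     *  | (zero-filled: MI_NOOP) | (guard dword)       |
     *  +------------------------+---------------------+
     *
     * MI_NOOP is 0, so the memset(0) leaves the whole managed region as
     * no-ops; presumably a command streamer that runs past the last live
     * batch buffer then no-ops its way to the fixed terminator.
     */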
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
index 22c499943d2a..6fc8f97ef3f4 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
@@ -17,9 +17,6 @@ enum xe_sriov_vf_ccs_rw_ctxs {
 	XE_SRIOV_VF_CCS_CTX_COUNT
 };
 
-struct xe_migrate;
-struct xe_sa_manager;
-
 /**
  * struct xe_sriov_vf_ccs_ctx - VF CCS migration context data.
  */
@@ -33,7 +30,7 @@ struct xe_sriov_vf_ccs_ctx {
 	/** @mem: memory data */
 	struct {
 		/** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */
-		struct xe_sa_manager *ccs_bb_pool;
+		struct xe_mem_pool *ccs_bb_pool;
 	} mem;
 };
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index f8de6a4bf189..0b78ec2bc6a4 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -97,7 +97,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
 	{ XE_RTP_NAME("Tuning: Set STLB Bank Hash Mode to 4KB"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, XE_RTP_END_VERSION_UNDEFINED),
 		       IS_INTEGRATED),
-	  XE_RTP_ACTIONS(FIELD_SET(XEHP_GAMSTLB_CTRL, BANK_HASH_MODE,
+	  XE_RTP_ACTIONS(FIELD_SET(GAMSTLB_CTRL, BANK_HASH_MODE,
 				   BANK_HASH_4KB_MODE))
 	},
 };
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 56e2db50bb36..a717a2b8dea3 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3658,6 +3658,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
 				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
 		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
 				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+		    XE_IOCTL_DBG(xe, !IS_DGFX(xe) && coh_mode == XE_COH_NONE &&
+				 is_cpu_addr_mirror) ||
 		    XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) &&
 				 (op == DRM_XE_VM_BIND_OP_MAP_USERPTR ||
 				  is_cpu_addr_mirror) &&
@@ -4156,7 +4158,8 @@ int xe_vm_get_property_ioctl(struct drm_device *drm, void *data,
 	int ret = 0;
 
 	if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] ||
-			      args->reserved[2])))
+			      args->reserved[2] || args->extensions ||
+			      args->pad)))
 		return -EINVAL;
 
 	vm = xe_vm_lookup(xef, args->vm_id);
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 66f00d3f5c07..c78906dea82b 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -621,6 +621,45 @@ static int xe_madvise_purgeable_retained_to_user(const struct xe_madvise_details
 	return 0;
 }
 
+static bool check_pat_args_are_sane(struct xe_device *xe,
+				    struct xe_vmas_in_madvise_range *madvise_range,
+				    u16 pat_index)
+{
+	u16 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+	int i;
+
+	/*
+	 * Using coh_none with CPU cached buffers is not allowed on iGPU.
+	 * On iGPU the GPU shares the LLC with the CPU, so with coh_none
+	 * the GPU bypasses CPU caches and reads directly from DRAM,
+	 * potentially seeing stale sensitive data from previously freed
+	 * pages. On dGPU this restriction does not apply, because the
+	 * platform does not provide a non-coherent system memory access
+	 * path that would violate the DMA coherency contract.
+	 */
+	if (coh_mode != XE_COH_NONE || IS_DGFX(xe))
+		return true;
+
+	for (i = 0; i < madvise_range->num_vmas; i++) {
+		struct xe_vma *vma = madvise_range->vmas[i];
+		struct xe_bo *bo = xe_vma_bo(vma);
+
+		if (bo) {
+			/* BO with WB caching + COH_NONE is not allowed */
+			if (XE_IOCTL_DBG(xe, bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
+				return false;
+			/* Imported dma-buf without caching info, assume cached */
+			if (XE_IOCTL_DBG(xe, !bo->cpu_caching))
+				return false;
+		} else if (XE_IOCTL_DBG(xe, xe_vma_is_cpu_addr_mirror(vma) ||
+					xe_vma_is_userptr(vma)))
+			/* System memory (userptr/SVM) is always CPU cached */
+			return false;
+	}
+
+	return true;
+}
+
 static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
 				   int num_vmas, u32 atomic_val)
 {
@@ -750,6 +789,14 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		}
 	}
 
+	if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
+		if (!check_pat_args_are_sane(xe, &madvise_range,
+					     args->pat_index.val)) {
+			err = -EINVAL;
+			goto free_vmas;
+		}
+	}
+
 	if (madvise_range.has_bo_vmas) {
 		if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
 			if (!check_bo_args_are_sane(vm, madvise_range.vmas,
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 546296f0220b..4b1cbced06be 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -743,14 +743,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 	  XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
 	},
-	{ XE_RTP_NAME("14019988906"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
-	},
-	{ XE_RTP_NAME("14019877138"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
-	},
 	{ XE_RTP_NAME("14021490052"),
 	  XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(FF_MODE,
