diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_lrc.c')
| -rw-r--r-- | drivers/gpu/drm/xe/xe_lrc.c | 110 |
1 files changed, 96 insertions, 14 deletions
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 8f6c3ba47882..b5083c99dd50 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -8,6 +8,7 @@ #include <generated/xe_wa_oob.h> #include <linux/ascii85.h> +#include <linux/panic.h> #include "instructions/xe_mi_commands.h" #include "instructions/xe_gfxpipe_commands.h" @@ -16,6 +17,7 @@ #include "regs/xe_lrc_layout.h" #include "xe_bb.h" #include "xe_bo.h" +#include "xe_configfs.h" #include "xe_device.h" #include "xe_drm_client.h" #include "xe_exec_queue_types.h" @@ -75,11 +77,17 @@ lrc_to_xe(struct xe_lrc *lrc) static bool gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class) { + struct xe_device *xe = gt_to_xe(gt); + if (XE_GT_WA(gt, 16010904313) && (class == XE_ENGINE_CLASS_RENDER || class == XE_ENGINE_CLASS_COMPUTE)) return true; + if (xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev), + class, NULL)) + return true; + return false; } @@ -1102,6 +1110,64 @@ static ssize_t setup_timestamp_wa(struct xe_lrc *lrc, struct xe_hw_engine *hwe, return cmd - batch; } +static ssize_t setup_configfs_post_ctx_restore_bb(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, size_t max_len) +{ + struct xe_device *xe = gt_to_xe(lrc->gt); + const u32 *user_batch; + u32 *cmd = batch; + u32 count; + + count = xe_configfs_get_ctx_restore_post_bb(to_pci_dev(xe->drm.dev), + hwe->class, &user_batch); + if (!count) + return 0; + + if (count > max_len) + return -ENOSPC; + + /* + * This should be used only for tests and validation. Taint the kernel + * as anything could be submitted directly in context switches + */ + add_taint(TAINT_TEST, LOCKDEP_STILL_OK); + + memcpy(cmd, user_batch, count * sizeof(u32)); + cmd += count; + + return cmd - batch; +} + +static ssize_t setup_configfs_mid_ctx_restore_bb(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, size_t max_len) +{ + struct xe_device *xe = gt_to_xe(lrc->gt); + const u32 *user_batch; + u32 *cmd = batch; + u32 count; + + count = xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev), + hwe->class, &user_batch); + if (!count) + return 0; + + if (count > max_len) + return -ENOSPC; + + /* + * This should be used only for tests and validation. Taint the kernel + * as anything could be submitted directly in context switches + */ + add_taint(TAINT_TEST, LOCKDEP_STILL_OK); + + memcpy(cmd, user_batch, count * sizeof(u32)); + cmd += count; + + return cmd - batch; +} + static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *batch, size_t max_len) @@ -1148,8 +1214,7 @@ static int setup_bo(struct bo_setup_state *state) ssize_t remain; if (state->lrc->bo->vmap.is_iomem) { - if (!state->buffer) - return -ENOMEM; + xe_gt_assert(state->hwe->gt, state->buffer); state->ptr = state->buffer; } else { state->ptr = state->lrc->bo->vmap.vaddr + state->offset; @@ -1182,7 +1247,7 @@ fail: static void finish_bo(struct bo_setup_state *state) { - if (!state->buffer) + if (!state->lrc->bo->vmap.is_iomem) return; xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap, @@ -1203,6 +1268,7 @@ int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe { .setup = setup_timestamp_wa }, { .setup = setup_invalidate_state_cache_wa }, { .setup = setup_utilization_wa }, + { .setup = setup_configfs_post_ctx_restore_bb }, }; struct bo_setup_state state = { .lrc = lrc, @@ -1236,8 +1302,11 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) u32 *buf = NULL; int ret; - if (lrc->bo->vmap.is_iomem) + if (lrc->bo->vmap.is_iomem) { buf = kmalloc(LRC_WA_BB_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + } ret = xe_lrc_setup_wa_bb_with_scratch(lrc, hwe, buf); @@ -1249,8 +1318,12 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) static int setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) { - static struct bo_setup rcs_funcs[] = { + static const struct bo_setup rcs_funcs[] = { { .setup = setup_timestamp_wa }, + { .setup = setup_configfs_mid_ctx_restore_bb }, + }; + static const struct bo_setup xcs_funcs[] = { + { .setup = setup_configfs_mid_ctx_restore_bb }, }; struct bo_setup_state state = { .lrc = lrc, @@ -1268,13 +1341,19 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) hwe->class == XE_ENGINE_CLASS_COMPUTE) { state.funcs = rcs_funcs; state.num_funcs = ARRAY_SIZE(rcs_funcs); + } else { + state.funcs = xcs_funcs; + state.num_funcs = ARRAY_SIZE(xcs_funcs); } if (xe_gt_WARN_ON(lrc->gt, !state.funcs)) return 0; - if (lrc->bo->vmap.is_iomem) + if (lrc->bo->vmap.is_iomem) { state.buffer = kmalloc(state.max_size, GFP_KERNEL); + if (!state.buffer) + return -ENOMEM; + } ret = setup_bo(&state); if (ret) { @@ -1294,14 +1373,15 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) finish_bo(&state); kfree(state.buffer); + /* + * Enable INDIRECT_CTX leaving INDIRECT_CTX_OFFSET at its default: it + * varies per engine class, but the default is good enough + */ xe_lrc_write_ctx_reg(lrc, CTX_CS_INDIRECT_CTX, (xe_bo_ggtt_addr(lrc->bo) + state.offset) | /* Size in CLs. */ (state.written * sizeof(u32) / 64)); - xe_lrc_write_ctx_reg(lrc, - CTX_CS_INDIRECT_CTX_OFFSET, - CTX_INDIRECT_CTX_OFFSET_DEFAULT); return 0; } @@ -1337,12 +1417,14 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE; - if (vm && vm->xef) /* userspace */ - bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; - lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size, - ttm_bo_type_kernel, - bo_flags); + if ((vm && vm->xef) || init_flags & XE_LRC_CREATE_USER_CTX) /* userspace */ + bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE | XE_BO_FLAG_FORCE_USER_VRAM; + + lrc->bo = xe_bo_create_pin_map_novm(xe, tile, + bo_size, + ttm_bo_type_kernel, + bo_flags, false); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); |
