summaryrefslogtreecommitdiff
path: root/drivers/accel
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/accel')
-rw-r--r--drivers/accel/amdxdna/aie2_message.c6
-rw-r--r--drivers/accel/amdxdna/aie2_msg_priv.h10
-rw-r--r--drivers/accel/ivpu/ivpu_drv.c4
-rw-r--r--drivers/accel/ivpu/ivpu_drv.h1
-rw-r--r--drivers/accel/ivpu/ivpu_fw.h1
-rw-r--r--drivers/accel/ivpu/ivpu_gem.c6
-rw-r--r--drivers/accel/ivpu/ivpu_pm.c20
-rw-r--r--drivers/accel/qaic/qaic_data.c8
8 files changed, 40 insertions, 16 deletions
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index bf4219e32cc1..82412eec9a4b 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -525,7 +525,7 @@ aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
if (!payload)
return -EINVAL;
- if (!slot_cf_has_space(offset, payload_len))
+ if (!slot_has_space(*buf, offset, payload_len))
return -ENOSPC;
buf->cu_idx = cu_idx;
@@ -558,7 +558,7 @@ aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
return -EINVAL;
- if (!slot_dpu_has_space(offset, arg_sz))
+ if (!slot_has_space(*buf, offset, arg_sz))
return -ENOSPC;
buf->inst_buf_addr = sn->buffer;
@@ -569,7 +569,7 @@ aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
memcpy(buf->args, sn->prop_args, arg_sz);
/* Accurate buf size to hint firmware to do necessary copy */
- *size += sizeof(*buf) + arg_sz;
+ *size = sizeof(*buf) + arg_sz;
return 0;
}
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
index 4e02e744b470..6df9065b13f6 100644
--- a/drivers/accel/amdxdna/aie2_msg_priv.h
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -319,18 +319,16 @@ struct async_event_msg_resp {
} __packed;
#define MAX_CHAIN_CMDBUF_SIZE SZ_4K
-#define slot_cf_has_space(offset, payload_size) \
- (MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \
- offsetof(struct cmd_chain_slot_execbuf_cf, args[0]))
+#define slot_has_space(slot, offset, payload_size) \
+ (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \
+ sizeof(typeof(slot)))
+
struct cmd_chain_slot_execbuf_cf {
__u32 cu_idx;
__u32 arg_cnt;
__u32 args[] __counted_by(arg_cnt);
};
-#define slot_dpu_has_space(offset, payload_size) \
- (MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \
- offsetof(struct cmd_chain_slot_dpu, args[0]))
struct cmd_chain_slot_dpu {
__u64 inst_buf_addr;
__u32 inst_size;
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 4fa73189502e..392d6790b4cd 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -374,6 +374,9 @@ int ivpu_boot(struct ivpu_device *vdev)
{
int ret;
+ drm_WARN_ON(&vdev->drm, atomic_read(&vdev->job_timeout_counter));
+ drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
+
/* Update boot params located at first 4KB of FW memory */
ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem));
@@ -573,6 +576,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID;
vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID;
atomic64_set(&vdev->unique_id_counter, 0);
+ atomic_set(&vdev->job_timeout_counter, 0);
xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 92753effb1c9..5497e7030e91 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -154,6 +154,7 @@ struct ivpu_device {
struct mutex submitted_jobs_lock; /* Protects submitted_jobs */
struct xarray submitted_jobs_xa;
struct ivpu_ipc_consumer job_done_consumer;
+ atomic_t job_timeout_counter;
atomic64_t unique_id_counter;
diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h
index 1d0b2bd9d65c..9a3935be1c05 100644
--- a/drivers/accel/ivpu/ivpu_fw.h
+++ b/drivers/accel/ivpu/ivpu_fw.h
@@ -39,6 +39,7 @@ struct ivpu_fw_info {
u64 read_only_addr;
u32 read_only_size;
u32 sched_mode;
+ u64 last_heartbeat;
};
int ivpu_fw_init(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index 212d21ad2bbd..e0d242d9f3e5 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -30,7 +30,7 @@ static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, con
"%6s: bo %8p vpu_addr %9llx size %8zu ctx %d has_pages %d dma_mapped %d mmu_mapped %d wc %d imported %d\n",
action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx ? bo->ctx->id : 0,
(bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc,
- (bool)bo->base.base.import_attach);
+ (bool)drm_gem_is_imported(&bo->base.base));
}
/*
@@ -122,7 +122,7 @@ static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
bo->ctx = NULL;
}
- if (bo->base.base.import_attach)
+ if (drm_gem_is_imported(&bo->base.base))
return;
dma_resv_lock(bo->base.base.resv, NULL);
@@ -461,7 +461,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
if (bo->mmu_mapped)
drm_printf(p, " mmu_mapped");
- if (bo->base.base.import_attach)
+ if (drm_gem_is_imported(&bo->base.base))
drm_printf(p, " imported");
drm_printf(p, "\n");
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index b5891e91f7ab..1fe03fc16bbc 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -34,6 +34,7 @@ module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
#define PM_RESCHEDULE_LIMIT 5
+#define PM_TDR_HEARTBEAT_LIMIT 30
static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
{
@@ -44,6 +45,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
ivpu_fw_log_reset(vdev);
ivpu_fw_load(vdev);
fw->entry_point = fw->cold_boot_entry_point;
+ fw->last_heartbeat = 0;
}
static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev)
@@ -189,7 +191,24 @@ static void ivpu_job_timeout_work(struct work_struct *work)
{
struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
struct ivpu_device *vdev = pm->vdev;
+ u64 heartbeat;
+ if (ivpu_jsm_get_heartbeat(vdev, 0, &heartbeat) || heartbeat <= vdev->fw->last_heartbeat) {
+ ivpu_err(vdev, "Job timeout detected, heartbeat not progressed\n");
+ goto recovery;
+ }
+
+ if (atomic_fetch_inc(&vdev->job_timeout_counter) > PM_TDR_HEARTBEAT_LIMIT) {
+ ivpu_err(vdev, "Job timeout detected, heartbeat limit exceeded\n");
+ goto recovery;
+ }
+
+ vdev->fw->last_heartbeat = heartbeat;
+ ivpu_start_job_timeout_detection(vdev);
+ return;
+
+recovery:
+ atomic_set(&vdev->job_timeout_counter, 0);
ivpu_pm_trigger_recovery(vdev, "TDR");
}
@@ -204,6 +223,7 @@ void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
{
cancel_delayed_work_sync(&vdev->pm->job_timeout_work);
+ atomic_set(&vdev->job_timeout_counter, 0);
}
int ivpu_pm_suspend_cb(struct device *dev)
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index 43aba57b48f0..1bce1af7c72c 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -609,7 +609,7 @@ static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struc
struct scatterlist *sg;
int ret = 0;
- if (obj->import_attach)
+ if (drm_gem_is_imported(obj))
return -EINVAL;
for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) {
@@ -630,7 +630,7 @@ static void qaic_free_object(struct drm_gem_object *obj)
{
struct qaic_bo *bo = to_qaic_bo(obj);
- if (obj->import_attach) {
+ if (drm_gem_is_imported(obj)) {
/* DMABUF/PRIME Path */
drm_prime_gem_destroy(obj, NULL);
} else {
@@ -870,7 +870,7 @@ static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo,
{
int ret;
- if (bo->base.import_attach)
+ if (drm_gem_is_imported(&bo->base))
ret = qaic_prepare_import_bo(bo, hdr);
else
ret = qaic_prepare_export_bo(qdev, bo, hdr);
@@ -894,7 +894,7 @@ static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *b
static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo)
{
- if (bo->base.import_attach)
+ if (drm_gem_is_imported(&bo->base))
qaic_unprepare_import_bo(bo);
else
qaic_unprepare_export_bo(qdev, bo);