diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
24 files changed, 581 insertions, 584 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8bc591deb546..fd50da4c7b18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1190,7 +1190,6 @@ struct amdgpu_device { bool apu_prefer_gtt; bool userq_halt_for_enforce_isolation; - struct work_struct userq_reset_work; struct amdgpu_uid *uid_info; struct amdgpu_uma_carveout_info uma_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index d9e283f3b57d..9783a3cefb04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -36,6 +36,9 @@ #include "amdgpu_ras.h" #include "amdgpu_umc.h" #include "amdgpu_reset.h" +#if IS_ENABLED(CONFIG_HSA_AMD) +#include "kfd_priv.h" +#endif /* Total memory size in system memory and all GPU VRAM. Used to * estimate worst case amount of memory to reserve for page tables @@ -320,6 +323,28 @@ void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev) (void)amdgpu_reset_domain_schedule(adev->reset_domain, &adev->kfd.reset_work); } +void amdgpu_amdkfd_clear_kfd_mapping(struct amdgpu_device *adev) +{ +#if IS_ENABLED(CONFIG_HSA_AMD) + struct kfd_dev *kfd = adev->kfd.dev; + unsigned int i; + + if (!kfd) + return; + + for (i = 0; i < kfd->num_nodes; i++) { + struct kfd_node *node = kfd->nodes[i]; + + kfd_dev_unmap_mapping_range(KFD_MMAP_TYPE_DOORBELL | + KFD_MMAP_GPU_ID(node->id), + kfd_doorbell_process_slice(kfd)); + kfd_dev_unmap_mapping_range(KFD_MMAP_TYPE_MMIO | + KFD_MMAP_GPU_ID(node->id), + PAGE_SIZE); + } +#endif +} + int amdgpu_amdkfd_alloc_kernel_mem(struct amdgpu_device *adev, size_t size, u32 domain, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool cp_mqd_gfx9) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index cdbab7f8cee8..2b4108f83f48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -358,6 +358,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); +void amdgpu_amdkfd_clear_kfd_mapping(struct amdgpu_device *adev); u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 66ca043658ff..feab90e3efd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3787,7 +3787,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, } INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); - INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work); amdgpu_coredump_init(adev); @@ -5478,7 +5477,7 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) cancel_work(&adev->reset_work); #endif - cancel_work(&adev->userq_reset_work); + amdgpu_userq_mgr_cancel_reset_work(adev); if (adev->kfd.dev) cancel_work(&adev->kfd.reset_work); @@ -5836,6 +5835,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* We need to lock reset domain only once both for XGMI and single device */ amdgpu_device_recovery_get_reset_lock(adev, &device_list); + /* unmap all the mappings of doorbell and framebuffer to prevent user space from + * accessing them + */ + unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1); + amdgpu_amdkfd_clear_kfd_mapping(adev); + amdgpu_device_halt_activities(adev, job, reset_context, &device_list, hive, need_emergency_restart); if (need_emergency_restart) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 8d99bfaa498f..80efeca0ab73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -304,7 +304,7 @@ static int amdgpu_discovery_get_tmr_info(struct amdgpu_device *adev, adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset; adev->discovery.size = adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb << 10; - if (!adev->discovery.offset || !adev->discovery.size) + if (!adev->discovery.size) return -EINVAL; } else { goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 5376035d32fe..fe6d988e7f24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -31,6 +31,7 @@ #include <linux/pci.h> #include <linux/dma-buf.h> #include <linux/dma-fence-unwrap.h> +#include <linux/uaccess.h> #include <drm/amdgpu_drm.h> #include <drm/drm_drv.h> @@ -508,6 +509,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, if (offset_in_page(args->addr | args->size)) return -EINVAL; + if (!access_ok((void __user *)(uintptr_t)args->addr, args->size)) + return -EFAULT; + /* reject unknown flag values */ if (args->flags & ~(AMDGPU_GEM_USERPTR_READONLY | AMDGPU_GEM_USERPTR_ANONONLY | AMDGPU_GEM_USERPTR_VALIDATE | @@ -821,7 +825,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_syncobj *timeline_syncobj = NULL; struct dma_fence_chain *timeline_chain = NULL; struct drm_exec exec; - uint64_t vm_size; + uint64_t vm_size, tmp; int r = 0; /* Validate virtual address range against reserved regions. */ @@ -845,7 +849,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; vm_size -= AMDGPU_VA_RESERVED_TOP; - if (args->va_address + args->map_size > vm_size) { + if (check_add_overflow(args->va_address, args->map_size, &tmp) || tmp > vm_size) { dev_dbg(dev->dev, "va_address 0x%llx is in top reserved area 0x%llx\n", args->va_address + args->map_size, vm_size); @@ -1089,9 +1093,16 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, * If that number is larger than the size of the array, the ioctl must * be retried. */ + if (args->num_entries > INT_MAX / sizeof(*vm_entries)) { + r = -EINVAL; + goto out_exec; + } + vm_entries = kvcalloc(args->num_entries, sizeof(*vm_entries), GFP_KERNEL); - if (!vm_entries) - return -ENOMEM; + if (!vm_entries) { + r = -ENOMEM; + goto out_exec; + } amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) { if (num_mappings < args->num_entries) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 620fddde4c4d..a5d26b943f6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -199,11 +199,18 @@ int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr, enum drm_mm_insert_mode mode) { struct amdgpu_device *adev = container_of(mgr, typeof(*adev), mman.gtt_mgr); + u32 alignment = 0; int r; + /* Align to TLB L2 cache entry size to work around "V bit HW bug" */ + if (adev->asic_type == CHIP_TAHITI) { + alignment = 32 * 1024 / AMDGPU_GPU_PAGE_SIZE; + num_pages = ALIGN(num_pages, alignment); + } + spin_lock(&mgr->lock); r = drm_mm_insert_node_in_range(&mgr->mm, mm_node, num_pages, - 0, GART_ENTRY_WITHOUT_BO_COLOR, 0, + alignment, GART_ENTRY_WITHOUT_BO_COLOR, 0, adev->gmc.gart_size >> PAGE_SHIFT, mode); spin_unlock(&mgr->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index f72990ac046e..5bfa5a84b09c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -51,8 +51,6 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_hmm.h" -#define MAX_WALK_BYTE (2UL << 30) - /** * amdgpu_hmm_invalidate_gfx - callback to notify about mm change * @@ -78,6 +76,7 @@ static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni, mmu_interval_set_seq(mni, cur_seq); + amdgpu_vm_bo_invalidate(bo, false); r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); mutex_unlock(&adev->notifier_lock); @@ -170,11 +169,13 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, void *owner, struct amdgpu_hmm_range *range) { - unsigned long end; + const u64 max_bytes = SZ_2G; + + struct hmm_range *hmm_range = &range->hmm_range; unsigned long timeout; unsigned long *pfns; - int r = 0; - struct hmm_range *hmm_range = &range->hmm_range; + unsigned long end; + int r; pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); if (unlikely(!pfns)) { @@ -191,8 +192,9 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, end = start + npages * PAGE_SIZE; hmm_range->dev_private_owner = owner; + hmm_range->notifier_seq = mmu_interval_read_begin(notifier); do { - hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end); + hmm_range->end = min(hmm_range->start + max_bytes, end); pr_debug("hmm range: start = 0x%lx, end = 0x%lx", hmm_range->start, hmm_range->end); @@ -200,7 +202,6 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); retry: - hmm_range->notifier_seq = mmu_interval_read_begin(notifier); r = hmm_range_fault(hmm_range); if (unlikely(r)) { if (r == -EBUSY && !time_after(jiffies, timeout)) @@ -210,7 +211,7 @@ retry: if (hmm_range->end == end) break; - hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT; + hmm_range->hmm_pfns += max_bytes >> PAGE_SHIFT; hmm_range->start = hmm_range->end; } while (hmm_range->end < end); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 912c9afaf9e1..4d68732d6223 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -96,7 +96,8 @@ struct amdgpu_bo_va { * if non-zero, cannot unmap from GPU because user queues may still access it */ unsigned int queue_refcount; - atomic_t userq_va_mapped; + /* Indicates if this buffer is mapped for any user queue. Once set, never reset. */ + bool userq_va_mapped; }; struct amdgpu_bo { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 6c644cfe6695..fc9f3adf9912 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2280,7 +2280,8 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) list_for_each_entry(obj, &con->head, node) { if (amdgpu_ras_is_supported(adev, obj->head.block) && (obj->attr_inuse == 1)) { - sprintf(fs_info.debugfs_name, "%s_err_inject", + snprintf(fs_info.debugfs_name, sizeof(fs_info.debugfs_name), + "%s_err_inject", get_ras_block_str(&obj->head)); fs_info.head = obj->head; amdgpu_ras_debugfs_create(adev, &fs_info, dir); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 66e8a2f7afcf..d6bee5c30073 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -552,8 +552,9 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { struct amdgpu_ring *ring = file_inode(f)->i_private; - uint32_t value, result, early[3]; + u32 value, result, early[3] = { 0 }; uint64_t p; + u32 avail_dw, start_dw, read_dw; loff_t i; int r; @@ -565,10 +566,10 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, result = 0; - if (*pos < 12) { - if (ring->funcs->type == AMDGPU_RING_TYPE_CPER) - mutex_lock(&ring->adev->cper.ring_lock); + if (ring->funcs->type == AMDGPU_RING_TYPE_CPER) + mutex_lock(&ring->adev->cper.ring_lock); + if (*pos < 12) { early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask; early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask; early[2] = ring->wptr & ring->buf_mask; @@ -600,13 +601,24 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, *pos += 4; } } else { + early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask; + early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask; + p = early[0]; if (early[0] <= early[1]) - size = (early[1] - early[0]); + avail_dw = early[1] - early[0]; else - size = ring->ring_size - (early[0] - early[1]); + avail_dw = ring->buf_mask + 1 - (early[0] - early[1]); - while (size) { + start_dw = (*pos > 12) ? ((*pos - 12) >> 2) : 0; + if (start_dw >= avail_dw) + goto out; + + p = (p + start_dw) & ring->ptr_mask; + avail_dw -= start_dw; + read_dw = min_t(u32, avail_dw, size >> 2); + + while (read_dw) { if (p == early[1]) goto out; @@ -619,9 +631,10 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, buf += 4; result += 4; - size--; + read_dw--; p++; p &= ring->ptr_mask; + *pos += 4; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index a0b479d5fff1..f4be19223588 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -175,11 +175,14 @@ int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, { unsigned long bit_pos; - bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem); - if (bit_pos >= adev->seq64.num_sem) - return -ENOSPC; + for (;;) { + bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem); + if (bit_pos >= adev->seq64.num_sem) + return -ENOSPC; - __set_bit(bit_pos, adev->seq64.used); + if (!test_and_set_bit(bit_pos, adev->seq64.used)) + break; + } *va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev); @@ -205,7 +208,7 @@ void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va) bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64); if (bit_pos < adev->seq64.num_sem) - __clear_bit(bit_pos, adev->seq64.used); + clear_bit(bit_pos, adev->seq64.used); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 0238c2798de4..b8ed931f8a40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -130,6 +130,7 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && adev->umc.ras->ras_block.hw_ops->query_ras_error_address && adev->umc.max_ras_err_cnt_per_query) { + kfree(err_data->err_addr); err_data->err_addr = kzalloc_objs(struct eeprom_table_record, adev->umc.max_ras_err_cnt_per_query); @@ -160,6 +161,7 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if (adev->umc.ras && adev->umc.ras->ecc_info_query_ras_error_address && adev->umc.max_ras_err_cnt_per_query) { + kfree(err_data->err_addr); err_data->err_addr = kzalloc_objs(struct eeprom_table_record, adev->umc.max_ras_err_cnt_per_query); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index de140a8ed135..cf192500800f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -82,19 +82,11 @@ static bool amdgpu_userq_is_reset_type_supported(struct amdgpu_device *adev, return false; } -static void amdgpu_userq_gpu_reset(struct amdgpu_device *adev) -{ - if (amdgpu_device_should_recover_gpu(adev)) { - amdgpu_reset_domain_schedule(adev->reset_domain, - &adev->userq_reset_work); - /* Wait for the reset job to complete */ - flush_work(&adev->userq_reset_work); - } -} - -static int -amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) +static void amdgpu_userq_mgr_reset_work(struct work_struct *work) { + struct amdgpu_userq_mgr *uq_mgr = + container_of(work, struct amdgpu_userq_mgr, + reset_work); struct amdgpu_device *adev = uq_mgr->adev; const int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE, @@ -103,15 +95,11 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) }; const int num_queue_types = ARRAY_SIZE(queue_types); bool gpu_reset = false; - int r = 0; - int i; - - /* Warning if current process mutex is not held */ - WARN_ON(!mutex_is_locked(&uq_mgr->userq_mutex)); + int i, r; if (unlikely(adev->debug_disable_gpu_ring_reset)) { dev_err(adev->dev, "userq reset disabled by debug mask\n"); - return 0; + return; } /* @@ -119,7 +107,7 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) * skip all reset detection logic */ if (!amdgpu_gpu_recovery) - return 0; + return; /* * Iterate through all queue types to detect and reset problematic queues @@ -127,9 +115,11 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) */ for (i = 0; i < num_queue_types; i++) { int ring_type = queue_types[i]; - const struct amdgpu_userq_funcs *funcs = adev->userq_funcs[ring_type]; + const struct amdgpu_userq_funcs *funcs = + adev->userq_funcs[ring_type]; - if (!amdgpu_userq_is_reset_type_supported(adev, ring_type, AMDGPU_RESET_TYPE_PER_QUEUE)) + if (!amdgpu_userq_is_reset_type_supported(adev, ring_type, + AMDGPU_RESET_TYPE_PER_QUEUE)) continue; if (atomic_read(&uq_mgr->userq_count[ring_type]) > 0 && @@ -142,46 +132,43 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) } } - if (gpu_reset) - amdgpu_userq_gpu_reset(adev); + if (gpu_reset) { + struct amdgpu_reset_context reset_context; - return r; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + reset_context.src = AMDGPU_RESET_SRC_USERQ; + set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/ + + amdgpu_device_gpu_recover(adev, NULL, &reset_context); + } } static void amdgpu_userq_hang_detect_work(struct work_struct *work) { - struct amdgpu_usermode_queue *queue = container_of(work, - struct amdgpu_usermode_queue, - hang_detect_work.work); - struct dma_fence *fence; - struct amdgpu_userq_mgr *uq_mgr; - - if (!queue->userq_mgr) - return; - - uq_mgr = queue->userq_mgr; - fence = READ_ONCE(queue->hang_detect_fence); - /* Fence already signaled – no action needed */ - if (!fence || dma_fence_is_signaled(fence)) - return; + struct amdgpu_usermode_queue *queue = + container_of(work, struct amdgpu_usermode_queue, + hang_detect_work.work); - mutex_lock(&uq_mgr->userq_mutex); - amdgpu_userq_detect_and_reset_queues(uq_mgr); - mutex_unlock(&uq_mgr->userq_mutex); + /* + * Don't schedule the work here! Scheduling or queue work from one reset + * handler to another is illegal if you don't take extra precautions! + */ + amdgpu_userq_mgr_reset_work(&queue->userq_mgr->reset_work); } /* * Start hang detection for a user queue fence. A delayed work will be scheduled - * to check if the fence is still pending after the timeout period. -*/ + * to reset the queues when the fence doesn't signal in time. + */ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue) { struct amdgpu_device *adev; unsigned long timeout_ms; - if (!queue || !queue->userq_mgr || !queue->userq_mgr->adev) - return; - adev = queue->userq_mgr->adev; /* Determine timeout based on queue type */ switch (queue->queue_type) { @@ -199,10 +186,8 @@ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue) break; } - /* Store the fence to monitor and schedule hang detection */ - WRITE_ONCE(queue->hang_detect_fence, queue->last_fence); - schedule_delayed_work(&queue->hang_detect_work, - msecs_to_jiffies(timeout_ms)); + queue_delayed_work(adev->reset_domain->wq, &queue->hang_detect_work, + msecs_to_jiffies(timeout_ms)); } void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell) @@ -210,47 +195,35 @@ void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell) struct xarray *xa = &adev->userq_doorbell_xa; struct amdgpu_usermode_queue *queue; unsigned long flags; + int r; xa_lock_irqsave(xa, flags); queue = xa_load(xa, doorbell); - if (queue) - amdgpu_userq_fence_driver_process(queue->fence_drv); - xa_unlock_irqrestore(xa, flags); -} - -static void amdgpu_userq_init_hang_detect_work(struct amdgpu_usermode_queue *queue) -{ - INIT_DELAYED_WORK(&queue->hang_detect_work, amdgpu_userq_hang_detect_work); - queue->hang_detect_fence = NULL; -} - -static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue, - struct amdgpu_bo_va_mapping *va_map, u64 addr) -{ - struct amdgpu_userq_va_cursor *va_cursor; - struct userq_va_list; - - va_cursor = kzalloc_obj(*va_cursor); - if (!va_cursor) - return -ENOMEM; - - INIT_LIST_HEAD(&va_cursor->list); - va_cursor->gpu_addr = addr; - atomic_set(&va_map->bo_va->userq_va_mapped, 1); - list_add(&va_cursor->list, &queue->userq_va_list); + if (queue) { + r = amdgpu_userq_fence_driver_process(queue->fence_drv); + /* + * We are in interrupt context here, this *can't* wait for + * reset work to finish. + */ + if (r >= 0) + cancel_delayed_work(&queue->hang_detect_work); - return 0; + /* Restart the timer when there are still fences pending */ + if (r == 1) + amdgpu_userq_start_hang_detect_work(queue); + } + xa_unlock_irqrestore(xa, flags); } int amdgpu_userq_input_va_validate(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue, - u64 addr, u64 expected_size) + u64 addr, u64 expected_size, + u64 *va_out) { struct amdgpu_bo_va_mapping *va_map; struct amdgpu_vm *vm = queue->vm; u64 user_addr; u64 size; - int r = 0; /* Caller must hold vm->root.bo reservation */ dma_resv_assert_held(queue->vm->root.bo->tbo.base.resv); @@ -259,20 +232,18 @@ int amdgpu_userq_input_va_validate(struct amdgpu_device *adev, size = expected_size >> AMDGPU_GPU_PAGE_SHIFT; va_map = amdgpu_vm_bo_lookup_mapping(vm, user_addr); - if (!va_map) { - r = -EINVAL; - goto out_err; - } + if (!va_map) + return -EINVAL; + /* Only validate the userq whether resident in the VM mapping range */ if (user_addr >= va_map->start && va_map->last - user_addr + 1 >= size) { - amdgpu_userq_buffer_va_list_add(queue, va_map, user_addr); + va_map->bo_va->userq_va_mapped = true; + *va_out = user_addr; return 0; } - r = -EINVAL; -out_err: - return r; + return -EINVAL; } static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr) @@ -283,7 +254,7 @@ static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr) dma_resv_assert_held(vm->root.bo->tbo.base.resv); mapping = amdgpu_vm_bo_lookup_mapping(vm, addr); - if (!IS_ERR_OR_NULL(mapping) && atomic_read(&mapping->bo_va->userq_va_mapped)) + if (!IS_ERR_OR_NULL(mapping) && mapping->bo_va->userq_va_mapped) r = true; else r = false; @@ -293,14 +264,16 @@ static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr) static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue) { - struct amdgpu_userq_va_cursor *va_cursor, *tmp; - int r = 0; + int i, r = 0; - list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) { - r += amdgpu_userq_buffer_va_mapped(queue->vm, va_cursor->gpu_addr); + for (i = 0; i < ARRAY_SIZE(queue->userq_vas.va_array); i++) { + if (!queue->userq_vas.va_array[i]) + continue; + r += amdgpu_userq_buffer_va_mapped(queue->vm, + queue->userq_vas.va_array[i]); dev_dbg(queue->userq_mgr->adev->dev, "validate the userq mapping:%p va:%llx r:%d\n", - queue, va_cursor->gpu_addr, r); + queue, queue->userq_vas.va_array[i], r); } if (r != 0) @@ -309,35 +282,7 @@ static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue) return false; } -static void amdgpu_userq_buffer_va_list_del(struct amdgpu_bo_va_mapping *mapping, - struct amdgpu_userq_va_cursor *va_cursor) -{ - atomic_set(&mapping->bo_va->userq_va_mapped, 0); - list_del(&va_cursor->list); - kfree(va_cursor); -} - -static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, - struct amdgpu_usermode_queue *queue) -{ - struct amdgpu_userq_va_cursor *va_cursor, *tmp; - struct amdgpu_bo_va_mapping *mapping; - - /* Caller must hold vm->root.bo reservation */ - dma_resv_assert_held(queue->vm->root.bo->tbo.base.resv); - - list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) { - mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, va_cursor->gpu_addr); - if (!mapping) { - return -EINVAL; - } - dev_dbg(adev->dev, "delete the userq:%p va:%llx\n", - queue, va_cursor->gpu_addr); - amdgpu_userq_buffer_va_list_del(mapping, va_cursor); - } - return 0; -} static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue) { @@ -345,23 +290,18 @@ static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue) struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; - bool found_hung_queue = false; - int r = 0; + int r; if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { r = userq_funcs->preempt(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; - found_hung_queue = true; + return r; } else { queue->state = AMDGPU_USERQ_STATE_PREEMPTED; } } - - if (found_hung_queue) - amdgpu_userq_detect_and_reset_queues(uq_mgr); - - return r; + return 0; } static int amdgpu_userq_restore_helper(struct amdgpu_usermode_queue *queue) @@ -390,24 +330,21 @@ static int amdgpu_userq_unmap_helper(struct amdgpu_usermode_queue *queue) struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; - bool found_hung_queue = false; - int r = 0; + int r; if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) || - (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) { + (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) { + r = userq_funcs->unmap(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; - found_hung_queue = true; + return r; } else { queue->state = AMDGPU_USERQ_STATE_UNMAPPED; } } - if (found_hung_queue) - amdgpu_userq_detect_and_reset_queues(uq_mgr); - - return r; + return 0; } static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue) @@ -416,19 +353,19 @@ static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue) struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; - int r = 0; + int r; if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) { r = userq_funcs->map(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; - amdgpu_userq_detect_and_reset_queues(uq_mgr); + return r; } else { queue->state = AMDGPU_USERQ_STATE_MAPPED; } } - return r; + return 0; } static void amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue) @@ -445,18 +382,14 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue) { struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; struct amdgpu_device *adev = uq_mgr->adev; - const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; /* Wait for mode-1 reset to complete */ down_read(&adev->reset_domain->sem); - uq_funcs->mqd_destroy(queue); /* Use interrupt-safe locking since IRQ handlers may access these XArrays */ xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index); amdgpu_userq_fence_driver_free(queue); queue->fence_drv = NULL; - queue->userq_mgr = NULL; - list_del(&queue->userq_va_list); up_read(&adev->reset_domain->sem); } @@ -495,74 +428,15 @@ retry: dma_fence_put(ev_fence); } -int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_userq_obj *userq_obj, - int size) -{ - struct amdgpu_device *adev = uq_mgr->adev; - struct amdgpu_bo_param bp; - int r; - memset(&bp, 0, sizeof(bp)); - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_GTT; - bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - bp.type = ttm_bo_type_kernel; - bp.size = size; - bp.resv = NULL; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - - r = amdgpu_bo_create(adev, &bp, &userq_obj->obj); - if (r) { - drm_file_err(uq_mgr->file, "Failed to allocate BO for userqueue (%d)", r); - return r; - } - r = amdgpu_bo_reserve(userq_obj->obj, true); - if (r) { - drm_file_err(uq_mgr->file, "Failed to reserve BO to map (%d)", r); - goto free_obj; - } - - r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo); - if (r) { - drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r); - goto unresv; - } - - r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr); - if (r) { - drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r); - goto unresv; - } - - userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj); - amdgpu_bo_unreserve(userq_obj->obj); - memset(userq_obj->cpu_ptr, 0, size); - return 0; - -unresv: - amdgpu_bo_unreserve(userq_obj->obj); - -free_obj: - amdgpu_bo_unref(&userq_obj->obj); - return r; -} - -void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_userq_obj *userq_obj) -{ - amdgpu_bo_kunmap(userq_obj->obj); - amdgpu_bo_unref(&userq_obj->obj); -} - -uint64_t +static int amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_db_info *db_info, - struct drm_file *filp) + struct drm_file *filp, + u64 *index) { - uint64_t index; + u64 doorbell_index; struct drm_gem_object *gobj; struct amdgpu_userq_obj *db_obj = db_info->db_obj; int r, db_size; @@ -609,12 +483,13 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, goto unpin_bo; } - index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj, - db_info->doorbell_offset, db_size); + doorbell_index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj, + db_info->doorbell_offset, db_size); drm_dbg_driver(adev_to_drm(uq_mgr->adev), - "[Usermode queues] doorbell index=%lld\n", index); + "[Usermode queues] doorbell index=%lld\n", doorbell_index); amdgpu_bo_unreserve(db_obj->obj); - return index; + *index = doorbell_index; + return 0; unpin_bo: amdgpu_bo_unpin(db_obj->obj); @@ -629,9 +504,7 @@ static int amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { struct amdgpu_device *adev = uq_mgr->adev; - struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); - struct amdgpu_vm *vm = &fpriv->vm; - + const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; int r = 0; cancel_delayed_work_sync(&uq_mgr->resume_work); @@ -639,27 +512,21 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que /* Cancel any pending hang detection work and cleanup */ cancel_delayed_work_sync(&queue->hang_detect_work); - r = amdgpu_bo_reserve(vm->root.bo, false); - if (r) { - drm_file_err(uq_mgr->file, "Failed to reserve root bo during userqueue destroy\n"); - return r; - } - amdgpu_userq_buffer_vas_list_cleanup(adev, queue); - amdgpu_bo_unreserve(vm->root.bo); - mutex_lock(&uq_mgr->userq_mutex); - queue->hang_detect_fence = NULL; amdgpu_userq_wait_for_last_fence(queue); #if defined(CONFIG_DEBUG_FS) debugfs_remove_recursive(queue->debugfs_queue); #endif - amdgpu_userq_detect_and_reset_queues(uq_mgr); r = amdgpu_userq_unmap_helper(queue); atomic_dec(&uq_mgr->userq_count[queue->queue_type]); amdgpu_userq_cleanup(queue); mutex_unlock(&uq_mgr->userq_mutex); + cancel_delayed_work_sync(&queue->hang_detect_work); + uq_funcs->mqd_destroy(queue); + queue->userq_mgr = NULL; + amdgpu_bo_reserve(queue->db_obj.obj, true); amdgpu_bo_unpin(queue->db_obj.obj); amdgpu_bo_unreserve(queue->db_obj.obj); @@ -731,14 +598,14 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) const struct amdgpu_userq_funcs *uq_funcs; struct amdgpu_usermode_queue *queue; struct amdgpu_db_info db_info; - bool skip_map_queue; - u32 qid; uint64_t index; - int r = 0; - int priority = - (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >> - AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; + int priority; + u32 qid; + int r; + priority = + (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) + >> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; r = amdgpu_userq_priority_permit(filp, priority); if (r) return r; @@ -751,128 +618,121 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) uq_funcs = adev->userq_funcs[args->in.ip_type]; if (!uq_funcs) { - drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n", - args->in.ip_type); r = -EINVAL; goto err_pm_runtime; } queue = kzalloc_obj(struct amdgpu_usermode_queue); if (!queue) { - drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n"); r = -ENOMEM; goto err_pm_runtime; } - INIT_LIST_HEAD(&queue->userq_va_list); + kref_init(&queue->refcount); queue->doorbell_handle = args->in.doorbell_handle; queue->queue_type = args->in.ip_type; queue->vm = &fpriv->vm; queue->priority = priority; - - db_info.queue_type = queue->queue_type; - db_info.doorbell_handle = queue->doorbell_handle; - db_info.db_obj = &queue->db_obj; - db_info.doorbell_offset = args->in.doorbell_offset; - queue->userq_mgr = uq_mgr; + INIT_DELAYED_WORK(&queue->hang_detect_work, + amdgpu_userq_hang_detect_work); - /* Validate the userq virtual address.*/ - r = amdgpu_bo_reserve(fpriv->vm.root.bo, false); + r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv); if (r) goto free_queue; - if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va, args->in.queue_size) || - amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) || - amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) { + xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); + mutex_init(&queue->fence_drv_lock); + /* Make sure the queue can actually run with those virtual addresses. */ + r = amdgpu_bo_reserve(fpriv->vm.root.bo, false); + if (r) + goto free_fence_drv; + + if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va, + args->in.queue_size, + &queue->userq_vas.va.queue_rb) || + amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, + AMDGPU_GPU_PAGE_SIZE, + &queue->userq_vas.va.rptr) || + amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, + AMDGPU_GPU_PAGE_SIZE, + &queue->userq_vas.va.wptr)) { r = -EINVAL; amdgpu_bo_unreserve(fpriv->vm.root.bo); - goto clean_mapping; + goto free_fence_drv; } amdgpu_bo_unreserve(fpriv->vm.root.bo); /* Convert relative doorbell offset into absolute doorbell index */ - index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp); - if (index == (uint64_t)-EINVAL) { + db_info.queue_type = queue->queue_type; + db_info.doorbell_handle = queue->doorbell_handle; + db_info.db_obj = &queue->db_obj; + db_info.doorbell_offset = args->in.doorbell_offset; + r = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp, &index); + if (r) { drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n"); - r = -EINVAL; - goto clean_mapping; + goto free_fence_drv; } queue->doorbell_index = index; - xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); - r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv); - if (r) { - drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n"); - goto clean_mapping; - } - r = uq_funcs->mqd_create(queue, &args->in); if (r) { drm_file_err(uq_mgr->file, "Failed to create Queue\n"); - goto clean_fence_driver; + goto clean_doorbell_bo; } + /* Update VM owner at userq submit-time for page-fault attribution. */ + amdgpu_vm_set_task_info(&fpriv->vm); + + r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, + GFP_KERNEL)); + if (r) + goto clean_mqd; + amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); /* don't map the queue if scheduling is halted */ - if (adev->userq_halt_for_enforce_isolation && - ((queue->queue_type == AMDGPU_HW_IP_GFX) || - (queue->queue_type == AMDGPU_HW_IP_COMPUTE))) - skip_map_queue = true; - else - skip_map_queue = false; - if (!skip_map_queue) { + if (!adev->userq_halt_for_enforce_isolation || + ((queue->queue_type != AMDGPU_HW_IP_GFX) && + (queue->queue_type != AMDGPU_HW_IP_COMPUTE))) { r = amdgpu_userq_map_helper(queue); if (r) { drm_file_err(uq_mgr->file, "Failed to map Queue\n"); - goto clean_mqd; + mutex_unlock(&uq_mgr->userq_mutex); + goto erase_doorbell; } } - /* drop this refcount during queue destroy */ - kref_init(&queue->refcount); - - /* Wait for mode-1 reset to complete */ - down_read(&adev->reset_domain->sem); + atomic_inc(&uq_mgr->userq_count[queue->queue_type]); + mutex_unlock(&uq_mgr->userq_mutex); r = xa_alloc(&uq_mgr->userq_xa, &qid, queue, - XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL); - if (r) { - if (!skip_map_queue) - amdgpu_userq_unmap_helper(queue); - r = -ENOMEM; - goto clean_reset_domain; - } - - r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL)); + XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), + GFP_KERNEL); if (r) { - xa_erase(&uq_mgr->userq_xa, qid); - if (!skip_map_queue) - amdgpu_userq_unmap_helper(queue); - goto clean_reset_domain; + /* + * This drops the last reference which should take care of + * all cleanup. + */ + amdgpu_userq_put(queue); + return r; } - up_read(&adev->reset_domain->sem); amdgpu_debugfs_userq_init(filp, queue, qid); - amdgpu_userq_init_hang_detect_work(queue); - args->out.queue_id = qid; - atomic_inc(&uq_mgr->userq_count[queue->queue_type]); - mutex_unlock(&uq_mgr->userq_mutex); return 0; -clean_reset_domain: - up_read(&adev->reset_domain->sem); +erase_doorbell: + xa_erase_irq(&adev->userq_doorbell_xa, index); clean_mqd: - mutex_unlock(&uq_mgr->userq_mutex); uq_funcs->mqd_destroy(queue); -clean_fence_driver: +clean_doorbell_bo: + amdgpu_bo_reserve(queue->db_obj.obj, true); + amdgpu_bo_unpin(queue->db_obj.obj); + amdgpu_bo_unreserve(queue->db_obj.obj); + amdgpu_bo_unref(&queue->db_obj.obj); +free_fence_drv: amdgpu_userq_fence_driver_free(queue); -clean_mapping: - amdgpu_bo_reserve(fpriv->vm.root.bo, true); - amdgpu_userq_buffer_vas_list_cleanup(adev, queue); - amdgpu_bo_unreserve(fpriv->vm.root.bo); free_queue: kfree(queue); err_pm_runtime: @@ -1262,7 +1122,6 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) unsigned long queue_id; int ret = 0, r; - amdgpu_userq_detect_and_reset_queues(uq_mgr); /* Try to unmap all the queues in this process ctx */ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { r = amdgpu_userq_preempt_helper(queue); @@ -1270,29 +1129,16 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) ret = r; } - if (ret) + if (ret) { drm_file_err(uq_mgr->file, "Couldn't unmap all the queues, eviction failed ret=%d\n", ret); + amdgpu_reset_domain_schedule(uq_mgr->adev->reset_domain, + &uq_mgr->reset_work); + flush_work(&uq_mgr->reset_work); + } return ret; } -void amdgpu_userq_reset_work(struct work_struct *work) -{ - struct amdgpu_device *adev = container_of(work, struct amdgpu_device, - userq_reset_work); - struct amdgpu_reset_context reset_context; - - memset(&reset_context, 0, sizeof(reset_context)); - - reset_context.method = AMD_RESET_METHOD_NONE; - reset_context.reset_req_dev = adev; - reset_context.src = AMDGPU_RESET_SRC_USERQ; - set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/ - - amdgpu_device_gpu_recover(adev, NULL, &reset_context); -} - static void amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) { @@ -1326,9 +1172,24 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f userq_mgr->file = file_priv; INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker); + INIT_WORK(&userq_mgr->reset_work, amdgpu_userq_mgr_reset_work); return 0; } +void amdgpu_userq_mgr_cancel_reset_work(struct amdgpu_device *adev) +{ + struct xarray *xa = &adev->userq_doorbell_xa; + struct amdgpu_usermode_queue *queue; + unsigned long flags, queue_id; + + xa_lock_irqsave(xa, flags); + xa_for_each(xa, queue_id, queue) { + cancel_delayed_work(&queue->hang_detect_work); + cancel_work(&queue->userq_mgr->reset_work); + } + xa_unlock_irqrestore(xa, flags); +} + void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr) { cancel_delayed_work_sync(&userq_mgr->resume_work); @@ -1354,6 +1215,14 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) } xa_destroy(&userq_mgr->userq_xa); + + /* + * Drain any in-flight reset_work. By this point all queues are freed + * and userq_count is 0, so if reset_work starts now it exits early. + * We still need to wait in case it was already executing gpu_recover. + */ + cancel_work_sync(&userq_mgr->reset_work); + mutex_destroy(&userq_mgr->userq_mutex); } @@ -1372,7 +1241,6 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev) uqm = queue->userq_mgr; cancel_delayed_work_sync(&uqm->resume_work); guard(mutex)(&uqm->userq_mutex); - amdgpu_userq_detect_and_reset_queues(uqm); if (adev->in_s0ix) r = amdgpu_userq_preempt_helper(queue); else @@ -1431,7 +1299,6 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, if (((queue->queue_type == AMDGPU_HW_IP_GFX) || (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && (queue->xcp_id == idx)) { - amdgpu_userq_detect_and_reset_queues(uqm); r = amdgpu_userq_preempt_helper(queue); if (r) ret = r; @@ -1504,23 +1371,21 @@ void amdgpu_userq_pre_reset(struct amdgpu_device *adev) { const struct amdgpu_userq_funcs *userq_funcs; struct amdgpu_usermode_queue *queue; - struct amdgpu_userq_mgr *uqm; unsigned long queue_id; + /* TODO: We probably need a new lock for the queue state */ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) { - uqm = queue->userq_mgr; - cancel_delayed_work_sync(&uqm->resume_work); - if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { - amdgpu_userq_wait_for_last_fence(queue); - userq_funcs = adev->userq_funcs[queue->queue_type]; - userq_funcs->unmap(queue); - /* just mark all queues as hung at this point. - * if unmap succeeds, we could map again - * in amdgpu_userq_post_reset() if vram is not lost - */ - queue->state = AMDGPU_USERQ_STATE_HUNG; - amdgpu_userq_fence_driver_force_completion(queue); - } + if (queue->state != AMDGPU_USERQ_STATE_MAPPED) + continue; + + userq_funcs = adev->userq_funcs[queue->queue_type]; + userq_funcs->unmap(queue); + /* just mark all queues as hung at this point. + * if unmap succeeds, we could map again + * in amdgpu_userq_post_reset() if vram is not lost + */ + queue->state = AMDGPU_USERQ_STATE_HUNG; + amdgpu_userq_fence_driver_force_completion(queue); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 8b8f345b60b6..28cfc6682333 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -48,11 +48,6 @@ struct amdgpu_userq_obj { struct amdgpu_bo *obj; }; -struct amdgpu_userq_va_cursor { - u64 gpu_addr; - struct list_head list; -}; - struct amdgpu_usermode_queue { int queue_type; enum amdgpu_userq_state state; @@ -66,17 +61,44 @@ struct amdgpu_usermode_queue { struct amdgpu_userq_obj db_obj; struct amdgpu_userq_obj fw_obj; struct amdgpu_userq_obj wptr_obj; + + /** + * @fence_drv_lock: Protecting @fence_drv_xa. + */ + struct mutex fence_drv_lock; + + /** + * @fence_drv_xa: + * + * References to the external fence drivers returned by wait_ioctl. + * Dropped on the next signaled dma_fence or queue destruction. + */ struct xarray fence_drv_xa; struct amdgpu_userq_fence_driver *fence_drv; struct dma_fence *last_fence; u32 xcp_id; int priority; struct dentry *debugfs_queue; - struct delayed_work hang_detect_work; - struct dma_fence *hang_detect_fence; + + /** + * @hang_detect_work: + * + * Delayed work which runs when userq_fences time out. + */ + struct delayed_work hang_detect_work; struct kref refcount; - struct list_head userq_va_list; + union { + struct { + u64 queue_rb; + u64 wptr; + u64 rptr; + u64 eop; + u64 shadow; + u64 csa; + } va; + u64 va_array[6]; + } userq_vas; }; struct amdgpu_userq_funcs { @@ -105,6 +127,13 @@ struct amdgpu_userq_mgr { struct amdgpu_device *adev; struct delayed_work resume_work; struct drm_file *file; + + /** + * @reset_work: + * + * Reset work which is used when eviction fails. + */ + struct work_struct reset_work; atomic_t userq_count[AMDGPU_RING_TYPE_MAX]; }; @@ -123,25 +152,15 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, struct amdgpu_device *adev); +void amdgpu_userq_mgr_cancel_reset_work(struct amdgpu_device *adev); void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr); void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr); -int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_userq_obj *userq_obj, - int size); - -void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_userq_obj *userq_obj); - void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr); void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_eviction_fence_mgr *evf_mgr); -uint64_t amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_db_info *db_info, - struct drm_file *filp); - u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev); bool amdgpu_userq_enabled(struct drm_device *dev); @@ -160,7 +179,8 @@ void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell); int amdgpu_userq_input_va_validate(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue, - u64 addr, u64 expected_size); + u64 addr, u64 expected_size, u64 *va_out); + void amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping, uint64_t saddr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index e2d5f04296e1..a41fb72dba94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -121,6 +121,7 @@ amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq) userq->last_fence = NULL; amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa); xa_destroy(&userq->fence_drv_xa); + mutex_destroy(&userq->fence_drv_lock); /* Drop the queue's ownership reference to fence_drv explicitly */ amdgpu_userq_fence_driver_put(userq->fence_drv); } @@ -134,7 +135,14 @@ amdgpu_userq_fence_put_fence_drv_array(struct amdgpu_userq_fence *userq_fence) userq_fence->fence_drv_array_count = 0; } -void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) +/* + * Returns: + * -ENOENT when no fences were processes + * 1 when more fences are pending + * 0 when no fences are pending any more + */ +int +amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) { struct amdgpu_userq_fence *userq_fence, *tmp; LIST_HEAD(to_be_signaled); @@ -142,9 +150,6 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d unsigned long flags; u64 rptr; - if (!fence_drv) - return; - spin_lock_irqsave(&fence_drv->fence_list_lock, flags); rptr = amdgpu_userq_fence_read(fence_drv); @@ -157,6 +162,9 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d &userq_fence->link); spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); + if (list_empty(&to_be_signaled)) + return -ENOENT; + list_for_each_entry_safe(userq_fence, tmp, &to_be_signaled, link) { fence = &userq_fence->base; list_del_init(&userq_fence->link); @@ -168,6 +176,8 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d dma_fence_put(fence); } + /* That doesn't need to be accurate so no locking */ + return list_empty(&fence_drv->fences) ? 0 : 1; } void amdgpu_userq_fence_driver_destroy(struct kref *ref) @@ -209,80 +219,84 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy); } -static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence) +static int amdgpu_userq_fence_alloc(struct amdgpu_usermode_queue *userq, + struct amdgpu_userq_fence **pfence) { - *userq_fence = kmalloc(sizeof(**userq_fence), GFP_KERNEL); - return *userq_fence ? 0 : -ENOMEM; + struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv; + struct amdgpu_userq_fence *userq_fence; + void *entry; + + userq_fence = kmalloc(sizeof(*userq_fence), GFP_KERNEL); + if (!userq_fence) + return -ENOMEM; + + /* + * Get the next unused entry, since we fill from the start this can be + * used as size to allocate the array. + */ + mutex_lock(&userq->fence_drv_lock); + XA_STATE(xas, &userq->fence_drv_xa, 0); + + rcu_read_lock(); + do { + entry = xas_find_marked(&xas, ULONG_MAX, XA_FREE_MARK); + } while (xas_retry(&xas, entry)); + rcu_read_unlock(); + + userq_fence->fence_drv_array = kvmalloc_array(xas.xa_index, + sizeof(fence_drv), + GFP_KERNEL); + if (!userq_fence->fence_drv_array) { + mutex_unlock(&userq->fence_drv_lock); + kfree(userq_fence); + return -ENOMEM; + } + + userq_fence->fence_drv_array_count = xas.xa_index; + xa_extract(&userq->fence_drv_xa, (void **)userq_fence->fence_drv_array, + 0, ULONG_MAX, xas.xa_index, XA_PRESENT); + xa_destroy(&userq->fence_drv_xa); + + mutex_unlock(&userq->fence_drv_lock); + + amdgpu_userq_fence_driver_get(fence_drv); + userq_fence->fence_drv = fence_drv; + + *pfence = userq_fence; + return 0; } -static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, - struct amdgpu_userq_fence *userq_fence, - u64 seq, struct dma_fence **f) +static void amdgpu_userq_fence_init(struct amdgpu_usermode_queue *userq, + struct amdgpu_userq_fence *fence, + u64 seq) { - struct amdgpu_userq_fence_driver *fence_drv; - struct dma_fence *fence; + struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv; unsigned long flags; bool signaled = false; - fence_drv = userq->fence_drv; - if (!fence_drv) - return -EINVAL; - - spin_lock_init(&userq_fence->lock); - INIT_LIST_HEAD(&userq_fence->link); - fence = &userq_fence->base; - userq_fence->fence_drv = fence_drv; - - dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock, + spin_lock_init(&fence->lock); + dma_fence_init64(&fence->base, &amdgpu_userq_fence_ops, &fence->lock, fence_drv->context, seq); - amdgpu_userq_fence_driver_get(fence_drv); - dma_fence_get(fence); - - if (!xa_empty(&userq->fence_drv_xa)) { - struct amdgpu_userq_fence_driver *stored_fence_drv; - unsigned long index, count = 0; - int i = 0; - - xa_lock(&userq->fence_drv_xa); - xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) - count++; - - userq_fence->fence_drv_array = - kvmalloc_objs(struct amdgpu_userq_fence_driver *, count, - GFP_ATOMIC); - - if (userq_fence->fence_drv_array) { - xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) { - userq_fence->fence_drv_array[i] = stored_fence_drv; - __xa_erase(&userq->fence_drv_xa, index); - i++; - } - } - - userq_fence->fence_drv_array_count = i; - xa_unlock(&userq->fence_drv_xa); - } else { - userq_fence->fence_drv_array = NULL; - userq_fence->fence_drv_array_count = 0; - } + /* Make sure the fence is visible to the hang detect worker */ + dma_fence_put(userq->last_fence); + userq->last_fence = dma_fence_get(&fence->base); - /* Check if hardware has already processed the job */ + /* Check if hardware has already processed the fence */ spin_lock_irqsave(&fence_drv->fence_list_lock, flags); - if (!dma_fence_is_signaled(fence)) { - list_add_tail(&userq_fence->link, &fence_drv->fences); + if (!dma_fence_is_signaled(&fence->base)) { + dma_fence_get(&fence->base); + list_add_tail(&fence->link, &fence_drv->fences); } else { + INIT_LIST_HEAD(&fence->link); signaled = true; - dma_fence_put(fence); } spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); if (signaled) - amdgpu_userq_fence_put_fence_drv_array(userq_fence); - - *f = fence; - - return 0; + amdgpu_userq_fence_put_fence_drv_array(fence); + else + amdgpu_userq_start_hang_detect_work(userq); } static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f) @@ -356,56 +370,48 @@ static int amdgpu_userq_fence_read_wptr(struct amdgpu_device *adev, { struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo *bo; + struct drm_exec exec; u64 addr, *ptr; - int r; - - r = amdgpu_bo_reserve(queue->vm->root.bo, false); - if (r) - return r; + int ret; addr = queue->userq_prop->wptr_gpu_addr; addr &= AMDGPU_GMC_HOLE_MASK; - mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT); - if (!mapping) { - amdgpu_bo_unreserve(queue->vm->root.bo); - DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n"); - return -EINVAL; - } + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 2); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(queue->vm, &exec, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto lock_error; - bo = amdgpu_bo_ref(mapping->bo_va->base.bo); - amdgpu_bo_unreserve(queue->vm->root.bo); - r = amdgpu_bo_reserve(bo, true); - if (r) { - amdgpu_bo_unref(&bo); - DRM_ERROR("Failed to reserve userqueue wptr bo"); - return r; + mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT); + if (!mapping) { + ret = -EINVAL; + goto lock_error; + } + + ret = drm_exec_lock_obj(&exec, &mapping->bo_va->base.bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto lock_error; } - r = amdgpu_bo_kmap(bo, (void **)&ptr); - if (r) { + bo = mapping->bo_va->base.bo; + ret = amdgpu_bo_kmap(bo, (void **)&ptr); + if (ret) { DRM_ERROR("Failed mapping the userqueue wptr bo"); - goto map_error; + goto lock_error; } *wptr = le64_to_cpu(*ptr); amdgpu_bo_kunmap(bo); - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - + drm_exec_fini(&exec); return 0; -map_error: - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - - return r; -} - -static void amdgpu_userq_fence_cleanup(struct dma_fence *fence) -{ - dma_fence_put(fence); +lock_error: + drm_exec_fini(&exec); + return ret; } static void @@ -451,13 +457,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, const unsigned int num_read_bo_handles = args->num_bo_read_handles; struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; + struct drm_gem_object **gobj_write, **gobj_read; u32 *syncobj_handles, num_syncobj_handles; - struct amdgpu_userq_fence *userq_fence; - struct amdgpu_usermode_queue *queue = NULL; - struct drm_syncobj **syncobj = NULL; - struct dma_fence *fence; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_fence *fence; + struct drm_syncobj **syncobj; struct drm_exec exec; + void __user *ptr; int r, i, entry; u64 wptr; @@ -469,13 +476,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, return -EINVAL; num_syncobj_handles = args->num_syncobj_handles; - syncobj_handles = memdup_array_user(u64_to_user_ptr(args->syncobj_handles), - num_syncobj_handles, sizeof(u32)); + ptr = u64_to_user_ptr(args->syncobj_handles); + syncobj_handles = memdup_array_user(ptr, num_syncobj_handles, + sizeof(u32)); if (IS_ERR(syncobj_handles)) return PTR_ERR(syncobj_handles); - /* Array of pointers to the looked up syncobjs */ - syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL); + syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), + GFP_KERNEL); if (!syncobj) { r = -ENOMEM; goto free_syncobj_handles; @@ -489,21 +497,17 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } } - r = drm_gem_objects_lookup(filp, - u64_to_user_ptr(args->bo_read_handles), - num_read_bo_handles, - &gobj_read); + ptr = u64_to_user_ptr(args->bo_read_handles); + r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read); if (r) goto free_syncobj; - r = drm_gem_objects_lookup(filp, - u64_to_user_ptr(args->bo_write_handles), - num_write_bo_handles, + ptr = u64_to_user_ptr(args->bo_write_handles); + r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles, &gobj_write); if (r) goto put_gobj_read; - /* Retrieve the user queue */ queue = amdgpu_userq_get(userq_mgr, args->queue_id); if (!queue) { r = -ENOENT; @@ -512,73 +516,61 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, r = amdgpu_userq_fence_read_wptr(adev, queue, &wptr); if (r) - goto put_gobj_write; + goto put_queue; - r = amdgpu_userq_fence_alloc(&userq_fence); + r = amdgpu_userq_fence_alloc(queue, &fence); if (r) - goto put_gobj_write; + goto put_queue; /* We are here means UQ is active, make sure the eviction fence is valid */ amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); - /* Create a new fence */ - r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence); - if (r) { - mutex_unlock(&userq_mgr->userq_mutex); - kfree(userq_fence); - goto put_gobj_write; - } + /* Create the new fence */ + amdgpu_userq_fence_init(queue, fence, wptr); - dma_fence_put(queue->last_fence); - queue->last_fence = dma_fence_get(fence); - amdgpu_userq_start_hang_detect_work(queue); mutex_unlock(&userq_mgr->userq_mutex); + /* + * This needs to come after the fence is created since + * amdgpu_userq_ensure_ev_fence() can't be called while holding the resv + * locks. + */ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, (num_read_bo_handles + num_write_bo_handles)); - /* Lock all BOs with retry handling */ drm_exec_until_all_locked(&exec) { - r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); + r = drm_exec_prepare_array(&exec, gobj_read, + num_read_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) { - amdgpu_userq_fence_cleanup(fence); + if (r) goto exec_fini; - } - r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); + r = drm_exec_prepare_array(&exec, gobj_write, + num_write_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) { - amdgpu_userq_fence_cleanup(fence); + if (r) goto exec_fini; - } } - for (i = 0; i < num_read_bo_handles; i++) { - if (!gobj_read || !gobj_read[i]->resv) - continue; - - dma_resv_add_fence(gobj_read[i]->resv, fence, + /* And publish the new fence in the BOs and syncobj */ + for (i = 0; i < num_read_bo_handles; i++) + dma_resv_add_fence(gobj_read[i]->resv, &fence->base, DMA_RESV_USAGE_READ); - } - - for (i = 0; i < num_write_bo_handles; i++) { - if (!gobj_write || !gobj_write[i]->resv) - continue; - dma_resv_add_fence(gobj_write[i]->resv, fence, + for (i = 0; i < num_write_bo_handles; i++) + dma_resv_add_fence(gobj_write[i]->resv, &fence->base, DMA_RESV_USAGE_WRITE); - } - /* Add the created fence to syncobj/BO's */ for (i = 0; i < num_syncobj_handles; i++) - drm_syncobj_replace_fence(syncobj[i], fence); + drm_syncobj_replace_fence(syncobj[i], &fence->base); +exec_fini: /* drop the reference acquired in fence creation function */ - dma_fence_put(fence); + dma_fence_put(&fence->base); -exec_fini: drm_exec_fini(&exec); +put_queue: + amdgpu_userq_put(queue); put_gobj_write: for (i = 0; i < num_write_bo_handles; i++) drm_gem_object_put(gobj_write[i]); @@ -589,15 +581,11 @@ put_gobj_read: kvfree(gobj_read); free_syncobj: while (entry-- > 0) - if (syncobj[entry]) - drm_syncobj_put(syncobj[entry]); + drm_syncobj_put(syncobj[entry]); kfree(syncobj); free_syncobj_handles: kfree(syncobj_handles); - if (queue) - amdgpu_userq_put(queue); - return r; } @@ -872,8 +860,10 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp, * Otherwise, we would gather those references until we don't * have any more space left and crash. */ + mutex_lock(&waitq->fence_drv_lock); r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv, xa_limit_32b, GFP_KERNEL); + mutex_unlock(&waitq->fence_drv_lock); if (r) goto put_waitq; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index d355a0eecc07..0bd51616cef1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -63,7 +63,7 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, struct amdgpu_userq_fence_driver **fence_drv_req); void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq); -void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); +int amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq); void amdgpu_userq_fence_driver_destroy(struct kref *ref); int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9ba9de16a27a..c9f88ecce1a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1631,6 +1631,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, { struct amdgpu_bo_va *bo_va; struct dma_resv *resv; + struct amdgpu_bo *bo; bool clear, unlock; int r; @@ -1650,11 +1651,13 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, while (!list_empty(&vm->invalidated)) { bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, base.vm_status); - resv = bo_va->base.bo->tbo.base.resv; + bo = bo_va->base.bo; + resv = bo->tbo.base.resv; spin_unlock(&vm->status_lock); /* Try to reserve the BO to avoid clearing its ptes */ - if (!adev->debug_vm && dma_resv_trylock(resv)) { + if (!adev->debug_vm && !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && + dma_resv_trylock(resv)) { clear = false; unlock = true; /* The caller is already holding the reservation lock */ @@ -2002,7 +2005,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, * during user requests GEM unmap IOCTL except for forcing the unmap * from user space. */ - if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0)) + if (unlikely(bo_va->userq_va_mapped)) amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr); list_del(&mapping->list); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index fd881388d612..f27f917e3cdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -562,6 +562,11 @@ static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr, amdgpu_ring_write(ring, 0); } + /* WA: Force sync after TRAP to avoid VPE1 fail to power off */ + if (ring->adev->vpe.collaborate_mode) { + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COLLAB_SYNC, 0)); + amdgpu_ring_write(ring, 0xabcd); + } } static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -968,7 +973,7 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = { .emit_frame_size = 5 + /* vpe_ring_init_cond_exec */ 6 + /* vpe_ring_emit_pipeline_sync */ - 10 + 10 + 10 + /* vpe_ring_emit_fence */ + 12 + 12 + 12 + /* vpe_ring_emit_fence */ /* vpe_ring_emit_vm_flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 0e0b1e5b88fc..c35372e21261 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -602,6 +602,13 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) "amdgpu/%s_pfp.bin", ucode_prefix); if (err) goto out; + + adev->gfx.rs64_enable = amdgpu_ucode_hdr_version( + (union amdgpu_firmware_header *) + adev->gfx.pfp_fw->data, 2, 0); + if (adev->gfx.rs64_enable) + dev_dbg(adev->dev, "CP RS64 enable\n"); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 5b4121ddc78c..98aa00eeb2f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -81,7 +81,7 @@ mes_userq_create_wptr_mapping(struct amdgpu_device *adev, ret = amdgpu_ttm_alloc_gart(&wptr_obj->obj->tbo); if (ret) { DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); - goto fail_map; + goto fail_alloc_gart; } queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj); @@ -89,6 +89,8 @@ mes_userq_create_wptr_mapping(struct amdgpu_device *adev, drm_exec_fini(&exec); return 0; +fail_alloc_gart: + amdgpu_bo_unpin(wptr_obj->obj); fail_map: amdgpu_bo_unref(&wptr_obj->obj); fail_lock: @@ -190,12 +192,16 @@ static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, * for the same. */ size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ; - r = amdgpu_userq_create_object(uq_mgr, ctx, size); + r = amdgpu_bo_create_kernel(uq_mgr->adev, size, 0, + AMDGPU_GEM_DOMAIN_GTT, + &ctx->obj, &ctx->gpu_addr, + &ctx->cpu_ptr); if (r) { DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r); return r; } + memset(ctx->cpu_ptr, 0, size); return 0; } @@ -268,13 +274,19 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, return -ENOMEM; } - r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, - AMDGPU_MQD_SIZE_ALIGN(mqd_hw_default->mqd_size)); + r = amdgpu_bo_create_kernel(adev, + AMDGPU_MQD_SIZE_ALIGN(mqd_hw_default->mqd_size), + 0, AMDGPU_GEM_DOMAIN_GTT, + &queue->mqd.obj, &queue->mqd.gpu_addr, + &queue->mqd.cpu_ptr); if (r) { DRM_ERROR("Failed to create MQD object for userqueue\n"); goto free_props; } + memset(queue->mqd.cpu_ptr, 0, + AMDGPU_MQD_SIZE_ALIGN(mqd_hw_default->mqd_size)); + /* Initialize the MQD BO with user given values */ userq_props->wptr_gpu_addr = mqd_user->wptr_va; userq_props->rptr_gpu_addr = mqd_user->rptr_va; @@ -306,8 +318,9 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, kfree(compute_mqd); goto free_mqd; } - r = amdgpu_userq_input_va_validate(adev, queue, compute_mqd->eop_va, - 2048); + r = amdgpu_userq_input_va_validate(adev, queue, + compute_mqd->eop_va, 2048, + &queue->userq_vas.va.eop); amdgpu_bo_unreserve(queue->vm->root.bo); if (r) { kfree(compute_mqd); @@ -356,7 +369,8 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, goto free_mqd; } r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->shadow_va, - shadow_info.shadow_size); + shadow_info.shadow_size, + &queue->userq_vas.va.shadow); if (r) { amdgpu_bo_unreserve(queue->vm->root.bo); kfree(mqd_gfx_v11); @@ -364,7 +378,8 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, } r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->csa_va, - shadow_info.csa_size); + shadow_info.csa_size, + &queue->userq_vas.va.csa); amdgpu_bo_unreserve(queue->vm->root.bo); if (r) { kfree(mqd_gfx_v11); @@ -394,7 +409,8 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, goto free_mqd; } r = amdgpu_userq_input_va_validate(adev, queue, mqd_sdma_v11->csa_va, - 32); + 32, + &queue->userq_vas.va.csa); amdgpu_bo_unreserve(queue->vm->root.bo); if (r) { kfree(mqd_sdma_v11); @@ -430,10 +446,12 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue, return 0; free_ctx: - amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj); + amdgpu_bo_free_kernel(&queue->fw_obj.obj, &queue->fw_obj.gpu_addr, + &queue->fw_obj.cpu_ptr); free_mqd: - amdgpu_userq_destroy_object(uq_mgr, &queue->mqd); + amdgpu_bo_free_kernel(&queue->mqd.obj, &queue->mqd.gpu_addr, + &queue->mqd.cpu_ptr); free_props: kfree(userq_props); @@ -443,11 +461,12 @@ free_props: static void mes_userq_mqd_destroy(struct amdgpu_usermode_queue *queue) { - struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr; - amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj); + amdgpu_bo_free_kernel(&queue->fw_obj.obj, &queue->fw_obj.gpu_addr, + &queue->fw_obj.cpu_ptr); kfree(queue->userq_prop); - amdgpu_userq_destroy_object(uq_mgr, &queue->mqd); + amdgpu_bo_free_kernel(&queue->mqd.obj, &queue->mqd.gpu_addr, + &queue->mqd.cpu_ptr); } static int mes_userq_preempt(struct amdgpu_usermode_queue *queue) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c index 5b7b46d242c6..93253db5e2de 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c @@ -42,9 +42,10 @@ #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" +#define VCE_V1_0_ALIGNMENT (32 * 1024) #define VCE_V1_0_FW_SIZE (256 * 1024) #define VCE_V1_0_STACK_SIZE (64 * 1024) -#define VCE_V1_0_DATA_SIZE (7808 * (AMDGPU_MAX_VCE_HANDLES + 1)) +#define VCE_V1_0_DATA_SIZE (ALIGN(7808 * (AMDGPU_MAX_VCE_HANDLES + 1), VCE_V1_0_ALIGNMENT)) #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev); @@ -177,7 +178,7 @@ static void vce_v1_0_init_cg(struct amdgpu_device *adev) } /** - * vce_v1_0_load_fw_signature - load firmware signature into VCPU BO + * vce_v1_0_load_fw() - load firmware signature into VCPU BO * * @adev: amdgpu_device pointer * @@ -185,21 +186,26 @@ static void vce_v1_0_init_cg(struct amdgpu_device *adev) * This function finds the signature appropriate for the current * ASIC and writes that into the VCPU BO. */ -static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev) +static int vce_v1_0_load_fw(struct amdgpu_device *adev) { const struct common_firmware_header *hdr; struct vce_v1_0_fw_signature *sign; - unsigned int ucode_offset; + u32 ucode_offset; + u32 ucode_size; uint32_t chip_id; u32 *cpu_addr; int i; hdr = (const struct common_firmware_header *)adev->vce.fw->data; ucode_offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + ucode_size = hdr->ucode_size_bytes - sizeof(struct vce_v1_0_fw_signature *); cpu_addr = adev->vce.cpu_addr; sign = (void *)adev->vce.fw->data + ucode_offset; + if (ucode_size > VCE_V1_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET) + return -EINVAL; + switch (adev->asic_type) { case CHIP_TAHITI: chip_id = 0x01000014; @@ -226,12 +232,14 @@ static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev) return -EINVAL; } + memset_io(&cpu_addr[0], 0, amdgpu_bo_size(adev->vce.vcpu_bo)); + cpu_addr += (256 - 64) / 4; memcpy_toio(&cpu_addr[0], &sign->val[i].nonce[0], 16); cpu_addr[4] = cpu_to_le32(le32_to_cpu(sign->length) + 64); memset_io(&cpu_addr[5], 0, 44); - memcpy_toio(&cpu_addr[16], &sign[1], hdr->ucode_size_bytes - sizeof(*sign)); + memcpy_toio(&cpu_addr[16], &sign[1], ucode_size); cpu_addr += (le32_to_cpu(sign->length) + 64) / 4; memcpy_toio(&cpu_addr[0], &sign->val[i].sigval[0], 16); @@ -312,18 +320,23 @@ static int vce_v1_0_mc_resume(struct amdgpu_device *adev) WREG32(mmVCE_VCPU_SCRATCH7, AMDGPU_MAX_VCE_HANDLES); offset = adev->vce.gpu_addr + AMDGPU_VCE_FIRMWARE_OFFSET; - size = VCE_V1_0_FW_SIZE; - WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); + size = VCE_V1_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET; + WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset); WREG32(mmVCE_VCPU_CACHE_SIZE0, size); offset += size; size = VCE_V1_0_STACK_SIZE; - WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff); + WARN_ON(!IS_ALIGNED(offset, VCE_V1_0_ALIGNMENT)); + WARN_ON(!IS_ALIGNED(size, VCE_V1_0_ALIGNMENT)); + WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset); WREG32(mmVCE_VCPU_CACHE_SIZE1, size); offset += size; size = VCE_V1_0_DATA_SIZE; - WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff); + WARN_ON(!IS_ALIGNED(offset, VCE_V1_0_ALIGNMENT)); + WARN_ON(!IS_ALIGNED(size, VCE_V1_0_ALIGNMENT)); + WARN_ON((offset + size - adev->vce.gpu_addr) > amdgpu_bo_size(adev->vce.vcpu_bo)); + WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset); WREG32(mmVCE_VCPU_CACHE_SIZE2, size); WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100); @@ -527,22 +540,31 @@ static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block) * To accomodate that, we put GART to the LOW address range * and reserve some GART pages where we map the VCPU BO, * so that it gets a 32-bit address. + * + * The BAR address is zero and we can't change it + * due to the firmware validation mechanism. + * It seems that it fails to initialize if the address is >= 128 MiB. */ static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev) { u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo); - u64 max_vcpu_bo_addr = 0xffffffff - bo_size; + u64 max_vcpu_bo_addr = 0x07ffffff - bo_size; u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE; u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo); u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID; u64 vce_gart_start_offs; int r; - r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, - &adev->vce.gart_node, num_pages, - DRM_MM_INSERT_LOW); - if (r) - return r; + if (adev->gmc.vram_start < adev->gmc.gart_start) + return amdgpu_bo_gpu_offset(adev->vce.vcpu_bo) <= max_vcpu_bo_addr ? 0 : -EINVAL; + + if (!drm_mm_node_allocated(&adev->vce.gart_node)) { + r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, + &adev->vce.gart_node, num_pages, + DRM_MM_INSERT_LOW); + if (r) + return r; + } vce_gart_start_offs = amdgpu_gtt_node_to_byte_offset(&adev->vce.gart_node); @@ -553,8 +575,6 @@ static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev) amdgpu_gart_map_vram_range(adev, pa, adev->vce.gart_node.start, num_pages, flags, adev->gart.ptr); adev->vce.gpu_addr = adev->gmc.gart_start + vce_gart_start_offs; - if (adev->vce.gpu_addr > max_vcpu_bo_addr) - return -EINVAL; return 0; } @@ -574,10 +594,7 @@ static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - r = amdgpu_vce_resume(adev); - if (r) - return r; - r = vce_v1_0_load_fw_signature(adev); + r = vce_v1_0_load_fw(adev); if (r) return r; r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev); @@ -696,10 +713,7 @@ static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; - r = amdgpu_vce_resume(adev); - if (r) - return r; - r = vce_v1_0_load_fw_signature(adev); + r = vce_v1_0_load_fw(adev); if (r) return r; r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index db149eda6204..3a6fc8604108 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -37,9 +37,14 @@ #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" + +/* Use 24K to be safe. The FW supposedly only requires 23744 bytes. */ +#define VCE_V2_0_DATA_ENTRY_SIZE (24 * 1024) + #define VCE_V2_0_FW_SIZE (256 * 1024) #define VCE_V2_0_STACK_SIZE (64 * 1024) -#define VCE_V2_0_DATA_SIZE (23552 * AMDGPU_MAX_VCE_HANDLES) +#define VCE_V2_0_DATA_SIZE (VCE_V2_0_DATA_ENTRY_SIZE * (AMDGPU_MAX_VCE_HANDLES + 1)) + #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev); @@ -183,7 +188,7 @@ static void vce_v2_0_mc_resume(struct amdgpu_device *adev) WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8)); offset = AMDGPU_VCE_FIRMWARE_OFFSET; - size = VCE_V2_0_FW_SIZE; + size = VCE_V2_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET; WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); WREG32(mmVCE_VCPU_CACHE_SIZE0, size); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 03d79e464f04..c69f7d82060f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -574,7 +574,7 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx) } else WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8)); offset = AMDGPU_VCE_FIRMWARE_OFFSET; - size = VCE_V3_0_FW_SIZE; + size = VCE_V3_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET; WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); WREG32(mmVCE_VCPU_CACHE_SIZE0, size); |
