summary | refs | log | tree | commit | diff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 25
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 19
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 9
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 17
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 29
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 13
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 527
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 60
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 298
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 2
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 7
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 7
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 47
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/vce_v1_0.c | 64
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 9
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2
24 files changed, 581 insertions, 584 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8bc591deb546..fd50da4c7b18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1190,7 +1190,6 @@ struct amdgpu_device {
bool apu_prefer_gtt;
bool userq_halt_for_enforce_isolation;
- struct work_struct userq_reset_work;
struct amdgpu_uid *uid_info;
struct amdgpu_uma_carveout_info uma_info;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index d9e283f3b57d..9783a3cefb04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -36,6 +36,9 @@
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu_reset.h"
+#if IS_ENABLED(CONFIG_HSA_AMD)
+#include "kfd_priv.h"
+#endif
/* Total memory size in system memory and all GPU VRAM. Used to
* estimate worst case amount of memory to reserve for page tables
@@ -320,6 +323,28 @@ void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
(void)amdgpu_reset_domain_schedule(adev->reset_domain, &adev->kfd.reset_work);
}
+void amdgpu_amdkfd_clear_kfd_mapping(struct amdgpu_device *adev)
+{
+#if IS_ENABLED(CONFIG_HSA_AMD)
+ struct kfd_dev *kfd = adev->kfd.dev;
+ unsigned int i;
+
+ if (!kfd)
+ return;
+
+ for (i = 0; i < kfd->num_nodes; i++) {
+ struct kfd_node *node = kfd->nodes[i];
+
+ kfd_dev_unmap_mapping_range(KFD_MMAP_TYPE_DOORBELL |
+ KFD_MMAP_GPU_ID(node->id),
+ kfd_doorbell_process_slice(kfd));
+ kfd_dev_unmap_mapping_range(KFD_MMAP_TYPE_MMIO |
+ KFD_MMAP_GPU_ID(node->id),
+ PAGE_SIZE);
+ }
+#endif
+}
+
int amdgpu_amdkfd_alloc_kernel_mem(struct amdgpu_device *adev, size_t size,
u32 domain, void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool cp_mqd_gfx9)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index cdbab7f8cee8..2b4108f83f48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -358,6 +358,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag, int8_t xcp_id);
void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag, int8_t xcp_id);
+void amdgpu_amdkfd_clear_kfd_mapping(struct amdgpu_device *adev);
u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 66ca043658ff..feab90e3efd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3787,7 +3787,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
- INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work);
amdgpu_coredump_init(adev);
@@ -5478,7 +5477,7 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev))
cancel_work(&adev->reset_work);
#endif
- cancel_work(&adev->userq_reset_work);
+ amdgpu_userq_mgr_cancel_reset_work(adev);
if (adev->kfd.dev)
cancel_work(&adev->kfd.reset_work);
@@ -5836,6 +5835,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
/* We need to lock reset domain only once both for XGMI and single device */
amdgpu_device_recovery_get_reset_lock(adev, &device_list);
+ /* unmap all the mappings of doorbell and framebuffer to prevent user space from
+ * accessing them
+ */
+ unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
+ amdgpu_amdkfd_clear_kfd_mapping(adev);
+
amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
hive, need_emergency_restart);
if (need_emergency_restart)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 8d99bfaa498f..80efeca0ab73 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -304,7 +304,7 @@ static int amdgpu_discovery_get_tmr_info(struct amdgpu_device *adev,
adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset;
adev->discovery.size =
adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb << 10;
- if (!adev->discovery.offset || !adev->discovery.size)
+ if (!adev->discovery.size)
return -EINVAL;
} else {
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 5376035d32fe..fe6d988e7f24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -31,6 +31,7 @@
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/dma-fence-unwrap.h>
+#include <linux/uaccess.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_drv.h>
@@ -508,6 +509,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
if (offset_in_page(args->addr | args->size))
return -EINVAL;
+ if (!access_ok((void __user *)(uintptr_t)args->addr, args->size))
+ return -EFAULT;
+
/* reject unknown flag values */
if (args->flags & ~(AMDGPU_GEM_USERPTR_READONLY |
AMDGPU_GEM_USERPTR_ANONONLY | AMDGPU_GEM_USERPTR_VALIDATE |
@@ -821,7 +825,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_syncobj *timeline_syncobj = NULL;
struct dma_fence_chain *timeline_chain = NULL;
struct drm_exec exec;
- uint64_t vm_size;
+ uint64_t vm_size, tmp;
int r = 0;
/* Validate virtual address range against reserved regions. */
@@ -845,7 +849,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
vm_size -= AMDGPU_VA_RESERVED_TOP;
- if (args->va_address + args->map_size > vm_size) {
+ if (check_add_overflow(args->va_address, args->map_size, &tmp) || tmp > vm_size) {
dev_dbg(dev->dev,
"va_address 0x%llx is in top reserved area 0x%llx\n",
args->va_address + args->map_size, vm_size);
@@ -1089,9 +1093,16 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
* If that number is larger than the size of the array, the ioctl must
* be retried.
*/
+ if (args->num_entries > INT_MAX / sizeof(*vm_entries)) {
+ r = -EINVAL;
+ goto out_exec;
+ }
+
vm_entries = kvcalloc(args->num_entries, sizeof(*vm_entries), GFP_KERNEL);
- if (!vm_entries)
- return -ENOMEM;
+ if (!vm_entries) {
+ r = -ENOMEM;
+ goto out_exec;
+ }
amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) {
if (num_mappings < args->num_entries) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 620fddde4c4d..a5d26b943f6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -199,11 +199,18 @@ int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
enum drm_mm_insert_mode mode)
{
struct amdgpu_device *adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
+ u32 alignment = 0;
int r;
+ /* Align to TLB L2 cache entry size to work around "V bit HW bug" */
+ if (adev->asic_type == CHIP_TAHITI) {
+ alignment = 32 * 1024 / AMDGPU_GPU_PAGE_SIZE;
+ num_pages = ALIGN(num_pages, alignment);
+ }
+
spin_lock(&mgr->lock);
r = drm_mm_insert_node_in_range(&mgr->mm, mm_node, num_pages,
- 0, GART_ENTRY_WITHOUT_BO_COLOR, 0,
+ alignment, GART_ENTRY_WITHOUT_BO_COLOR, 0,
adev->gmc.gart_size >> PAGE_SHIFT,
mode);
spin_unlock(&mgr->lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index f72990ac046e..5bfa5a84b09c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -51,8 +51,6 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_hmm.h"
-#define MAX_WALK_BYTE (2UL << 30)
-
/**
* amdgpu_hmm_invalidate_gfx - callback to notify about mm change
*
@@ -78,6 +76,7 @@ static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni,
mmu_interval_set_seq(mni, cur_seq);
+ amdgpu_vm_bo_invalidate(bo, false);
r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
false, MAX_SCHEDULE_TIMEOUT);
mutex_unlock(&adev->notifier_lock);
@@ -170,11 +169,13 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
void *owner,
struct amdgpu_hmm_range *range)
{
- unsigned long end;
+ const u64 max_bytes = SZ_2G;
+
+ struct hmm_range *hmm_range = &range->hmm_range;
unsigned long timeout;
unsigned long *pfns;
- int r = 0;
- struct hmm_range *hmm_range = &range->hmm_range;
+ unsigned long end;
+ int r;
pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
if (unlikely(!pfns)) {
@@ -191,8 +192,9 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
end = start + npages * PAGE_SIZE;
hmm_range->dev_private_owner = owner;
+ hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
do {
- hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end);
+ hmm_range->end = min(hmm_range->start + max_bytes, end);
pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
hmm_range->start, hmm_range->end);
@@ -200,7 +202,6 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
retry:
- hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
r = hmm_range_fault(hmm_range);
if (unlikely(r)) {
if (r == -EBUSY && !time_after(jiffies, timeout))
@@ -210,7 +211,7 @@ retry:
if (hmm_range->end == end)
break;
- hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT;
+ hmm_range->hmm_pfns += max_bytes >> PAGE_SHIFT;
hmm_range->start = hmm_range->end;
} while (hmm_range->end < end);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 912c9afaf9e1..4d68732d6223 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -96,7 +96,8 @@ struct amdgpu_bo_va {
* if non-zero, cannot unmap from GPU because user queues may still access it
*/
unsigned int queue_refcount;
- atomic_t userq_va_mapped;
+ /* Indicates if this buffer is mapped for any user queue. Once set, never reset. */
+ bool userq_va_mapped;
};
struct amdgpu_bo {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 6c644cfe6695..fc9f3adf9912 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2280,7 +2280,8 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
list_for_each_entry(obj, &con->head, node) {
if (amdgpu_ras_is_supported(adev, obj->head.block) &&
(obj->attr_inuse == 1)) {
- sprintf(fs_info.debugfs_name, "%s_err_inject",
+ snprintf(fs_info.debugfs_name, sizeof(fs_info.debugfs_name),
+ "%s_err_inject",
get_ras_block_str(&obj->head));
fs_info.head = obj->head;
amdgpu_ras_debugfs_create(adev, &fs_info, dir);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 66e8a2f7afcf..d6bee5c30073 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -552,8 +552,9 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
struct amdgpu_ring *ring = file_inode(f)->i_private;
- uint32_t value, result, early[3];
+ u32 value, result, early[3] = { 0 };
uint64_t p;
+ u32 avail_dw, start_dw, read_dw;
loff_t i;
int r;
@@ -565,10 +566,10 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
result = 0;
- if (*pos < 12) {
- if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
- mutex_lock(&ring->adev->cper.ring_lock);
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ mutex_lock(&ring->adev->cper.ring_lock);
+ if (*pos < 12) {
early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
early[2] = ring->wptr & ring->buf_mask;
@@ -600,13 +601,24 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
*pos += 4;
}
} else {
+ early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
+ early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
+
p = early[0];
if (early[0] <= early[1])
- size = (early[1] - early[0]);
+ avail_dw = early[1] - early[0];
else
- size = ring->ring_size - (early[0] - early[1]);
+ avail_dw = ring->buf_mask + 1 - (early[0] - early[1]);
- while (size) {
+ start_dw = (*pos > 12) ? ((*pos - 12) >> 2) : 0;
+ if (start_dw >= avail_dw)
+ goto out;
+
+ p = (p + start_dw) & ring->ptr_mask;
+ avail_dw -= start_dw;
+ read_dw = min_t(u32, avail_dw, size >> 2);
+
+ while (read_dw) {
if (p == early[1])
goto out;
@@ -619,9 +631,10 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
buf += 4;
result += 4;
- size--;
+ read_dw--;
p++;
p &= ring->ptr_mask;
+ *pos += 4;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
index a0b479d5fff1..f4be19223588 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -175,11 +175,14 @@ int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va,
{
unsigned long bit_pos;
- bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem);
- if (bit_pos >= adev->seq64.num_sem)
- return -ENOSPC;
+ for (;;) {
+ bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem);
+ if (bit_pos >= adev->seq64.num_sem)
+ return -ENOSPC;
- __set_bit(bit_pos, adev->seq64.used);
+ if (!test_and_set_bit(bit_pos, adev->seq64.used))
+ break;
+ }
*va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev);
@@ -205,7 +208,7 @@ void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va)
bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64);
if (bit_pos < adev->seq64.num_sem)
- __clear_bit(bit_pos, adev->seq64.used);
+ clear_bit(bit_pos, adev->seq64.used);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 0238c2798de4..b8ed931f8a40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -130,6 +130,7 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
adev->umc.ras->ras_block.hw_ops->query_ras_error_address &&
adev->umc.max_ras_err_cnt_per_query) {
+ kfree(err_data->err_addr);
err_data->err_addr =
kzalloc_objs(struct eeprom_table_record,
adev->umc.max_ras_err_cnt_per_query);
@@ -160,6 +161,7 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
if (adev->umc.ras &&
adev->umc.ras->ecc_info_query_ras_error_address &&
adev->umc.max_ras_err_cnt_per_query) {
+ kfree(err_data->err_addr);
err_data->err_addr =
kzalloc_objs(struct eeprom_table_record,
adev->umc.max_ras_err_cnt_per_query);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index de140a8ed135..cf192500800f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -82,19 +82,11 @@ static bool amdgpu_userq_is_reset_type_supported(struct amdgpu_device *adev,
return false;
}
-static void amdgpu_userq_gpu_reset(struct amdgpu_device *adev)
-{
- if (amdgpu_device_should_recover_gpu(adev)) {
- amdgpu_reset_domain_schedule(adev->reset_domain,
- &adev->userq_reset_work);
- /* Wait for the reset job to complete */
- flush_work(&adev->userq_reset_work);
- }
-}
-
-static int
-amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
+static void amdgpu_userq_mgr_reset_work(struct work_struct *work)
{
+ struct amdgpu_userq_mgr *uq_mgr =
+ container_of(work, struct amdgpu_userq_mgr,
+ reset_work);
struct amdgpu_device *adev = uq_mgr->adev;
const int queue_types[] = {
AMDGPU_RING_TYPE_COMPUTE,
@@ -103,15 +95,11 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
};
const int num_queue_types = ARRAY_SIZE(queue_types);
bool gpu_reset = false;
- int r = 0;
- int i;
-
- /* Warning if current process mutex is not held */
- WARN_ON(!mutex_is_locked(&uq_mgr->userq_mutex));
+ int i, r;
if (unlikely(adev->debug_disable_gpu_ring_reset)) {
dev_err(adev->dev, "userq reset disabled by debug mask\n");
- return 0;
+ return;
}
/*
@@ -119,7 +107,7 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
* skip all reset detection logic
*/
if (!amdgpu_gpu_recovery)
- return 0;
+ return;
/*
* Iterate through all queue types to detect and reset problematic queues
@@ -127,9 +115,11 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
*/
for (i = 0; i < num_queue_types; i++) {
int ring_type = queue_types[i];
- const struct amdgpu_userq_funcs *funcs = adev->userq_funcs[ring_type];
+ const struct amdgpu_userq_funcs *funcs =
+ adev->userq_funcs[ring_type];
- if (!amdgpu_userq_is_reset_type_supported(adev, ring_type, AMDGPU_RESET_TYPE_PER_QUEUE))
+ if (!amdgpu_userq_is_reset_type_supported(adev, ring_type,
+ AMDGPU_RESET_TYPE_PER_QUEUE))
continue;
if (atomic_read(&uq_mgr->userq_count[ring_type]) > 0 &&
@@ -142,46 +132,43 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
}
}
- if (gpu_reset)
- amdgpu_userq_gpu_reset(adev);
+ if (gpu_reset) {
+ struct amdgpu_reset_context reset_context;
- return r;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_USERQ;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
}
static void amdgpu_userq_hang_detect_work(struct work_struct *work)
{
- struct amdgpu_usermode_queue *queue = container_of(work,
- struct amdgpu_usermode_queue,
- hang_detect_work.work);
- struct dma_fence *fence;
- struct amdgpu_userq_mgr *uq_mgr;
-
- if (!queue->userq_mgr)
- return;
-
- uq_mgr = queue->userq_mgr;
- fence = READ_ONCE(queue->hang_detect_fence);
- /* Fence already signaled – no action needed */
- if (!fence || dma_fence_is_signaled(fence))
- return;
+ struct amdgpu_usermode_queue *queue =
+ container_of(work, struct amdgpu_usermode_queue,
+ hang_detect_work.work);
- mutex_lock(&uq_mgr->userq_mutex);
- amdgpu_userq_detect_and_reset_queues(uq_mgr);
- mutex_unlock(&uq_mgr->userq_mutex);
+ /*
+ * Don't schedule the work here! Scheduling or queueing work from one reset
+ * handler to another is illegal unless extra precautions are taken.
+ */
+ amdgpu_userq_mgr_reset_work(&queue->userq_mgr->reset_work);
}
/*
* Start hang detection for a user queue fence. A delayed work will be scheduled
- * to check if the fence is still pending after the timeout period.
-*/
+ * to reset the queues when the fence doesn't signal in time.
+ */
void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue)
{
struct amdgpu_device *adev;
unsigned long timeout_ms;
- if (!queue || !queue->userq_mgr || !queue->userq_mgr->adev)
- return;
-
adev = queue->userq_mgr->adev;
/* Determine timeout based on queue type */
switch (queue->queue_type) {
@@ -199,10 +186,8 @@ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue)
break;
}
- /* Store the fence to monitor and schedule hang detection */
- WRITE_ONCE(queue->hang_detect_fence, queue->last_fence);
- schedule_delayed_work(&queue->hang_detect_work,
- msecs_to_jiffies(timeout_ms));
+ queue_delayed_work(adev->reset_domain->wq, &queue->hang_detect_work,
+ msecs_to_jiffies(timeout_ms));
}
void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell)
@@ -210,47 +195,35 @@ void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell)
struct xarray *xa = &adev->userq_doorbell_xa;
struct amdgpu_usermode_queue *queue;
unsigned long flags;
+ int r;
xa_lock_irqsave(xa, flags);
queue = xa_load(xa, doorbell);
- if (queue)
- amdgpu_userq_fence_driver_process(queue->fence_drv);
- xa_unlock_irqrestore(xa, flags);
-}
-
-static void amdgpu_userq_init_hang_detect_work(struct amdgpu_usermode_queue *queue)
-{
- INIT_DELAYED_WORK(&queue->hang_detect_work, amdgpu_userq_hang_detect_work);
- queue->hang_detect_fence = NULL;
-}
-
-static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue,
- struct amdgpu_bo_va_mapping *va_map, u64 addr)
-{
- struct amdgpu_userq_va_cursor *va_cursor;
- struct userq_va_list;
-
- va_cursor = kzalloc_obj(*va_cursor);
- if (!va_cursor)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&va_cursor->list);
- va_cursor->gpu_addr = addr;
- atomic_set(&va_map->bo_va->userq_va_mapped, 1);
- list_add(&va_cursor->list, &queue->userq_va_list);
+ if (queue) {
+ r = amdgpu_userq_fence_driver_process(queue->fence_drv);
+ /*
+ * We are in interrupt context here, so this *can't* wait for
+ * the reset work to finish; only a non-blocking cancel is allowed.
+ */
+ if (r >= 0)
+ cancel_delayed_work(&queue->hang_detect_work);
- return 0;
+ /* Restart the timer when there are still fences pending */
+ if (r == 1)
+ amdgpu_userq_start_hang_detect_work(queue);
+ }
+ xa_unlock_irqrestore(xa, flags);
}
int amdgpu_userq_input_va_validate(struct amdgpu_device *adev,
struct amdgpu_usermode_queue *queue,
- u64 addr, u64 expected_size)
+ u64 addr, u64 expected_size,
+ u64 *va_out)
{
struct amdgpu_bo_va_mapping *va_map;
struct amdgpu_vm *vm = queue->vm;
u64 user_addr;
u64 size;
- int r = 0;
/* Caller must hold vm->root.bo reservation */
dma_resv_assert_held(queue->vm->root.bo->tbo.base.resv);
@@ -259,20 +232,18 @@ int amdgpu_userq_input_va_validate(struct amdgpu_device *adev,
size = expected_size >> AMDGPU_GPU_PAGE_SHIFT;
va_map = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
- if (!va_map) {
- r = -EINVAL;
- goto out_err;
- }
+ if (!va_map)
+ return -EINVAL;
+
/* Only validate the userq whether resident in the VM mapping range */
if (user_addr >= va_map->start &&
va_map->last - user_addr + 1 >= size) {
- amdgpu_userq_buffer_va_list_add(queue, va_map, user_addr);
+ va_map->bo_va->userq_va_mapped = true;
+ *va_out = user_addr;
return 0;
}
- r = -EINVAL;
-out_err:
- return r;
+ return -EINVAL;
}
static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr)
@@ -283,7 +254,7 @@ static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr)
dma_resv_assert_held(vm->root.bo->tbo.base.resv);
mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
- if (!IS_ERR_OR_NULL(mapping) && atomic_read(&mapping->bo_va->userq_va_mapped))
+ if (!IS_ERR_OR_NULL(mapping) && mapping->bo_va->userq_va_mapped)
r = true;
else
r = false;
@@ -293,14 +264,16 @@ static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr)
static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue)
{
- struct amdgpu_userq_va_cursor *va_cursor, *tmp;
- int r = 0;
+ int i, r = 0;
- list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) {
- r += amdgpu_userq_buffer_va_mapped(queue->vm, va_cursor->gpu_addr);
+ for (i = 0; i < ARRAY_SIZE(queue->userq_vas.va_array); i++) {
+ if (!queue->userq_vas.va_array[i])
+ continue;
+ r += amdgpu_userq_buffer_va_mapped(queue->vm,
+ queue->userq_vas.va_array[i]);
dev_dbg(queue->userq_mgr->adev->dev,
"validate the userq mapping:%p va:%llx r:%d\n",
- queue, va_cursor->gpu_addr, r);
+ queue, queue->userq_vas.va_array[i], r);
}
if (r != 0)
@@ -309,35 +282,7 @@ static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue)
return false;
}
-static void amdgpu_userq_buffer_va_list_del(struct amdgpu_bo_va_mapping *mapping,
- struct amdgpu_userq_va_cursor *va_cursor)
-{
- atomic_set(&mapping->bo_va->userq_va_mapped, 0);
- list_del(&va_cursor->list);
- kfree(va_cursor);
-}
-
-static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev,
- struct amdgpu_usermode_queue *queue)
-{
- struct amdgpu_userq_va_cursor *va_cursor, *tmp;
- struct amdgpu_bo_va_mapping *mapping;
-
- /* Caller must hold vm->root.bo reservation */
- dma_resv_assert_held(queue->vm->root.bo->tbo.base.resv);
-
- list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) {
- mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, va_cursor->gpu_addr);
- if (!mapping) {
- return -EINVAL;
- }
- dev_dbg(adev->dev, "delete the userq:%p va:%llx\n",
- queue, va_cursor->gpu_addr);
- amdgpu_userq_buffer_va_list_del(mapping, va_cursor);
- }
- return 0;
-}
static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue)
{
@@ -345,23 +290,18 @@ static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue)
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *userq_funcs =
adev->userq_funcs[queue->queue_type];
- bool found_hung_queue = false;
- int r = 0;
+ int r;
if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
r = userq_funcs->preempt(queue);
if (r) {
queue->state = AMDGPU_USERQ_STATE_HUNG;
- found_hung_queue = true;
+ return r;
} else {
queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
}
}
-
- if (found_hung_queue)
- amdgpu_userq_detect_and_reset_queues(uq_mgr);
-
- return r;
+ return 0;
}
static int amdgpu_userq_restore_helper(struct amdgpu_usermode_queue *queue)
@@ -390,24 +330,21 @@ static int amdgpu_userq_unmap_helper(struct amdgpu_usermode_queue *queue)
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *userq_funcs =
adev->userq_funcs[queue->queue_type];
- bool found_hung_queue = false;
- int r = 0;
+ int r;
if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
- (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
+ (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
+
r = userq_funcs->unmap(queue);
if (r) {
queue->state = AMDGPU_USERQ_STATE_HUNG;
- found_hung_queue = true;
+ return r;
} else {
queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
}
}
- if (found_hung_queue)
- amdgpu_userq_detect_and_reset_queues(uq_mgr);
-
- return r;
+ return 0;
}
static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue)
@@ -416,19 +353,19 @@ static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue)
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *userq_funcs =
adev->userq_funcs[queue->queue_type];
- int r = 0;
+ int r;
if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
r = userq_funcs->map(queue);
if (r) {
queue->state = AMDGPU_USERQ_STATE_HUNG;
- amdgpu_userq_detect_and_reset_queues(uq_mgr);
+ return r;
} else {
queue->state = AMDGPU_USERQ_STATE_MAPPED;
}
}
- return r;
+ return 0;
}
static void amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue)
@@ -445,18 +382,14 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue)
{
struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
struct amdgpu_device *adev = uq_mgr->adev;
- const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];
/* Wait for mode-1 reset to complete */
down_read(&adev->reset_domain->sem);
- uq_funcs->mqd_destroy(queue);
/* Use interrupt-safe locking since IRQ handlers may access these XArrays */
xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);
amdgpu_userq_fence_driver_free(queue);
queue->fence_drv = NULL;
- queue->userq_mgr = NULL;
- list_del(&queue->userq_va_list);
up_read(&adev->reset_domain->sem);
}
@@ -495,74 +428,15 @@ retry:
dma_fence_put(ev_fence);
}
-int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_userq_obj *userq_obj,
- int size)
-{
- struct amdgpu_device *adev = uq_mgr->adev;
- struct amdgpu_bo_param bp;
- int r;
- memset(&bp, 0, sizeof(bp));
- bp.byte_align = PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_GTT;
- bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- bp.type = ttm_bo_type_kernel;
- bp.size = size;
- bp.resv = NULL;
- bp.bo_ptr_size = sizeof(struct amdgpu_bo);
-
- r = amdgpu_bo_create(adev, &bp, &userq_obj->obj);
- if (r) {
- drm_file_err(uq_mgr->file, "Failed to allocate BO for userqueue (%d)", r);
- return r;
- }
- r = amdgpu_bo_reserve(userq_obj->obj, true);
- if (r) {
- drm_file_err(uq_mgr->file, "Failed to reserve BO to map (%d)", r);
- goto free_obj;
- }
-
- r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo);
- if (r) {
- drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r);
- goto unresv;
- }
-
- r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr);
- if (r) {
- drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r);
- goto unresv;
- }
-
- userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj);
- amdgpu_bo_unreserve(userq_obj->obj);
- memset(userq_obj->cpu_ptr, 0, size);
- return 0;
-
-unresv:
- amdgpu_bo_unreserve(userq_obj->obj);
-
-free_obj:
- amdgpu_bo_unref(&userq_obj->obj);
- return r;
-}
-
-void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_userq_obj *userq_obj)
-{
- amdgpu_bo_kunmap(userq_obj->obj);
- amdgpu_bo_unref(&userq_obj->obj);
-}
-
-uint64_t
+static int
amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_db_info *db_info,
- struct drm_file *filp)
+ struct drm_file *filp,
+ u64 *index)
{
- uint64_t index;
+ u64 doorbell_index;
struct drm_gem_object *gobj;
struct amdgpu_userq_obj *db_obj = db_info->db_obj;
int r, db_size;
@@ -609,12 +483,13 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
goto unpin_bo;
}
- index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
- db_info->doorbell_offset, db_size);
+ doorbell_index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
+ db_info->doorbell_offset, db_size);
drm_dbg_driver(adev_to_drm(uq_mgr->adev),
- "[Usermode queues] doorbell index=%lld\n", index);
+ "[Usermode queues] doorbell index=%lld\n", doorbell_index);
amdgpu_bo_unreserve(db_obj->obj);
- return index;
+ *index = doorbell_index;
+ return 0;
unpin_bo:
amdgpu_bo_unpin(db_obj->obj);
@@ -629,9 +504,7 @@ static int
amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue)
{
struct amdgpu_device *adev = uq_mgr->adev;
- struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
- struct amdgpu_vm *vm = &fpriv->vm;
-
+ const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];
int r = 0;
cancel_delayed_work_sync(&uq_mgr->resume_work);
@@ -639,27 +512,21 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que
/* Cancel any pending hang detection work and cleanup */
cancel_delayed_work_sync(&queue->hang_detect_work);
- r = amdgpu_bo_reserve(vm->root.bo, false);
- if (r) {
- drm_file_err(uq_mgr->file, "Failed to reserve root bo during userqueue destroy\n");
- return r;
- }
- amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
- amdgpu_bo_unreserve(vm->root.bo);
-
mutex_lock(&uq_mgr->userq_mutex);
- queue->hang_detect_fence = NULL;
amdgpu_userq_wait_for_last_fence(queue);
#if defined(CONFIG_DEBUG_FS)
debugfs_remove_recursive(queue->debugfs_queue);
#endif
- amdgpu_userq_detect_and_reset_queues(uq_mgr);
r = amdgpu_userq_unmap_helper(queue);
atomic_dec(&uq_mgr->userq_count[queue->queue_type]);
amdgpu_userq_cleanup(queue);
mutex_unlock(&uq_mgr->userq_mutex);
+ cancel_delayed_work_sync(&queue->hang_detect_work);
+ uq_funcs->mqd_destroy(queue);
+ queue->userq_mgr = NULL;
+
amdgpu_bo_reserve(queue->db_obj.obj, true);
amdgpu_bo_unpin(queue->db_obj.obj);
amdgpu_bo_unreserve(queue->db_obj.obj);
@@ -731,14 +598,14 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
const struct amdgpu_userq_funcs *uq_funcs;
struct amdgpu_usermode_queue *queue;
struct amdgpu_db_info db_info;
- bool skip_map_queue;
- u32 qid;
uint64_t index;
- int r = 0;
- int priority =
- (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
- AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
+ int priority;
+ u32 qid;
+ int r;
+ priority =
+ (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK)
+ >> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
r = amdgpu_userq_priority_permit(filp, priority);
if (r)
return r;
@@ -751,128 +618,121 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
uq_funcs = adev->userq_funcs[args->in.ip_type];
if (!uq_funcs) {
- drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n",
- args->in.ip_type);
r = -EINVAL;
goto err_pm_runtime;
}
queue = kzalloc_obj(struct amdgpu_usermode_queue);
if (!queue) {
- drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n");
r = -ENOMEM;
goto err_pm_runtime;
}
- INIT_LIST_HEAD(&queue->userq_va_list);
+ kref_init(&queue->refcount);
queue->doorbell_handle = args->in.doorbell_handle;
queue->queue_type = args->in.ip_type;
queue->vm = &fpriv->vm;
queue->priority = priority;
-
- db_info.queue_type = queue->queue_type;
- db_info.doorbell_handle = queue->doorbell_handle;
- db_info.db_obj = &queue->db_obj;
- db_info.doorbell_offset = args->in.doorbell_offset;
-
queue->userq_mgr = uq_mgr;
+ INIT_DELAYED_WORK(&queue->hang_detect_work,
+ amdgpu_userq_hang_detect_work);
- /* Validate the userq virtual address.*/
- r = amdgpu_bo_reserve(fpriv->vm.root.bo, false);
+ r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv);
if (r)
goto free_queue;
- if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va, args->in.queue_size) ||
- amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
- amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
+ xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
+ mutex_init(&queue->fence_drv_lock);
+ /* Make sure the queue can actually run with those virtual addresses. */
+ r = amdgpu_bo_reserve(fpriv->vm.root.bo, false);
+ if (r)
+ goto free_fence_drv;
+
+ if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va,
+ args->in.queue_size,
+ &queue->userq_vas.va.queue_rb) ||
+ amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va,
+ AMDGPU_GPU_PAGE_SIZE,
+ &queue->userq_vas.va.rptr) ||
+ amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va,
+ AMDGPU_GPU_PAGE_SIZE,
+ &queue->userq_vas.va.wptr)) {
r = -EINVAL;
amdgpu_bo_unreserve(fpriv->vm.root.bo);
- goto clean_mapping;
+ goto free_fence_drv;
}
amdgpu_bo_unreserve(fpriv->vm.root.bo);
/* Convert relative doorbell offset into absolute doorbell index */
- index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp);
- if (index == (uint64_t)-EINVAL) {
+ db_info.queue_type = queue->queue_type;
+ db_info.doorbell_handle = queue->doorbell_handle;
+ db_info.db_obj = &queue->db_obj;
+ db_info.doorbell_offset = args->in.doorbell_offset;
+ r = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp, &index);
+ if (r) {
drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n");
- r = -EINVAL;
- goto clean_mapping;
+ goto free_fence_drv;
}
queue->doorbell_index = index;
- xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
- r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv);
- if (r) {
- drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n");
- goto clean_mapping;
- }
-
r = uq_funcs->mqd_create(queue, &args->in);
if (r) {
drm_file_err(uq_mgr->file, "Failed to create Queue\n");
- goto clean_fence_driver;
+ goto clean_doorbell_bo;
}
+ /* Update VM owner at userq submit-time for page-fault attribution. */
+ amdgpu_vm_set_task_info(&fpriv->vm);
+
+ r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue,
+ GFP_KERNEL));
+ if (r)
+ goto clean_mqd;
+
amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
/* don't map the queue if scheduling is halted */
- if (adev->userq_halt_for_enforce_isolation &&
- ((queue->queue_type == AMDGPU_HW_IP_GFX) ||
- (queue->queue_type == AMDGPU_HW_IP_COMPUTE)))
- skip_map_queue = true;
- else
- skip_map_queue = false;
- if (!skip_map_queue) {
+ if (!adev->userq_halt_for_enforce_isolation ||
+ ((queue->queue_type != AMDGPU_HW_IP_GFX) &&
+ (queue->queue_type != AMDGPU_HW_IP_COMPUTE))) {
r = amdgpu_userq_map_helper(queue);
if (r) {
drm_file_err(uq_mgr->file, "Failed to map Queue\n");
- goto clean_mqd;
+ mutex_unlock(&uq_mgr->userq_mutex);
+ goto erase_doorbell;
}
}
- /* drop this refcount during queue destroy */
- kref_init(&queue->refcount);
-
- /* Wait for mode-1 reset to complete */
- down_read(&adev->reset_domain->sem);
+ atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
+ mutex_unlock(&uq_mgr->userq_mutex);
r = xa_alloc(&uq_mgr->userq_xa, &qid, queue,
- XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL);
- if (r) {
- if (!skip_map_queue)
- amdgpu_userq_unmap_helper(queue);
- r = -ENOMEM;
- goto clean_reset_domain;
- }
-
- r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
+ XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT),
+ GFP_KERNEL);
if (r) {
- xa_erase(&uq_mgr->userq_xa, qid);
- if (!skip_map_queue)
- amdgpu_userq_unmap_helper(queue);
- goto clean_reset_domain;
+ /*
+ * This drops the last reference which should take care of
+ * all cleanup.
+ */
+ amdgpu_userq_put(queue);
+ return r;
}
- up_read(&adev->reset_domain->sem);
amdgpu_debugfs_userq_init(filp, queue, qid);
- amdgpu_userq_init_hang_detect_work(queue);
-
args->out.queue_id = qid;
- atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
- mutex_unlock(&uq_mgr->userq_mutex);
return 0;
-clean_reset_domain:
- up_read(&adev->reset_domain->sem);
+erase_doorbell:
+ xa_erase_irq(&adev->userq_doorbell_xa, index);
clean_mqd:
- mutex_unlock(&uq_mgr->userq_mutex);
uq_funcs->mqd_destroy(queue);
-clean_fence_driver:
+clean_doorbell_bo:
+ amdgpu_bo_reserve(queue->db_obj.obj, true);
+ amdgpu_bo_unpin(queue->db_obj.obj);
+ amdgpu_bo_unreserve(queue->db_obj.obj);
+ amdgpu_bo_unref(&queue->db_obj.obj);
+free_fence_drv:
amdgpu_userq_fence_driver_free(queue);
-clean_mapping:
- amdgpu_bo_reserve(fpriv->vm.root.bo, true);
- amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
- amdgpu_bo_unreserve(fpriv->vm.root.bo);
free_queue:
kfree(queue);
err_pm_runtime:
@@ -1262,7 +1122,6 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
unsigned long queue_id;
int ret = 0, r;
- amdgpu_userq_detect_and_reset_queues(uq_mgr);
/* Try to unmap all the queues in this process ctx */
xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
r = amdgpu_userq_preempt_helper(queue);
@@ -1270,29 +1129,16 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
ret = r;
}
- if (ret)
+ if (ret) {
drm_file_err(uq_mgr->file,
"Couldn't unmap all the queues, eviction failed ret=%d\n", ret);
+ amdgpu_reset_domain_schedule(uq_mgr->adev->reset_domain,
+ &uq_mgr->reset_work);
+ flush_work(&uq_mgr->reset_work);
+ }
return ret;
}
-void amdgpu_userq_reset_work(struct work_struct *work)
-{
- struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
- userq_reset_work);
- struct amdgpu_reset_context reset_context;
-
- memset(&reset_context, 0, sizeof(reset_context));
-
- reset_context.method = AMD_RESET_METHOD_NONE;
- reset_context.reset_req_dev = adev;
- reset_context.src = AMDGPU_RESET_SRC_USERQ;
- set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
- /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/
-
- amdgpu_device_gpu_recover(adev, NULL, &reset_context);
-}
-
static void
amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
{
@@ -1326,9 +1172,24 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f
userq_mgr->file = file_priv;
INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
+ INIT_WORK(&userq_mgr->reset_work, amdgpu_userq_mgr_reset_work);
return 0;
}
+void amdgpu_userq_mgr_cancel_reset_work(struct amdgpu_device *adev)
+{
+ struct xarray *xa = &adev->userq_doorbell_xa;
+ struct amdgpu_usermode_queue *queue;
+ unsigned long flags, queue_id;
+
+ xa_lock_irqsave(xa, flags);
+ xa_for_each(xa, queue_id, queue) {
+ cancel_delayed_work(&queue->hang_detect_work);
+ cancel_work(&queue->userq_mgr->reset_work);
+ }
+ xa_unlock_irqrestore(xa, flags);
+}
+
void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr)
{
cancel_delayed_work_sync(&userq_mgr->resume_work);
@@ -1354,6 +1215,14 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
}
xa_destroy(&userq_mgr->userq_xa);
+
+ /*
+ * Drain any in-flight reset_work. By this point all queues are freed
+ * and userq_count is 0, so if reset_work starts now it exits early.
+ * We still need to wait in case it was already executing gpu_recover.
+ */
+ cancel_work_sync(&userq_mgr->reset_work);
+
mutex_destroy(&userq_mgr->userq_mutex);
}
@@ -1372,7 +1241,6 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev)
uqm = queue->userq_mgr;
cancel_delayed_work_sync(&uqm->resume_work);
guard(mutex)(&uqm->userq_mutex);
- amdgpu_userq_detect_and_reset_queues(uqm);
if (adev->in_s0ix)
r = amdgpu_userq_preempt_helper(queue);
else
@@ -1431,7 +1299,6 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
(queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
(queue->xcp_id == idx)) {
- amdgpu_userq_detect_and_reset_queues(uqm);
r = amdgpu_userq_preempt_helper(queue);
if (r)
ret = r;
@@ -1504,23 +1371,21 @@ void amdgpu_userq_pre_reset(struct amdgpu_device *adev)
{
const struct amdgpu_userq_funcs *userq_funcs;
struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm;
unsigned long queue_id;
+ /* TODO: We probably need a new lock for the queue state */
xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
- uqm = queue->userq_mgr;
- cancel_delayed_work_sync(&uqm->resume_work);
- if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
- amdgpu_userq_wait_for_last_fence(queue);
- userq_funcs = adev->userq_funcs[queue->queue_type];
- userq_funcs->unmap(queue);
- /* just mark all queues as hung at this point.
- * if unmap succeeds, we could map again
- * in amdgpu_userq_post_reset() if vram is not lost
- */
- queue->state = AMDGPU_USERQ_STATE_HUNG;
- amdgpu_userq_fence_driver_force_completion(queue);
- }
+ if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
+ continue;
+
+ userq_funcs = adev->userq_funcs[queue->queue_type];
+ userq_funcs->unmap(queue);
+ /* just mark all queues as hung at this point.
+ * if unmap succeeds, we could map again
+ * in amdgpu_userq_post_reset() if vram is not lost
+ */
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ amdgpu_userq_fence_driver_force_completion(queue);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 8b8f345b60b6..28cfc6682333 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -48,11 +48,6 @@ struct amdgpu_userq_obj {
struct amdgpu_bo *obj;
};
-struct amdgpu_userq_va_cursor {
- u64 gpu_addr;
- struct list_head list;
-};
-
struct amdgpu_usermode_queue {
int queue_type;
enum amdgpu_userq_state state;
@@ -66,17 +61,44 @@ struct amdgpu_usermode_queue {
struct amdgpu_userq_obj db_obj;
struct amdgpu_userq_obj fw_obj;
struct amdgpu_userq_obj wptr_obj;
+
+ /**
+ * @fence_drv_lock: Protecting @fence_drv_xa.
+ */
+ struct mutex fence_drv_lock;
+
+ /**
+ * @fence_drv_xa:
+ *
+ * References to the external fence drivers returned by wait_ioctl.
+ * Dropped on the next signaled dma_fence or queue destruction.
+ */
struct xarray fence_drv_xa;
struct amdgpu_userq_fence_driver *fence_drv;
struct dma_fence *last_fence;
u32 xcp_id;
int priority;
struct dentry *debugfs_queue;
- struct delayed_work hang_detect_work;
- struct dma_fence *hang_detect_fence;
+
+ /**
+ * @hang_detect_work:
+ *
+ * Delayed work which runs when userq_fences time out.
+ */
+ struct delayed_work hang_detect_work;
struct kref refcount;
- struct list_head userq_va_list;
+ union {
+ struct {
+ u64 queue_rb;
+ u64 wptr;
+ u64 rptr;
+ u64 eop;
+ u64 shadow;
+ u64 csa;
+ } va;
+ u64 va_array[6];
+ } userq_vas;
};
struct amdgpu_userq_funcs {
@@ -105,6 +127,13 @@ struct amdgpu_userq_mgr {
struct amdgpu_device *adev;
struct delayed_work resume_work;
struct drm_file *file;
+
+ /**
+ * @reset_work:
+ *
+ * Reset work which is used when eviction fails.
+ */
+ struct work_struct reset_work;
atomic_t userq_count[AMDGPU_RING_TYPE_MAX];
};
@@ -123,25 +152,15 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp
int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
struct amdgpu_device *adev);
+void amdgpu_userq_mgr_cancel_reset_work(struct amdgpu_device *adev);
void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr);
void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr);
-int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_userq_obj *userq_obj,
- int size);
-
-void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_userq_obj *userq_obj);
-
void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr);
void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr,
struct amdgpu_eviction_fence_mgr *evf_mgr);
-uint64_t amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
- struct amdgpu_db_info *db_info,
- struct drm_file *filp);
-
u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev);
bool amdgpu_userq_enabled(struct drm_device *dev);
@@ -160,7 +179,8 @@ void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell);
int amdgpu_userq_input_va_validate(struct amdgpu_device *adev,
struct amdgpu_usermode_queue *queue,
- u64 addr, u64 expected_size);
+ u64 addr, u64 expected_size, u64 *va_out);
+
void amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping,
uint64_t saddr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index e2d5f04296e1..a41fb72dba94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -121,6 +121,7 @@ amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
userq->last_fence = NULL;
amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
xa_destroy(&userq->fence_drv_xa);
+ mutex_destroy(&userq->fence_drv_lock);
/* Drop the queue's ownership reference to fence_drv explicitly */
amdgpu_userq_fence_driver_put(userq->fence_drv);
}
@@ -134,7 +135,14 @@ amdgpu_userq_fence_put_fence_drv_array(struct amdgpu_userq_fence *userq_fence)
userq_fence->fence_drv_array_count = 0;
}
-void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
+/*
+ * Returns:
+ * -ENOENT when no fences were processed
+ * 1 when more fences are pending
+ * 0 when no fences are pending any more
+ */
+int
+amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
{
struct amdgpu_userq_fence *userq_fence, *tmp;
LIST_HEAD(to_be_signaled);
@@ -142,9 +150,6 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
unsigned long flags;
u64 rptr;
- if (!fence_drv)
- return;
-
spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
rptr = amdgpu_userq_fence_read(fence_drv);
@@ -157,6 +162,9 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
&userq_fence->link);
spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+ if (list_empty(&to_be_signaled))
+ return -ENOENT;
+
list_for_each_entry_safe(userq_fence, tmp, &to_be_signaled, link) {
fence = &userq_fence->base;
list_del_init(&userq_fence->link);
@@ -168,6 +176,8 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
dma_fence_put(fence);
}
+ /* That doesn't need to be accurate so no locking */
+ return list_empty(&fence_drv->fences) ? 0 : 1;
}
void amdgpu_userq_fence_driver_destroy(struct kref *ref)
@@ -209,80 +219,84 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
}
-static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
+static int amdgpu_userq_fence_alloc(struct amdgpu_usermode_queue *userq,
+ struct amdgpu_userq_fence **pfence)
{
- *userq_fence = kmalloc(sizeof(**userq_fence), GFP_KERNEL);
- return *userq_fence ? 0 : -ENOMEM;
+ struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv;
+ struct amdgpu_userq_fence *userq_fence;
+ void *entry;
+
+ userq_fence = kmalloc(sizeof(*userq_fence), GFP_KERNEL);
+ if (!userq_fence)
+ return -ENOMEM;
+
+ /*
+ * Get the next unused entry, since we fill from the start this can be
+ * used as size to allocate the array.
+ */
+ mutex_lock(&userq->fence_drv_lock);
+ XA_STATE(xas, &userq->fence_drv_xa, 0);
+
+ rcu_read_lock();
+ do {
+ entry = xas_find_marked(&xas, ULONG_MAX, XA_FREE_MARK);
+ } while (xas_retry(&xas, entry));
+ rcu_read_unlock();
+
+ userq_fence->fence_drv_array = kvmalloc_array(xas.xa_index,
+ sizeof(fence_drv),
+ GFP_KERNEL);
+ if (!userq_fence->fence_drv_array) {
+ mutex_unlock(&userq->fence_drv_lock);
+ kfree(userq_fence);
+ return -ENOMEM;
+ }
+
+ userq_fence->fence_drv_array_count = xas.xa_index;
+ xa_extract(&userq->fence_drv_xa, (void **)userq_fence->fence_drv_array,
+ 0, ULONG_MAX, xas.xa_index, XA_PRESENT);
+ xa_destroy(&userq->fence_drv_xa);
+
+ mutex_unlock(&userq->fence_drv_lock);
+
+ amdgpu_userq_fence_driver_get(fence_drv);
+ userq_fence->fence_drv = fence_drv;
+
+ *pfence = userq_fence;
+ return 0;
}
-static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
- struct amdgpu_userq_fence *userq_fence,
- u64 seq, struct dma_fence **f)
+static void amdgpu_userq_fence_init(struct amdgpu_usermode_queue *userq,
+ struct amdgpu_userq_fence *fence,
+ u64 seq)
{
- struct amdgpu_userq_fence_driver *fence_drv;
- struct dma_fence *fence;
+ struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv;
unsigned long flags;
bool signaled = false;
- fence_drv = userq->fence_drv;
- if (!fence_drv)
- return -EINVAL;
-
- spin_lock_init(&userq_fence->lock);
- INIT_LIST_HEAD(&userq_fence->link);
- fence = &userq_fence->base;
- userq_fence->fence_drv = fence_drv;
-
- dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
+ spin_lock_init(&fence->lock);
+ dma_fence_init64(&fence->base, &amdgpu_userq_fence_ops, &fence->lock,
fence_drv->context, seq);
- amdgpu_userq_fence_driver_get(fence_drv);
- dma_fence_get(fence);
-
- if (!xa_empty(&userq->fence_drv_xa)) {
- struct amdgpu_userq_fence_driver *stored_fence_drv;
- unsigned long index, count = 0;
- int i = 0;
-
- xa_lock(&userq->fence_drv_xa);
- xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
- count++;
-
- userq_fence->fence_drv_array =
- kvmalloc_objs(struct amdgpu_userq_fence_driver *, count,
- GFP_ATOMIC);
-
- if (userq_fence->fence_drv_array) {
- xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
- userq_fence->fence_drv_array[i] = stored_fence_drv;
- __xa_erase(&userq->fence_drv_xa, index);
- i++;
- }
- }
-
- userq_fence->fence_drv_array_count = i;
- xa_unlock(&userq->fence_drv_xa);
- } else {
- userq_fence->fence_drv_array = NULL;
- userq_fence->fence_drv_array_count = 0;
- }
+ /* Make sure the fence is visible to the hang detect worker */
+ dma_fence_put(userq->last_fence);
+ userq->last_fence = dma_fence_get(&fence->base);
- /* Check if hardware has already processed the job */
+ /* Check if hardware has already processed the fence */
spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
- if (!dma_fence_is_signaled(fence)) {
- list_add_tail(&userq_fence->link, &fence_drv->fences);
+ if (!dma_fence_is_signaled(&fence->base)) {
+ dma_fence_get(&fence->base);
+ list_add_tail(&fence->link, &fence_drv->fences);
} else {
+ INIT_LIST_HEAD(&fence->link);
signaled = true;
- dma_fence_put(fence);
}
spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
if (signaled)
- amdgpu_userq_fence_put_fence_drv_array(userq_fence);
-
- *f = fence;
-
- return 0;
+ amdgpu_userq_fence_put_fence_drv_array(fence);
+ else
+ amdgpu_userq_start_hang_detect_work(userq);
}
static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
@@ -356,56 +370,48 @@ static int amdgpu_userq_fence_read_wptr(struct amdgpu_device *adev,
{
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_bo *bo;
+ struct drm_exec exec;
u64 addr, *ptr;
- int r;
-
- r = amdgpu_bo_reserve(queue->vm->root.bo, false);
- if (r)
- return r;
+ int ret;
addr = queue->userq_prop->wptr_gpu_addr;
addr &= AMDGPU_GMC_HOLE_MASK;
- mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
- if (!mapping) {
- amdgpu_bo_unreserve(queue->vm->root.bo);
- DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n");
- return -EINVAL;
- }
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 2);
+ drm_exec_until_all_locked(&exec) {
+ ret = amdgpu_vm_lock_pd(queue->vm, &exec, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto lock_error;
- bo = amdgpu_bo_ref(mapping->bo_va->base.bo);
- amdgpu_bo_unreserve(queue->vm->root.bo);
- r = amdgpu_bo_reserve(bo, true);
- if (r) {
- amdgpu_bo_unref(&bo);
- DRM_ERROR("Failed to reserve userqueue wptr bo");
- return r;
+ mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
+ if (!mapping) {
+ ret = -EINVAL;
+ goto lock_error;
+ }
+
+ ret = drm_exec_lock_obj(&exec, &mapping->bo_va->base.bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto lock_error;
}
- r = amdgpu_bo_kmap(bo, (void **)&ptr);
- if (r) {
+ bo = mapping->bo_va->base.bo;
+ ret = amdgpu_bo_kmap(bo, (void **)&ptr);
+ if (ret) {
DRM_ERROR("Failed mapping the userqueue wptr bo");
- goto map_error;
+ goto lock_error;
}
*wptr = le64_to_cpu(*ptr);
amdgpu_bo_kunmap(bo);
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&bo);
-
+ drm_exec_fini(&exec);
return 0;
-map_error:
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&bo);
-
- return r;
-}
-
-static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
-{
- dma_fence_put(fence);
+lock_error:
+ drm_exec_fini(&exec);
+ return ret;
}
static void
@@ -451,13 +457,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
const unsigned int num_read_bo_handles = args->num_bo_read_handles;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
+
struct drm_gem_object **gobj_write, **gobj_read;
u32 *syncobj_handles, num_syncobj_handles;
- struct amdgpu_userq_fence *userq_fence;
- struct amdgpu_usermode_queue *queue = NULL;
- struct drm_syncobj **syncobj = NULL;
- struct dma_fence *fence;
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_fence *fence;
+ struct drm_syncobj **syncobj;
struct drm_exec exec;
+ void __user *ptr;
int r, i, entry;
u64 wptr;
@@ -469,13 +476,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
num_syncobj_handles = args->num_syncobj_handles;
- syncobj_handles = memdup_array_user(u64_to_user_ptr(args->syncobj_handles),
- num_syncobj_handles, sizeof(u32));
+ ptr = u64_to_user_ptr(args->syncobj_handles);
+ syncobj_handles = memdup_array_user(ptr, num_syncobj_handles,
+ sizeof(u32));
if (IS_ERR(syncobj_handles))
return PTR_ERR(syncobj_handles);
- /* Array of pointers to the looked up syncobjs */
- syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
+ syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj),
+ GFP_KERNEL);
if (!syncobj) {
r = -ENOMEM;
goto free_syncobj_handles;
@@ -489,21 +497,17 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
}
}
- r = drm_gem_objects_lookup(filp,
- u64_to_user_ptr(args->bo_read_handles),
- num_read_bo_handles,
- &gobj_read);
+ ptr = u64_to_user_ptr(args->bo_read_handles);
+ r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read);
if (r)
goto free_syncobj;
- r = drm_gem_objects_lookup(filp,
- u64_to_user_ptr(args->bo_write_handles),
- num_write_bo_handles,
+ ptr = u64_to_user_ptr(args->bo_write_handles);
+ r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles,
&gobj_write);
if (r)
goto put_gobj_read;
- /* Retrieve the user queue */
queue = amdgpu_userq_get(userq_mgr, args->queue_id);
if (!queue) {
r = -ENOENT;
@@ -512,73 +516,61 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
r = amdgpu_userq_fence_read_wptr(adev, queue, &wptr);
if (r)
- goto put_gobj_write;
+ goto put_queue;
- r = amdgpu_userq_fence_alloc(&userq_fence);
+ r = amdgpu_userq_fence_alloc(queue, &fence);
if (r)
- goto put_gobj_write;
+ goto put_queue;
/* We are here means UQ is active, make sure the eviction fence is valid */
amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
- /* Create a new fence */
- r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
- if (r) {
- mutex_unlock(&userq_mgr->userq_mutex);
- kfree(userq_fence);
- goto put_gobj_write;
- }
+ /* Create the new fence */
+ amdgpu_userq_fence_init(queue, fence, wptr);
- dma_fence_put(queue->last_fence);
- queue->last_fence = dma_fence_get(fence);
- amdgpu_userq_start_hang_detect_work(queue);
mutex_unlock(&userq_mgr->userq_mutex);
+ /*
+ * This needs to come after the fence is created since
+ * amdgpu_userq_ensure_ev_fence() can't be called while holding the resv
+ * locks.
+ */
drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
(num_read_bo_handles + num_write_bo_handles));
- /* Lock all BOs with retry handling */
drm_exec_until_all_locked(&exec) {
- r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
+ r = drm_exec_prepare_array(&exec, gobj_read,
+ num_read_bo_handles, 1);
drm_exec_retry_on_contention(&exec);
- if (r) {
- amdgpu_userq_fence_cleanup(fence);
+ if (r)
goto exec_fini;
- }
- r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
+ r = drm_exec_prepare_array(&exec, gobj_write,
+ num_write_bo_handles, 1);
drm_exec_retry_on_contention(&exec);
- if (r) {
- amdgpu_userq_fence_cleanup(fence);
+ if (r)
goto exec_fini;
- }
}
- for (i = 0; i < num_read_bo_handles; i++) {
- if (!gobj_read || !gobj_read[i]->resv)
- continue;
-
- dma_resv_add_fence(gobj_read[i]->resv, fence,
+ /* And publish the new fence in the BOs and syncobj */
+ for (i = 0; i < num_read_bo_handles; i++)
+ dma_resv_add_fence(gobj_read[i]->resv, &fence->base,
DMA_RESV_USAGE_READ);
- }
-
- for (i = 0; i < num_write_bo_handles; i++) {
- if (!gobj_write || !gobj_write[i]->resv)
- continue;
- dma_resv_add_fence(gobj_write[i]->resv, fence,
+ for (i = 0; i < num_write_bo_handles; i++)
+ dma_resv_add_fence(gobj_write[i]->resv, &fence->base,
DMA_RESV_USAGE_WRITE);
- }
- /* Add the created fence to syncobj/BO's */
for (i = 0; i < num_syncobj_handles; i++)
- drm_syncobj_replace_fence(syncobj[i], fence);
+ drm_syncobj_replace_fence(syncobj[i], &fence->base);
+exec_fini:
/* drop the reference acquired in fence creation function */
- dma_fence_put(fence);
+ dma_fence_put(&fence->base);
-exec_fini:
drm_exec_fini(&exec);
+put_queue:
+ amdgpu_userq_put(queue);
put_gobj_write:
for (i = 0; i < num_write_bo_handles; i++)
drm_gem_object_put(gobj_write[i]);
@@ -589,15 +581,11 @@ put_gobj_read:
kvfree(gobj_read);
free_syncobj:
while (entry-- > 0)
- if (syncobj[entry])
- drm_syncobj_put(syncobj[entry]);
+ drm_syncobj_put(syncobj[entry]);
kfree(syncobj);
free_syncobj_handles:
kfree(syncobj_handles);
- if (queue)
- amdgpu_userq_put(queue);
-
return r;
}
@@ -872,8 +860,10 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp,
* Otherwise, we would gather those references until we don't
* have any more space left and crash.
*/
+ mutex_lock(&waitq->fence_drv_lock);
r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
xa_limit_32b, GFP_KERNEL);
+ mutex_unlock(&waitq->fence_drv_lock);
if (r)
goto put_waitq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
index d355a0eecc07..0bd51616cef1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
@@ -63,7 +63,7 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv);
int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
struct amdgpu_userq_fence_driver **fence_drv_req);
void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq);
-void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv);
+int amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv);
void amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq);
void amdgpu_userq_fence_driver_destroy(struct kref *ref);
int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9ba9de16a27a..c9f88ecce1a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1631,6 +1631,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
{
struct amdgpu_bo_va *bo_va;
struct dma_resv *resv;
+ struct amdgpu_bo *bo;
bool clear, unlock;
int r;
@@ -1650,11 +1651,13 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
while (!list_empty(&vm->invalidated)) {
bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
base.vm_status);
- resv = bo_va->base.bo->tbo.base.resv;
+ bo = bo_va->base.bo;
+ resv = bo->tbo.base.resv;
spin_unlock(&vm->status_lock);
/* Try to reserve the BO to avoid clearing its ptes */
- if (!adev->debug_vm && dma_resv_trylock(resv)) {
+ if (!adev->debug_vm && !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
+ dma_resv_trylock(resv)) {
clear = false;
unlock = true;
/* The caller is already holding the reservation lock */
@@ -2002,7 +2005,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
* during user requests GEM unmap IOCTL except for forcing the unmap
* from user space.
*/
- if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0))
+ if (unlikely(bo_va->userq_va_mapped))
amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr);
list_del(&mapping->list);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index fd881388d612..f27f917e3cdb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -562,6 +562,11 @@ static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr,
amdgpu_ring_write(ring, 0);
}
+ /* WA: Force sync after TRAP to prevent VPE1 from failing to power off */
+ if (ring->adev->vpe.collaborate_mode) {
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COLLAB_SYNC, 0));
+ amdgpu_ring_write(ring, 0xabcd);
+ }
}
static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@@ -968,7 +973,7 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = {
.emit_frame_size =
5 + /* vpe_ring_init_cond_exec */
6 + /* vpe_ring_emit_pipeline_sync */
- 10 + 10 + 10 + /* vpe_ring_emit_fence */
+ 12 + 12 + 12 + /* vpe_ring_emit_fence */
/* vpe_ring_emit_vm_flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 0e0b1e5b88fc..c35372e21261 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -602,6 +602,13 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
"amdgpu/%s_pfp.bin", ucode_prefix);
if (err)
goto out;
+
+ adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
+ (union amdgpu_firmware_header *)
+ adev->gfx.pfp_fw->data, 2, 0);
+ if (adev->gfx.rs64_enable)
+ dev_dbg(adev->dev, "CP RS64 enable\n");
+
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 5b4121ddc78c..98aa00eeb2f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -81,7 +81,7 @@ mes_userq_create_wptr_mapping(struct amdgpu_device *adev,
ret = amdgpu_ttm_alloc_gart(&wptr_obj->obj->tbo);
if (ret) {
DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
- goto fail_map;
+ goto fail_alloc_gart;
}
queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj);
@@ -89,6 +89,8 @@ mes_userq_create_wptr_mapping(struct amdgpu_device *adev,
drm_exec_fini(&exec);
return 0;
+fail_alloc_gart:
+ amdgpu_bo_unpin(wptr_obj->obj);
fail_map:
amdgpu_bo_unref(&wptr_obj->obj);
fail_lock:
@@ -190,12 +192,16 @@ static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
* for the same.
*/
size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ;
- r = amdgpu_userq_create_object(uq_mgr, ctx, size);
+ r = amdgpu_bo_create_kernel(uq_mgr->adev, size, 0,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &ctx->obj, &ctx->gpu_addr,
+ &ctx->cpu_ptr);
if (r) {
DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r);
return r;
}
+ memset(ctx->cpu_ptr, 0, size);
return 0;
}
@@ -268,13 +274,19 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
return -ENOMEM;
}
- r = amdgpu_userq_create_object(uq_mgr, &queue->mqd,
- AMDGPU_MQD_SIZE_ALIGN(mqd_hw_default->mqd_size));
+ r = amdgpu_bo_create_kernel(adev,
+ AMDGPU_MQD_SIZE_ALIGN(mqd_hw_default->mqd_size),
+ 0, AMDGPU_GEM_DOMAIN_GTT,
+ &queue->mqd.obj, &queue->mqd.gpu_addr,
+ &queue->mqd.cpu_ptr);
if (r) {
DRM_ERROR("Failed to create MQD object for userqueue\n");
goto free_props;
}
+ memset(queue->mqd.cpu_ptr, 0,
+ AMDGPU_MQD_SIZE_ALIGN(mqd_hw_default->mqd_size));
+
/* Initialize the MQD BO with user given values */
userq_props->wptr_gpu_addr = mqd_user->wptr_va;
userq_props->rptr_gpu_addr = mqd_user->rptr_va;
@@ -306,8 +318,9 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
kfree(compute_mqd);
goto free_mqd;
}
- r = amdgpu_userq_input_va_validate(adev, queue, compute_mqd->eop_va,
- 2048);
+ r = amdgpu_userq_input_va_validate(adev, queue,
+ compute_mqd->eop_va, 2048,
+ &queue->userq_vas.va.eop);
amdgpu_bo_unreserve(queue->vm->root.bo);
if (r) {
kfree(compute_mqd);
@@ -356,7 +369,8 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
goto free_mqd;
}
r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->shadow_va,
- shadow_info.shadow_size);
+ shadow_info.shadow_size,
+ &queue->userq_vas.va.shadow);
if (r) {
amdgpu_bo_unreserve(queue->vm->root.bo);
kfree(mqd_gfx_v11);
@@ -364,7 +378,8 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
}
r = amdgpu_userq_input_va_validate(adev, queue, mqd_gfx_v11->csa_va,
- shadow_info.csa_size);
+ shadow_info.csa_size,
+ &queue->userq_vas.va.csa);
amdgpu_bo_unreserve(queue->vm->root.bo);
if (r) {
kfree(mqd_gfx_v11);
@@ -394,7 +409,8 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
goto free_mqd;
}
r = amdgpu_userq_input_va_validate(adev, queue, mqd_sdma_v11->csa_va,
- 32);
+ 32,
+ &queue->userq_vas.va.csa);
amdgpu_bo_unreserve(queue->vm->root.bo);
if (r) {
kfree(mqd_sdma_v11);
@@ -430,10 +446,12 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
return 0;
free_ctx:
- amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
+ amdgpu_bo_free_kernel(&queue->fw_obj.obj, &queue->fw_obj.gpu_addr,
+ &queue->fw_obj.cpu_ptr);
free_mqd:
- amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
+ amdgpu_bo_free_kernel(&queue->mqd.obj, &queue->mqd.gpu_addr,
+ &queue->mqd.cpu_ptr);
free_props:
kfree(userq_props);
@@ -443,11 +461,12 @@ free_props:
static void mes_userq_mqd_destroy(struct amdgpu_usermode_queue *queue)
{
- struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
- amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
+ amdgpu_bo_free_kernel(&queue->fw_obj.obj, &queue->fw_obj.gpu_addr,
+ &queue->fw_obj.cpu_ptr);
kfree(queue->userq_prop);
- amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
+ amdgpu_bo_free_kernel(&queue->mqd.obj, &queue->mqd.gpu_addr,
+ &queue->mqd.cpu_ptr);
}
static int mes_userq_preempt(struct amdgpu_usermode_queue *queue)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
index 5b7b46d242c6..93253db5e2de 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
@@ -42,9 +42,10 @@
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
+#define VCE_V1_0_ALIGNMENT (32 * 1024)
#define VCE_V1_0_FW_SIZE (256 * 1024)
#define VCE_V1_0_STACK_SIZE (64 * 1024)
-#define VCE_V1_0_DATA_SIZE (7808 * (AMDGPU_MAX_VCE_HANDLES + 1))
+#define VCE_V1_0_DATA_SIZE (ALIGN(7808 * (AMDGPU_MAX_VCE_HANDLES + 1), VCE_V1_0_ALIGNMENT))
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -177,7 +178,7 @@ static void vce_v1_0_init_cg(struct amdgpu_device *adev)
}
/**
- * vce_v1_0_load_fw_signature - load firmware signature into VCPU BO
+ * vce_v1_0_load_fw() - load firmware and its signature into VCPU BO
*
* @adev: amdgpu_device pointer
*
@@ -185,21 +186,26 @@ static void vce_v1_0_init_cg(struct amdgpu_device *adev)
* This function finds the signature appropriate for the current
* ASIC and writes that into the VCPU BO.
*/
-static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev)
+static int vce_v1_0_load_fw(struct amdgpu_device *adev)
{
const struct common_firmware_header *hdr;
struct vce_v1_0_fw_signature *sign;
- unsigned int ucode_offset;
+ u32 ucode_offset;
+ u32 ucode_size;
uint32_t chip_id;
u32 *cpu_addr;
int i;
hdr = (const struct common_firmware_header *)adev->vce.fw->data;
ucode_offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+ ucode_size = le32_to_cpu(hdr->ucode_size_bytes) - sizeof(*sign);
cpu_addr = adev->vce.cpu_addr;
sign = (void *)adev->vce.fw->data + ucode_offset;
+ if (ucode_size > VCE_V1_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET)
+ return -EINVAL;
+
switch (adev->asic_type) {
case CHIP_TAHITI:
chip_id = 0x01000014;
@@ -226,12 +232,14 @@ static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev)
return -EINVAL;
}
+ memset_io(&cpu_addr[0], 0, amdgpu_bo_size(adev->vce.vcpu_bo));
+
cpu_addr += (256 - 64) / 4;
memcpy_toio(&cpu_addr[0], &sign->val[i].nonce[0], 16);
cpu_addr[4] = cpu_to_le32(le32_to_cpu(sign->length) + 64);
memset_io(&cpu_addr[5], 0, 44);
- memcpy_toio(&cpu_addr[16], &sign[1], hdr->ucode_size_bytes - sizeof(*sign));
+ memcpy_toio(&cpu_addr[16], &sign[1], ucode_size);
cpu_addr += (le32_to_cpu(sign->length) + 64) / 4;
memcpy_toio(&cpu_addr[0], &sign->val[i].sigval[0], 16);
@@ -312,18 +320,23 @@ static int vce_v1_0_mc_resume(struct amdgpu_device *adev)
WREG32(mmVCE_VCPU_SCRATCH7, AMDGPU_MAX_VCE_HANDLES);
offset = adev->vce.gpu_addr + AMDGPU_VCE_FIRMWARE_OFFSET;
- size = VCE_V1_0_FW_SIZE;
- WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
+ size = VCE_V1_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset);
WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
offset += size;
size = VCE_V1_0_STACK_SIZE;
- WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
+ WARN_ON(!IS_ALIGNED(offset, VCE_V1_0_ALIGNMENT));
+ WARN_ON(!IS_ALIGNED(size, VCE_V1_0_ALIGNMENT));
+ WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset);
WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
offset += size;
size = VCE_V1_0_DATA_SIZE;
- WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
+ WARN_ON(!IS_ALIGNED(offset, VCE_V1_0_ALIGNMENT));
+ WARN_ON(!IS_ALIGNED(size, VCE_V1_0_ALIGNMENT));
+ WARN_ON((offset + size - adev->vce.gpu_addr) > amdgpu_bo_size(adev->vce.vcpu_bo));
+ WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset);
WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
@@ -527,22 +540,31 @@ static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block)
* To accomodate that, we put GART to the LOW address range
* and reserve some GART pages where we map the VCPU BO,
* so that it gets a 32-bit address.
+ *
+ * The BAR address is zero and we can't change it
+ * due to the firmware validation mechanism.
+ * It seems that it fails to initialize if the address is >= 128 MiB.
*/
static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev)
{
u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo);
- u64 max_vcpu_bo_addr = 0xffffffff - bo_size;
+ u64 max_vcpu_bo_addr = 0x07ffffff - bo_size;
u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE;
u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo);
u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID;
u64 vce_gart_start_offs;
int r;
- r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr,
- &adev->vce.gart_node, num_pages,
- DRM_MM_INSERT_LOW);
- if (r)
- return r;
+ if (adev->gmc.vram_start < adev->gmc.gart_start)
+ return amdgpu_bo_gpu_offset(adev->vce.vcpu_bo) <= max_vcpu_bo_addr ? 0 : -EINVAL;
+
+ if (!drm_mm_node_allocated(&adev->vce.gart_node)) {
+ r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr,
+ &adev->vce.gart_node, num_pages,
+ DRM_MM_INSERT_LOW);
+ if (r)
+ return r;
+ }
vce_gart_start_offs = amdgpu_gtt_node_to_byte_offset(&adev->vce.gart_node);
@@ -553,8 +575,6 @@ static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev)
amdgpu_gart_map_vram_range(adev, pa, adev->vce.gart_node.start,
num_pages, flags, adev->gart.ptr);
adev->vce.gpu_addr = adev->gmc.gart_start + vce_gart_start_offs;
- if (adev->vce.gpu_addr > max_vcpu_bo_addr)
- return -EINVAL;
return 0;
}
@@ -574,10 +594,7 @@ static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
- r = amdgpu_vce_resume(adev);
- if (r)
- return r;
- r = vce_v1_0_load_fw_signature(adev);
+ r = vce_v1_0_load_fw(adev);
if (r)
return r;
r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
@@ -696,10 +713,7 @@ static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
int r;
- r = amdgpu_vce_resume(adev);
- if (r)
- return r;
- r = vce_v1_0_load_fw_signature(adev);
+ r = vce_v1_0_load_fw(adev);
if (r)
return r;
r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index db149eda6204..3a6fc8604108 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -37,9 +37,14 @@
#include "oss/oss_2_0_d.h"
#include "oss/oss_2_0_sh_mask.h"
+
+/* Use 24K to be safe. The FW supposedly only requires 23744 bytes. */
+#define VCE_V2_0_DATA_ENTRY_SIZE (24 * 1024)
+
#define VCE_V2_0_FW_SIZE (256 * 1024)
#define VCE_V2_0_STACK_SIZE (64 * 1024)
-#define VCE_V2_0_DATA_SIZE (23552 * AMDGPU_MAX_VCE_HANDLES)
+#define VCE_V2_0_DATA_SIZE (VCE_V2_0_DATA_ENTRY_SIZE * (AMDGPU_MAX_VCE_HANDLES + 1))
+
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -183,7 +188,7 @@ static void vce_v2_0_mc_resume(struct amdgpu_device *adev)
WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
offset = AMDGPU_VCE_FIRMWARE_OFFSET;
- size = VCE_V2_0_FW_SIZE;
+ size = VCE_V2_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET;
WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 03d79e464f04..c69f7d82060f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -574,7 +574,7 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
} else
WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
offset = AMDGPU_VCE_FIRMWARE_OFFSET;
- size = VCE_V3_0_FW_SIZE;
+ size = VCE_V3_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET;
WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
WREG32(mmVCE_VCPU_CACHE_SIZE0, size);