diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
21 files changed, 300 insertions, 138 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 892c90b8d063..49e7881750fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -690,6 +690,7 @@ enum amdgpu_uid_type { AMDGPU_UID_TYPE_XCD, AMDGPU_UID_TYPE_AID, AMDGPU_UID_TYPE_SOC, + AMDGPU_UID_TYPE_MID, AMDGPU_UID_TYPE_MAX }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 40c22438b1d2..4f27c75abedb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -692,9 +692,9 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, goto err_ib_sched; } - /* Drop the initial kref_init count (see drm_sched_main as example) */ - dma_fence_put(f); ret = dma_fence_wait(f, false); + /* Drop the returned fence reference after the wait completes */ + dma_fence_put(f); err_ib_sched: amdgpu_job_free(job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index aa9239b310a3..092fd3309099 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -618,6 +618,110 @@ out: } /** + * amdgpu_debugfs_regs_pcie64_read - Read from a 64-bit PCIE register + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + */ +static ssize_t amdgpu_debugfs_regs_pcie64_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x7 || *pos & 0x7) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + r = amdgpu_virt_enable_access_debugfs(adev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + uint64_t value; + + value = RREG64_PCIE_EXT(*pos); + + r = put_user(value, (uint64_t *)buf); + if (r) + goto out; + + result += 8; + buf += 8; + *pos += 8; + size -= 8; + } + + r = result; +out: + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + amdgpu_virt_disable_access_debugfs(adev); + return r; +} + +/** + * amdgpu_debugfs_regs_pcie64_write - Write to a 64-bit PCIE register + * + * @f: open file handle + * @buf: User buffer to write data from + * @size: Number of bytes to write + * @pos: Offset to seek to + */ +static ssize_t amdgpu_debugfs_regs_pcie64_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x7 || *pos & 0x7) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + r = amdgpu_virt_enable_access_debugfs(adev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + uint64_t value; + + r = get_user(value, (uint64_t *)buf); + if (r) + goto out; + + WREG64_PCIE_EXT(*pos, value); + + result += 8; + buf += 8; + *pos += 8; + size -= 8; + } + + r = result; +out: + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + amdgpu_virt_disable_access_debugfs(adev); + return r; +} + +/** * amdgpu_debugfs_regs_didt_read - Read from a DIDT register * * @f: open file handle @@ -1525,6 +1629,12 @@ static const struct file_operations amdgpu_debugfs_regs_pcie_fops = { .write = amdgpu_debugfs_regs_pcie_write, .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_regs_pcie64_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_pcie64_read, + .write = amdgpu_debugfs_regs_pcie64_write, + .llseek = default_llseek +}; static const struct file_operations amdgpu_debugfs_regs_smc_fops = { .owner = THIS_MODULE, .read = amdgpu_debugfs_regs_smc_read, @@ -1587,6 +1697,7 @@ static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_gprwave_fops, &amdgpu_debugfs_regs_didt_fops, &amdgpu_debugfs_regs_pcie_fops, + &amdgpu_debugfs_regs_pcie64_fops, &amdgpu_debugfs_regs_smc_fops, &amdgpu_debugfs_gca_config_fops, &amdgpu_debugfs_sensors_fops, @@ -1604,6 +1715,7 @@ static const char * const debugfs_regs_names[] = { "amdgpu_gprwave", "amdgpu_regs_didt", "amdgpu_regs_pcie", + "amdgpu_regs_pcie64", "amdgpu_regs_smc", "amdgpu_gca_config", "amdgpu_sensors", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ac5769d9e75c..a7038f039b10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3498,7 +3498,8 @@ fail: static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) { - char *input = amdgpu_lockup_timeout; + char buf[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH]; + char *input = buf; char *timeout_setting = NULL; int index = 0; long timeout; @@ -3508,9 +3509,17 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout = adev->video_timeout = msecs_to_jiffies(2000); - if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) + if (!strnlen(amdgpu_lockup_timeout, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) return 0; + /* + * strsep() destructively modifies its input by replacing delimiters + * with '\0'. Use a stack copy so the global module parameter buffer + * remains intact for multi-GPU systems where this function is called + * once per device. + */ + strscpy(buf, amdgpu_lockup_timeout, sizeof(buf)); + while ((timeout_setting = strsep(&input, ",")) && strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { ret = kstrtol(timeout_setting, 0, &timeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 0eb0c62d2f4f..8ec5465c3349 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -324,7 +324,7 @@ static int amdgpu_discovery_get_tmr_info(struct amdgpu_device *adev, ret = amdgpu_acpi_get_tmr_info(adev, &tmr_offset, &tmr_size); if (ret) return ret; - adev->discovery.size = (u32)tmr_size; + adev->discovery.size = DISCOVERY_TMR_SIZE; adev->discovery.offset = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET; } } @@ -1394,6 +1394,9 @@ static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev) struct list_head *el, *tmp; struct kset *die_kset; + if (!ip_top) + return; + die_kset = &ip_top->die_kset; spin_lock(&die_kset->list_lock); list_for_each_prev_safe(el, tmp, &die_kset->list) { @@ -1418,9 +1421,13 @@ void amdgpu_discovery_dump(struct amdgpu_device *adev, struct drm_printer *p) struct ip_hw_instance *ip_inst; int i = 0, j; + if (!ip_top) + return; + die_kset = &ip_top->die_kset; drm_printf(p, "\nHW IP Discovery\n"); + spin_lock(&die_kset->list_lock); list_for_each(el_die, &die_kset->list) { drm_printf(p, "die %d\n", i++); @@ -1977,11 +1984,10 @@ static int amdgpu_discovery_refresh_nps_info(struct amdgpu_device *adev, int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev, uint32_t *nps_type, - struct amdgpu_gmc_memrange **ranges, + struct amdgpu_gmc_memrange *ranges, int *range_cnt, bool refresh) { uint8_t *discovery_bin = adev->discovery.bin; - struct amdgpu_gmc_memrange *mem_ranges; struct table_info *info; union nps_info *nps_info; union nps_info nps_data; @@ -2019,20 +2025,22 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev, switch (le16_to_cpu(nps_info->v1.header.version_major)) { case 1: - mem_ranges = kvzalloc_objs(*mem_ranges, nps_info->v1.count); - if (!mem_ranges) - return -ENOMEM; *nps_type = nps_info->v1.nps_type; + if (*range_cnt < nps_info->v1.count) { + dev_dbg(adev->dev, + "not enough space for nps ranges: %d < %d\n", + *range_cnt, nps_info->v1.count); + return -ENOSPC; + } *range_cnt = nps_info->v1.count; for (i = 0; i < *range_cnt; i++) { - mem_ranges[i].base_address = + ranges[i].base_address = nps_info->v1.instance_info[i].base_address; - mem_ranges[i].limit_address = + ranges[i].limit_address = nps_info->v1.instance_info[i].limit_address; - mem_ranges[i].nid_mask = -1; - mem_ranges[i].flags = 0; + ranges[i].nid_mask = -1; + ranges[i].flags = 0; } - *ranges = mem_ranges; break; default: dev_err(adev->dev, "Unhandled NPS info table %d.%d\n", @@ -2334,6 +2342,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block); break; case IP_VERSION(15, 0, 0): + case IP_VERSION(15, 0, 8): amdgpu_device_ip_block_add(adev, &smu_v15_0_ip_block); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index a7aeb47887a3..0ff1a7923eed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -46,7 +46,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev); int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev, uint32_t *nps_type, - struct amdgpu_gmc_memrange **ranges, + struct amdgpu_gmc_memrange *ranges, int *range_cnt, bool refresh); void amdgpu_discovery_dump(struct amdgpu_device *adev, struct drm_printer *p); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c index 55e8b9cc2f9f..4c5e38dea4c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -64,10 +64,19 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct *work) container_of(evf_mgr, struct amdgpu_fpriv, evf_mgr); struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; struct dma_fence *ev_fence; + bool cookie; mutex_lock(&uq_mgr->userq_mutex); + + /* + * This is intentionally after taking the userq_mutex since we do + * allocate memory while holding this lock, but only after ensuring that + * the eviction fence is signaled. + */ + cookie = dma_fence_begin_signalling(); + ev_fence = amdgpu_evf_mgr_get_fence(evf_mgr); - amdgpu_userq_evict(uq_mgr, !evf_mgr->shutdown); + amdgpu_userq_evict(uq_mgr); /* * Signaling the eviction fence must be done while holding the @@ -75,7 +84,12 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct *work) * next fence. */ dma_fence_signal(ev_fence); + dma_fence_end_signalling(cookie); dma_fence_put(ev_fence); + + if (!evf_mgr->shutdown) + schedule_delayed_work(&uq_mgr->resume_work, 0); + mutex_unlock(&uq_mgr->userq_mutex); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index d209591e3710..8048a4c04b47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -438,7 +438,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, * @ring: ring to init the fence driver on * * Init the fence driver for the requested ring (all asics). - * Helper function for amdgpu_fence_driver_init(). + * Helper function for amdgpu_fence_driver_sw_init(). */ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 01dc73309d73..5376035d32fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -30,6 +30,7 @@ #include <linux/pagemap.h> #include <linux/pci.h> #include <linux/dma-buf.h> +#include <linux/dma-fence-unwrap.h> #include <drm/amdgpu_drm.h> #include <drm/drm_drv.h> @@ -106,6 +107,7 @@ amdgpu_gem_update_timeline_node(struct drm_file *filp, *chain = dma_fence_chain_alloc(); if (!*chain) { drm_syncobj_put(*syncobj); + *syncobj = NULL; return -ENOMEM; } @@ -741,11 +743,10 @@ amdgpu_gem_va_update_vm(struct amdgpu_device *adev, struct dma_fence *fence; int r = 0; - /* Always start from the VM's existing last update fence. */ - fence = dma_fence_get(vm->last_update); - + /* If the VM is not ready return only a stub. */ if (!amdgpu_vm_ready(vm)) - return fence; + return dma_fence_get_stub(); + /* * First clean up any freed mappings in the VM. @@ -754,7 +755,7 @@ amdgpu_gem_va_update_vm(struct amdgpu_device *adev, * schedules GPU work. If nothing needs clearing, @fence can remain as * the original vm->last_update. */ - r = amdgpu_vm_clear_freed(adev, vm, &fence); + r = amdgpu_vm_clear_freed(adev, vm, &vm->last_update); if (r) goto error; @@ -771,47 +772,34 @@ amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (r) goto error; - /* - * Decide which fence best represents the last update: - * - * MAP/REPLACE: - * - For always-valid mappings, use vm->last_update. - * - Otherwise, export bo_va->last_pt_update. - * - * UNMAP/CLEAR: - * Keep the fence returned by amdgpu_vm_clear_freed(). If no work was - * needed, it can remain as vm->last_pt_update. - * - * The VM and BO update fences are always initialized to a valid value. - * vm->last_update and bo_va->last_pt_update always start as valid fences. - * and are never expected to be NULL. - */ - switch (operation) { - case AMDGPU_VA_OP_MAP: - case AMDGPU_VA_OP_REPLACE: + if ((operation == AMDGPU_VA_OP_MAP || + operation == AMDGPU_VA_OP_REPLACE) && + !amdgpu_vm_is_bo_always_valid(vm, bo_va->base.bo)) { + /* - * For MAP/REPLACE, return the page table update fence for the - * mapping we just modified. bo_va is expected to be valid here. + * For MAP/REPLACE of non per-VM BOs we need to sync to both the + * bo_va->last_pt_update and vm->last_update or otherwise we + * potentially miss the PDE updates. */ - dma_fence_put(fence); - - if (amdgpu_vm_is_bo_always_valid(vm, bo_va->base.bo)) - fence = dma_fence_get(vm->last_update); - else - fence = dma_fence_get(bo_va->last_pt_update); - break; - case AMDGPU_VA_OP_UNMAP: - case AMDGPU_VA_OP_CLEAR: - default: - /* keep @fence as returned by amdgpu_vm_clear_freed() */ - break; + fence = dma_fence_unwrap_merge(vm->last_update, + bo_va->last_pt_update); + if (!fence) { + /* As fallback in OOM situations */ + dma_fence_wait(vm->last_update, false); + dma_fence_wait(bo_va->last_pt_update, false); + fence = dma_fence_get_stub(); + } + } else { + fence = dma_fence_get(vm->last_update); } + return fence; + error: if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); - return fence; + return dma_fence_get(vm->last_update); } int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, @@ -832,7 +820,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct amdgpu_bo_va *bo_va; struct drm_syncobj *timeline_syncobj = NULL; struct dma_fence_chain *timeline_chain = NULL; - struct dma_fence *fence; struct drm_exec exec; uint64_t vm_size; int r = 0; @@ -884,6 +871,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -EINVAL; } + if (args->flags & AMDGPU_VM_DELAY_UPDATE && + args->vm_timeline_syncobj_out) + return -EINVAL; + if ((args->operation != AMDGPU_VA_OP_CLEAR) && !(args->flags & AMDGPU_VM_PAGE_PRT)) { gobj = drm_gem_object_lookup(filp, args->handle); @@ -973,11 +964,13 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, * that represents the last relevant update for this mapping. This * fence can then be exported to the user-visible VM timeline. */ - if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) { + if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && + (!adev->debug_vm || timeline_syncobj)) { + struct dma_fence *fence; + fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, args->operation); - - if (timeline_syncobj && fence) { + if (timeline_syncobj) { if (!args->vm_timeline_point) { /* Replace the existing fence when no point is given. */ drm_syncobj_replace_fence(timeline_syncobj, @@ -988,6 +981,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, timeline_chain, fence, args->vm_timeline_point); + timeline_chain = NULL; } } dma_fence_put(fence); @@ -995,6 +989,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, } error: + dma_fence_chain_free(timeline_chain); + if (timeline_syncobj) + drm_syncobj_put(timeline_syncobj); drm_exec_fini(&exec); error_put_gobj: drm_gem_object_put(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index a0940db1cd36..860a4405f7dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -1374,18 +1374,18 @@ int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev, struct amdgpu_mem_partition_info *mem_ranges, uint8_t *exp_ranges) { - struct amdgpu_gmc_memrange *ranges; + struct amdgpu_gmc_memrange ranges[AMDGPU_MAX_MEM_RANGES]; int range_cnt, ret, i, j; uint32_t nps_type; bool refresh; if (!mem_ranges || !exp_ranges) return -EINVAL; - + range_cnt = AMDGPU_MAX_MEM_RANGES; refresh = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) && (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS); - ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges, - &range_cnt, refresh); + ret = amdgpu_discovery_get_nps_info(adev, &nps_type, ranges, &range_cnt, + refresh); if (ret) return ret; @@ -1446,8 +1446,6 @@ int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev, if (!*exp_ranges) *exp_ranges = range_cnt; err: - kvfree(ranges); - return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 64c519cd7395..d88523568b62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -35,10 +35,13 @@ * PASIDs are global address space identifiers that can be shared * between the GPU, an IOMMU and the driver. VMs on different devices * may use the same PASID if they share the same address - * space. Therefore PASIDs are allocated using a global IDA. VMs are - * looked up from the PASID per amdgpu_device. + * space. Therefore PASIDs are allocated using IDR cyclic allocator + * (similar to kernel PID allocation) which naturally delays reuse. + * VMs are looked up from the PASID per amdgpu_device. */ -static DEFINE_IDA(amdgpu_pasid_ida); + +static DEFINE_IDR(amdgpu_pasid_idr); +static DEFINE_SPINLOCK(amdgpu_pasid_idr_lock); /* Helper to free pasid from a fence callback */ struct amdgpu_pasid_cb { @@ -50,8 +53,8 @@ struct amdgpu_pasid_cb { * amdgpu_pasid_alloc - Allocate a PASID * @bits: Maximum width of the PASID in bits, must be at least 1 * - * Allocates a PASID of the given width while keeping smaller PASIDs - * available if possible. + * Uses kernel's IDR cyclic allocator (same as PID allocation). + * Allocates sequentially with automatic wrap-around. * * Returns a positive integer on success. Returns %-EINVAL if bits==0. * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on @@ -59,14 +62,15 @@ struct amdgpu_pasid_cb { */ int amdgpu_pasid_alloc(unsigned int bits) { - int pasid = -EINVAL; + int pasid; - for (bits = min(bits, 31U); bits > 0; bits--) { - pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1), - (1U << bits) - 1, GFP_KERNEL); - if (pasid != -ENOSPC) - break; - } + if (bits == 0) + return -EINVAL; + + spin_lock(&amdgpu_pasid_idr_lock); + pasid = idr_alloc_cyclic(&amdgpu_pasid_idr, NULL, 1, + 1U << bits, GFP_KERNEL); + spin_unlock(&amdgpu_pasid_idr_lock); if (pasid >= 0) trace_amdgpu_pasid_allocated(pasid); @@ -81,7 +85,10 @@ int amdgpu_pasid_alloc(unsigned int bits) void amdgpu_pasid_free(u32 pasid) { trace_amdgpu_pasid_freed(pasid); - ida_free(&amdgpu_pasid_ida, pasid); + + spin_lock(&amdgpu_pasid_idr_lock); + idr_remove(&amdgpu_pasid_idr, pasid); + spin_unlock(&amdgpu_pasid_idr_lock); } static void amdgpu_pasid_free_cb(struct dma_fence *fence, @@ -616,3 +623,15 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) } } } + +/** + * amdgpu_pasid_mgr_cleanup - cleanup PASID manager + * + * Cleanup the IDR allocator. + */ +void amdgpu_pasid_mgr_cleanup(void) +{ + spin_lock(&amdgpu_pasid_idr_lock); + idr_destroy(&amdgpu_pasid_idr); + spin_unlock(&amdgpu_pasid_idr_lock); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index b3649cd3af56..a57919478d3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -74,6 +74,7 @@ int amdgpu_pasid_alloc(unsigned int bits); void amdgpu_pasid_free(u32 pasid); void amdgpu_pasid_free_delayed(struct dma_resv *resv, u32 pasid); +void amdgpu_pasid_mgr_cleanup(void); bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 341beec59537..0eecfaa3a94c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -103,10 +103,8 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - if (!amdgpu_ctx_priority_is_valid(args->in.priority)) { - WARN(1, "Invalid context priority %d\n", args->in.priority); + if (!amdgpu_ctx_priority_is_valid(args->in.priority)) return -EINVAL; - } switch (args->in.op) { case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index d94f4966fea9..7f64b783954a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -999,15 +999,11 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) /* Resume all the queues for this process */ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { - queue = amdgpu_userq_get(uq_mgr, queue_id); - if (!queue) - continue; if (!amdgpu_userq_buffer_vas_mapped(queue)) { drm_file_err(uq_mgr->file, "trying restore queue without va mapping\n"); queue->state = AMDGPU_USERQ_STATE_INVALID_VA; - amdgpu_userq_put(queue); continue; } @@ -1015,7 +1011,6 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) if (r) ret = r; - amdgpu_userq_put(queue); } if (ret) @@ -1232,10 +1227,8 @@ static void amdgpu_userq_restore_worker(struct work_struct *work) } ret = amdgpu_userq_restore_all(uq_mgr); - if (ret) { + if (ret) drm_file_err(uq_mgr->file, "Failed to restore all queues\n"); - goto unlock; - } unlock: mutex_unlock(&uq_mgr->userq_mutex); @@ -1252,13 +1245,9 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) amdgpu_userq_detect_and_reset_queues(uq_mgr); /* Try to unmap all the queues in this process ctx */ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { - queue = amdgpu_userq_get(uq_mgr, queue_id); - if (!queue) - continue; r = amdgpu_userq_preempt_helper(queue); if (r) ret = r; - amdgpu_userq_put(queue); } if (ret) @@ -1291,31 +1280,25 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) int ret; xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { - queue = amdgpu_userq_get(uq_mgr, queue_id); - if (!queue) - continue; - struct dma_fence *f = queue->last_fence; - if (!f || dma_fence_is_signaled(f)) { - amdgpu_userq_put(queue); + if (!f || dma_fence_is_signaled(f)) continue; - } + ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); if (ret <= 0) { drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n", f->context, f->seqno); - amdgpu_userq_put(queue); + return -ETIMEDOUT; } - amdgpu_userq_put(queue); } return 0; } void -amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, bool schedule_resume) +amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr) { struct amdgpu_device *adev = uq_mgr->adev; int ret; @@ -1329,8 +1312,6 @@ amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, bool schedule_resume) if (ret) dev_err(adev->dev, "Failed to evict userqueue\n"); - if (schedule_resume) - schedule_delayed_work(&uq_mgr->resume_work, 0); } int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index f0abc16d02cc..a4d44abf24fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -133,8 +133,7 @@ int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userq_obj *userq_obj); -void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, - bool schedule_resume); +void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr); void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_eviction_fence_mgr *evf_mgr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 781896c9fd26..fe6d83e859a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -705,7 +705,7 @@ amdgpu_userq_wait_count_fences(struct drm_file *filp, num_fences++; } - wait_info->num_fences = num_fences; + wait_info->num_fences = min(num_fences, USHRT_MAX); r = 0; error_unlock: @@ -715,6 +715,19 @@ error_unlock: } static int +amdgpu_userq_wait_add_fence(struct drm_amdgpu_userq_wait *wait_info, + struct dma_fence **fences, unsigned int *num_fences, + struct dma_fence *fence) +{ + /* As fallback shouldn't userspace allocate enough space */ + if (*num_fences >= wait_info->num_fences) + return dma_fence_wait(fence, true); + + fences[(*num_fences)++] = dma_fence_get(fence); + return 0; +} + +static int amdgpu_userq_wait_return_fence_info(struct drm_file *filp, struct drm_amdgpu_userq_wait *wait_info, u32 *syncobj_handles, u32 *timeline_points, @@ -757,13 +770,12 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp, goto free_fences; dma_fence_unwrap_for_each(f, &iter, fence) { - if (num_fences >= wait_info->num_fences) { - r = -EINVAL; + r = amdgpu_userq_wait_add_fence(wait_info, fences, + &num_fences, f); + if (r) { dma_fence_put(fence); goto free_fences; } - - fences[num_fences++] = dma_fence_get(f); } dma_fence_put(fence); @@ -780,14 +792,12 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp, if (r) goto free_fences; - if (num_fences >= wait_info->num_fences) { - dma_fence_put(fence); - r = -EINVAL; + r = amdgpu_userq_wait_add_fence(wait_info, fences, + &num_fences, fence); + dma_fence_put(fence); + if (r) goto free_fences; - } - /* Give the reference to the fence array */ - fences[num_fences++] = fence; } /* Lock all the GEM objects */ @@ -817,12 +827,10 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp, dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv, DMA_RESV_USAGE_READ, fence) { - if (num_fences >= wait_info->num_fences) { - r = -EINVAL; + r = amdgpu_userq_wait_add_fence(wait_info, fences, + &num_fences, fence); + if (r) goto error_unlock; - } - - fences[num_fences++] = dma_fence_get(fence); } } @@ -833,12 +841,10 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp, dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv, DMA_RESV_USAGE_WRITE, fence) { - if (num_fences >= wait_info->num_fences) { - r = -EINVAL; + r = amdgpu_userq_wait_add_fence(wait_info, fences, + &num_fences, fence); + if (r) goto error_unlock; - } - - fences[num_fences++] = dma_fence_get(fence); } } @@ -961,13 +967,13 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, } num_read_bo_handles = wait_info->num_bo_read_handles; - ptr = u64_to_user_ptr(wait_info->bo_read_handles), + ptr = u64_to_user_ptr(wait_info->bo_read_handles); r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read); if (r) goto free_timeline_points; num_write_bo_handles = wait_info->num_bo_write_handles; - ptr = u64_to_user_ptr(wait_info->bo_write_handles), + ptr = u64_to_user_ptr(wait_info->bo_write_handles); r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles, &gobj_write); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3f5712fc7216..73abac6be5b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2916,6 +2916,7 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) xa_destroy(&adev->vm_manager.pasids); amdgpu_vmid_mgr_fini(adev); + amdgpu_pasid_mgr_cleanup(); } /** @@ -2991,14 +2992,14 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, if (!root) return false; - addr /= AMDGPU_GPU_PAGE_SIZE; - if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr, ts, write_fault)) { + node_id, addr >> PAGE_SHIFT, ts, write_fault)) { amdgpu_bo_unref(&root); return true; } + addr /= AMDGPU_GPU_PAGE_SIZE; + r = amdgpu_bo_reserve(root, true); if (r) goto error_unref; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 1893ceeeb26c..8b60299b73ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6752,7 +6752,7 @@ static void gfx_v10_0_gfx_mqd_set_priority(struct amdgpu_device *adev, /* set up default queue priority level * 0x0 = low priority, 0x1 = high priority */ - if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH) + if (prop->hqd_queue_priority == AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM) priority = 1; tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index b1a1b8a10a08..78d1f3eb522e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4088,7 +4088,7 @@ static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev, /* set up default queue priority level * 0x0 = low priority, 0x1 = high priority */ - if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH) + if (prop->hqd_queue_priority == AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM) priority = 1; tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c index 5dcc2c32644a..0e9089544769 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_1.c @@ -2227,7 +2227,7 @@ static int mes_v12_1_self_test(struct amdgpu_device *adev, int xcc_id) struct amdgpu_bo *meta_bo = NULL, *ctx_bo = NULL; void *meta_ptr = NULL, *ctx_ptr = NULL; u64 meta_gpu_addr, ctx_gpu_addr; - int size, i, r, pasid;; + int size, i, r, pasid; pasid = amdgpu_pasid_alloc(16); if (pasid < 0) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index e78526a4e521..ff3013b97abd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -134,6 +134,21 @@ static int vcn_v4_0_3_early_init(struct amdgpu_ip_block *ip_block) return 0; } +static bool vcn_v4_0_3_is_psp_fw_reset_supported(struct amdgpu_device *adev) +{ + uint32_t fw_ver = adev->psp.sos.fw_version; + uint32_t pgm = (fw_ver >> 8) & 0xFF; + + /* + * FWDEV-159155: PSP SOS FW must be >= 0x0036015f for program 0x01 + * before enabling VCN per-queue reset. + */ + if (pgm == 1) + return fw_ver >= 0x0036015f; + + return true; +} + static int vcn_v4_0_3_late_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -141,7 +156,9 @@ static int vcn_v4_0_3_late_init(struct amdgpu_ip_block *ip_block) adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); - if (amdgpu_dpm_reset_vcn_is_supported(adev) && !amdgpu_sriov_vf(adev)) + if (amdgpu_dpm_reset_vcn_is_supported(adev) && + vcn_v4_0_3_is_psp_fw_reset_supported(adev) && + !amdgpu_sriov_vf(adev)) adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; return 0; |
