diff options
Diffstat (limited to 'drivers/gpu')
192 files changed, 2414 insertions, 1347 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8bc591deb546..fd50da4c7b18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1190,7 +1190,6 @@ struct amdgpu_device { bool apu_prefer_gtt; bool userq_halt_for_enforce_isolation; - struct work_struct userq_reset_work; struct amdgpu_uid *uid_info; struct amdgpu_uma_carveout_info uma_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index d9e283f3b57d..9783a3cefb04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -36,6 +36,9 @@ #include "amdgpu_ras.h" #include "amdgpu_umc.h" #include "amdgpu_reset.h" +#if IS_ENABLED(CONFIG_HSA_AMD) +#include "kfd_priv.h" +#endif /* Total memory size in system memory and all GPU VRAM. Used to * estimate worst case amount of memory to reserve for page tables @@ -320,6 +323,28 @@ void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev) (void)amdgpu_reset_domain_schedule(adev->reset_domain, &adev->kfd.reset_work); } +void amdgpu_amdkfd_clear_kfd_mapping(struct amdgpu_device *adev) +{ +#if IS_ENABLED(CONFIG_HSA_AMD) + struct kfd_dev *kfd = adev->kfd.dev; + unsigned int i; + + if (!kfd) + return; + + for (i = 0; i < kfd->num_nodes; i++) { + struct kfd_node *node = kfd->nodes[i]; + + kfd_dev_unmap_mapping_range(KFD_MMAP_TYPE_DOORBELL | + KFD_MMAP_GPU_ID(node->id), + kfd_doorbell_process_slice(kfd)); + kfd_dev_unmap_mapping_range(KFD_MMAP_TYPE_MMIO | + KFD_MMAP_GPU_ID(node->id), + PAGE_SIZE); + } +#endif +} + int amdgpu_amdkfd_alloc_kernel_mem(struct amdgpu_device *adev, size_t size, u32 domain, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool cp_mqd_gfx9) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index cdbab7f8cee8..2b4108f83f48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -358,6 +358,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); +void amdgpu_amdkfd_clear_kfd_mapping(struct amdgpu_device *adev); u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 737ef1ef96a5..feab90e3efd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2839,8 +2839,12 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) * that checks whether the PSP is running. A solution for those issues * in the APU is to trigger a GPU reset, but this should be done during * the unload phase to avoid adding boot latency and screen flicker. + * GFX V11 has GC block as default off IP. Every time AMDGPU driver sends + * a request to PMFW to unload MP1, PMFW will put GC in reset and power down + * the voltage. Hence, skipping reset for APUs with GFX V11 or later. */ - if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu) { + if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu && + amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 0, 0)) { r = amdgpu_asic_reset(adev); if (r) dev_err(adev->dev, "asic reset on %s failed\n", __func__); @@ -3783,7 +3787,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, } INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); - INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work); amdgpu_coredump_init(adev); @@ -5474,7 +5477,7 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) cancel_work(&adev->reset_work); #endif - cancel_work(&adev->userq_reset_work); + amdgpu_userq_mgr_cancel_reset_work(adev); if (adev->kfd.dev) cancel_work(&adev->kfd.reset_work); @@ -5832,6 +5835,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* We need to lock reset domain only once both for XGMI and single device */ amdgpu_device_recovery_get_reset_lock(adev, &device_list); + /* unmap all the mappings of doorbell and framebuffer to prevent user space from + * accessing them + */ + unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1); + amdgpu_amdkfd_clear_kfd_mapping(adev); + amdgpu_device_halt_activities(adev, job, reset_context, &device_list, hive, need_emergency_restart); if (need_emergency_restart) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index fcad7daaa41b..80efeca0ab73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -304,7 +304,7 @@ static int amdgpu_discovery_get_tmr_info(struct amdgpu_device *adev, adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset; adev->discovery.size = adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb << 10; - if (!adev->discovery.offset || !adev->discovery.size) + if (!adev->discovery.size) return -EINVAL; } else { goto out; @@ -3090,10 +3090,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 3): - adev->family = AMDGPU_FAMILY_GC_11_5_0; - break; case IP_VERSION(11, 5, 4): - adev->family = AMDGPU_FAMILY_GC_11_5_4; + adev->family = AMDGPU_FAMILY_GC_11_5_0; break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e47921e2a9af..60debd543e44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -3149,17 +3149,15 @@ static int __init amdgpu_init(void) r = amdgpu_sync_init(); if (r) - goto error_sync; - - r = amdgpu_userq_fence_slab_init(); - if (r) - goto error_fence; + return r; amdgpu_register_atpx_handler(); amdgpu_acpi_detect(); - /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */ - amdgpu_amdkfd_init(); + /* Ignore KFD init failures when CONFIG_HSA_AMD is not set. */ + r = amdgpu_amdkfd_init(); + if (r && r != -ENOENT) + goto error_fini_sync; if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) { add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); @@ -3170,10 +3168,8 @@ static int __init amdgpu_init(void) /* let modprobe override vga console setting */ return pci_register_driver(&amdgpu_kms_pci_driver); -error_fence: +error_fini_sync: amdgpu_sync_fini(); - -error_sync: return r; } @@ -3184,7 +3180,6 @@ static void __exit amdgpu_exit(void) amdgpu_unregister_atpx_handler(); amdgpu_acpi_release(); amdgpu_sync_fini(); - amdgpu_userq_fence_slab_fini(); mmu_notifier_synchronize(); amdgpu_xcp_drv_release(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index bc772ca3dab7..b6f849d51c2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -262,12 +262,19 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) */ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) { + int r; + if (adev->gart.bo != NULL) return 0; - return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo, - NULL, (void *)&adev->gart.ptr); + r = amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo, + NULL, (void *)&adev->gart.ptr); + if (r) + return r; + + memset_io(adev->gart.ptr, adev->gart.gart_pte_flags, adev->gart.table_size); + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 5376035d32fe..123d4a09114d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -31,6 +31,7 @@ #include <linux/pci.h> #include <linux/dma-buf.h> #include <linux/dma-fence-unwrap.h> +#include <linux/uaccess.h> #include <drm/amdgpu_drm.h> #include <drm/drm_drv.h> @@ -508,6 +509,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, if (offset_in_page(args->addr | args->size)) return -EINVAL; + if (!access_ok((void __user *)(uintptr_t)args->addr, args->size)) + return -EFAULT; + /* reject unknown flag values */ if (args->flags & ~(AMDGPU_GEM_USERPTR_READONLY | AMDGPU_GEM_USERPTR_ANONONLY | AMDGPU_GEM_USERPTR_VALIDATE | @@ -821,7 +825,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_syncobj *timeline_syncobj = NULL; struct dma_fence_chain *timeline_chain = NULL; struct drm_exec exec; - uint64_t vm_size; + uint64_t vm_size, tmp; int r = 0; /* Validate virtual address range against reserved regions. */ @@ -845,7 +849,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; vm_size -= AMDGPU_VA_RESERVED_TOP; - if (args->va_address + args->map_size > vm_size) { + if (check_add_overflow(args->va_address, args->map_size, &tmp) || tmp > vm_size) { dev_dbg(dev->dev, "va_address 0x%llx is in top reserved area 0x%llx\n", args->va_address + args->map_size, vm_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 285e217fba04..3d9497d121ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -314,7 +314,10 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, mc->gart_start = max_mc_address - mc->gart_size + 1; break; case AMDGPU_GART_PLACEMENT_LOW: - mc->gart_start = 0; + if (size_bf >= mc->gart_size) + mc->gart_start = 0; + else + mc->gart_start = ALIGN(mc->fb_end, four_gb); break; case AMDGPU_GART_PLACEMENT_BEST_FIT: default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 620fddde4c4d..a5d26b943f6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -199,11 +199,18 @@ int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr, enum drm_mm_insert_mode mode) { struct amdgpu_device *adev = container_of(mgr, typeof(*adev), mman.gtt_mgr); + u32 alignment = 0; int r; + /* Align to TLB L2 cache entry size to work around "V bit HW bug" */ + if (adev->asic_type == CHIP_TAHITI) { + alignment = 32 * 1024 / AMDGPU_GPU_PAGE_SIZE; + num_pages = ALIGN(num_pages, alignment); + } + spin_lock(&mgr->lock); r = drm_mm_insert_node_in_range(&mgr->mm, mm_node, num_pages, - 0, GART_ENTRY_WITHOUT_BO_COLOR, 0, + alignment, GART_ENTRY_WITHOUT_BO_COLOR, 0, adev->gmc.gart_size >> PAGE_SHIFT, mode); spin_unlock(&mgr->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 06efce38f323..71272f40feef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -873,68 +873,59 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ? -EFAULT : 0; } case AMDGPU_INFO_READ_MMR_REG: { - int ret = 0; - unsigned int n, alloc_size; - uint32_t *regs; unsigned int se_num = (info->read_mmr_reg.instance >> AMDGPU_INFO_MMR_SE_INDEX_SHIFT) & AMDGPU_INFO_MMR_SE_INDEX_MASK; unsigned int sh_num = (info->read_mmr_reg.instance >> AMDGPU_INFO_MMR_SH_INDEX_SHIFT) & AMDGPU_INFO_MMR_SH_INDEX_MASK; - - if (!down_read_trylock(&adev->reset_domain->sem)) - return -ENOENT; + unsigned int alloc_size; + uint32_t *regs; + int ret; /* set full masks if the userspace set all bits * in the bitfields */ - if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) { + if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) se_num = 0xffffffff; - } else if (se_num >= AMDGPU_GFX_MAX_SE) { - ret = -EINVAL; - goto out; - } + else if (se_num >= AMDGPU_GFX_MAX_SE) + return -EINVAL; - if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) { + if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) sh_num = 0xffffffff; - } else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) { - ret = -EINVAL; - goto out; - } + else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) + return -EINVAL; - if (info->read_mmr_reg.count > 128) { - ret = -EINVAL; - goto out; - } + if (info->read_mmr_reg.count > 128) + return -EINVAL; - regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL); - if (!regs) { - ret = -ENOMEM; - goto out; - } + regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), + GFP_KERNEL); + if (!regs) + return -ENOMEM; + down_read(&adev->reset_domain->sem); alloc_size = info->read_mmr_reg.count * sizeof(*regs); - amdgpu_gfx_off_ctrl(adev, false); + ret = 0; for (i = 0; i < info->read_mmr_reg.count; i++) { if (amdgpu_asic_read_register(adev, se_num, sh_num, info->read_mmr_reg.dword_offset + i, ®s[i])) { DRM_DEBUG_KMS("unallowed offset %#x\n", info->read_mmr_reg.dword_offset + i); - kfree(regs); - amdgpu_gfx_off_ctrl(adev, true); ret = -EFAULT; - goto out; + break; } } amdgpu_gfx_off_ctrl(adev, true); - n = copy_to_user(out, regs, min(size, alloc_size)); - kfree(regs); - ret = (n ? -EFAULT : 0); -out: up_read(&adev->reset_domain->sem); + + if (!ret) { + ret = copy_to_user(out, regs, min(size, alloc_size)) + ? -EFAULT : 0; + } + kfree(regs); return ret; } case AMDGPU_INFO_DEV_INFO: { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 912c9afaf9e1..4d68732d6223 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -96,7 +96,8 @@ struct amdgpu_bo_va { * if non-zero, cannot unmap from GPU because user queues may still access it */ unsigned int queue_refcount; - atomic_t userq_va_mapped; + /* Indicates if this buffer is mapped for any user queue. Once set, never reset. */ + bool userq_va_mapped; }; struct amdgpu_bo { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index cdf4909592d2..0c57fe259894 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -1950,7 +1950,7 @@ void amdgpu_ras_check_bad_page_status(struct amdgpu_device *adev) if (!control || amdgpu_bad_page_threshold == 0) return; - if (control->ras_num_bad_pages >= ras->bad_page_cnt_threshold) { + if (control->ras_num_bad_pages > ras->bad_page_cnt_threshold) { if (amdgpu_dpm_send_rma_reason(adev)) dev_warn(adev->dev, "Unable to send out-of-band RMA CPER"); else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 66e8a2f7afcf..d6bee5c30073 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -552,8 +552,9 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { struct amdgpu_ring *ring = file_inode(f)->i_private; - uint32_t value, result, early[3]; + u32 value, result, early[3] = { 0 }; uint64_t p; + u32 avail_dw, start_dw, read_dw; loff_t i; int r; @@ -565,10 +566,10 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, result = 0; - if (*pos < 12) { - if (ring->funcs->type == AMDGPU_RING_TYPE_CPER) - mutex_lock(&ring->adev->cper.ring_lock); + if (ring->funcs->type == AMDGPU_RING_TYPE_CPER) + mutex_lock(&ring->adev->cper.ring_lock); + if (*pos < 12) { early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask; early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask; early[2] = ring->wptr & ring->buf_mask; @@ -600,13 +601,24 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, *pos += 4; } } else { + early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask; + early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask; + p = early[0]; if (early[0] <= early[1]) - size = (early[1] - early[0]); + avail_dw = early[1] - early[0]; else - size = ring->ring_size - (early[0] - early[1]); + avail_dw = ring->buf_mask + 1 - (early[0] - early[1]); - while (size) { + start_dw = (*pos > 12) ? ((*pos - 12) >> 2) : 0; + if (start_dw >= avail_dw) + goto out; + + p = (p + start_dw) & ring->ptr_mask; + avail_dw -= start_dw; + read_dw = min_t(u32, avail_dw, size >> 2); + + while (read_dw) { if (p == early[1]) goto out; @@ -619,9 +631,10 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, buf += 4; result += 4; - size--; + read_dw--; p++; p &= ring->ptr_mask; + *pos += 4; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index a0b479d5fff1..f4be19223588 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -175,11 +175,14 @@ int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, { unsigned long bit_pos; - bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem); - if (bit_pos >= adev->seq64.num_sem) - return -ENOSPC; + for (;;) { + bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem); + if (bit_pos >= adev->seq64.num_sem) + return -ENOSPC; - __set_bit(bit_pos, adev->seq64.used); + if (!test_and_set_bit(bit_pos, adev->seq64.used)) + break; + } *va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev); @@ -205,7 +208,7 @@ void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va) bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64); if (bit_pos < adev->seq64.num_sem) - __clear_bit(bit_pos, adev->seq64.used); + clear_bit(bit_pos, adev->seq64.used); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0dc68fb9d88e..3d2e00efc741 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -75,6 +75,9 @@ static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev, unsigned int type, uint64_t size_in_page) { + if (!size_in_page) + return 0; + return ttm_range_man_init(&adev->mman.bdev, type, false, size_in_page); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 0238c2798de4..b8ed931f8a40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -130,6 +130,7 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && adev->umc.ras->ras_block.hw_ops->query_ras_error_address && adev->umc.max_ras_err_cnt_per_query) { + kfree(err_data->err_addr); err_data->err_addr = kzalloc_objs(struct eeprom_table_record, adev->umc.max_ras_err_cnt_per_query); @@ -160,6 +161,7 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if (adev->umc.ras && adev->umc.ras->ecc_info_query_ras_error_address && adev->umc.max_ras_err_cnt_per_query) { + kfree(err_data->err_addr); err_data->err_addr = kzalloc_objs(struct eeprom_table_record, adev->umc.max_ras_err_cnt_per_query); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index d5abf785ca17..f070ea37d918 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -82,19 +82,11 @@ static bool amdgpu_userq_is_reset_type_supported(struct amdgpu_device *adev, return false; } -static void amdgpu_userq_gpu_reset(struct amdgpu_device *adev) -{ - if (amdgpu_device_should_recover_gpu(adev)) { - amdgpu_reset_domain_schedule(adev->reset_domain, - &adev->userq_reset_work); - /* Wait for the reset job to complete */ - flush_work(&adev->userq_reset_work); - } -} - -static int -amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) +static void amdgpu_userq_mgr_reset_work(struct work_struct *work) { + struct amdgpu_userq_mgr *uq_mgr = + container_of(work, struct amdgpu_userq_mgr, + reset_work); struct amdgpu_device *adev = uq_mgr->adev; const int queue_types[] = { AMDGPU_RING_TYPE_COMPUTE, @@ -103,15 +95,11 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) }; const int num_queue_types = ARRAY_SIZE(queue_types); bool gpu_reset = false; - int r = 0; - int i; - - /* Warning if current process mutex is not held */ - WARN_ON(!mutex_is_locked(&uq_mgr->userq_mutex)); + int i, r; if (unlikely(adev->debug_disable_gpu_ring_reset)) { dev_err(adev->dev, "userq reset disabled by debug mask\n"); - return 0; + return; } /* @@ -119,7 +107,7 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) * skip all reset detection logic */ if (!amdgpu_gpu_recovery) - return 0; + return; /* * Iterate through all queue types to detect and reset problematic queues @@ -127,9 +115,11 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) */ for (i = 0; i < num_queue_types; i++) { int ring_type = queue_types[i]; - const struct amdgpu_userq_funcs *funcs = adev->userq_funcs[ring_type]; + const struct amdgpu_userq_funcs *funcs = + adev->userq_funcs[ring_type]; - if (!amdgpu_userq_is_reset_type_supported(adev, ring_type, AMDGPU_RESET_TYPE_PER_QUEUE)) + if (!amdgpu_userq_is_reset_type_supported(adev, ring_type, + AMDGPU_RESET_TYPE_PER_QUEUE)) continue; if (atomic_read(&uq_mgr->userq_count[ring_type]) > 0 && @@ -142,46 +132,43 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr) } } - if (gpu_reset) - amdgpu_userq_gpu_reset(adev); + if (gpu_reset) { + struct amdgpu_reset_context reset_context; - return r; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + reset_context.src = AMDGPU_RESET_SRC_USERQ; + set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/ + + amdgpu_device_gpu_recover(adev, NULL, &reset_context); + } } static void amdgpu_userq_hang_detect_work(struct work_struct *work) { - struct amdgpu_usermode_queue *queue = container_of(work, - struct amdgpu_usermode_queue, - hang_detect_work.work); - struct dma_fence *fence; - struct amdgpu_userq_mgr *uq_mgr; - - if (!queue->userq_mgr) - return; - - uq_mgr = queue->userq_mgr; - fence = READ_ONCE(queue->hang_detect_fence); - /* Fence already signaled – no action needed */ - if (!fence || dma_fence_is_signaled(fence)) - return; + struct amdgpu_usermode_queue *queue = + container_of(work, struct amdgpu_usermode_queue, + hang_detect_work.work); - mutex_lock(&uq_mgr->userq_mutex); - amdgpu_userq_detect_and_reset_queues(uq_mgr); - mutex_unlock(&uq_mgr->userq_mutex); + /* + * Don't schedule the work here! Scheduling or queue work from one reset + * handler to another is illegal if you don't take extra precautions! + */ + amdgpu_userq_mgr_reset_work(&queue->userq_mgr->reset_work); } /* * Start hang detection for a user queue fence. A delayed work will be scheduled - * to check if the fence is still pending after the timeout period. -*/ + * to reset the queues when the fence doesn't signal in time. + */ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue) { struct amdgpu_device *adev; unsigned long timeout_ms; - if (!queue || !queue->userq_mgr || !queue->userq_mgr->adev) - return; - adev = queue->userq_mgr->adev; /* Determine timeout based on queue type */ switch (queue->queue_type) { @@ -199,16 +186,33 @@ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue) break; } - /* Store the fence to monitor and schedule hang detection */ - WRITE_ONCE(queue->hang_detect_fence, queue->last_fence); - schedule_delayed_work(&queue->hang_detect_work, - msecs_to_jiffies(timeout_ms)); + queue_delayed_work(adev->reset_domain->wq, &queue->hang_detect_work, + msecs_to_jiffies(timeout_ms)); } -static void amdgpu_userq_init_hang_detect_work(struct amdgpu_usermode_queue *queue) +void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell) { - INIT_DELAYED_WORK(&queue->hang_detect_work, amdgpu_userq_hang_detect_work); - queue->hang_detect_fence = NULL; + struct xarray *xa = &adev->userq_doorbell_xa; + struct amdgpu_usermode_queue *queue; + unsigned long flags; + int r; + + xa_lock_irqsave(xa, flags); + queue = xa_load(xa, doorbell); + if (queue) { + r = amdgpu_userq_fence_driver_process(queue->fence_drv); + /* + * We are in interrupt context here, this *can't* wait for + * reset work to finish. + */ + if (r >= 0) + cancel_delayed_work(&queue->hang_detect_work); + + /* Restart the timer when there are still fences pending */ + if (r == 1) + amdgpu_userq_start_hang_detect_work(queue); + } + xa_unlock_irqrestore(xa, flags); } static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue, @@ -223,7 +227,7 @@ static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue, INIT_LIST_HEAD(&va_cursor->list); va_cursor->gpu_addr = addr; - atomic_set(&va_map->bo_va->userq_va_mapped, 1); + va_map->bo_va->userq_va_mapped = true; list_add(&va_cursor->list, &queue->userq_va_list); return 0; @@ -270,7 +274,7 @@ static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr) dma_resv_assert_held(vm->root.bo->tbo.base.resv); mapping = amdgpu_vm_bo_lookup_mapping(vm, addr); - if (!IS_ERR_OR_NULL(mapping) && atomic_read(&mapping->bo_va->userq_va_mapped)) + if (!IS_ERR_OR_NULL(mapping) && mapping->bo_va->userq_va_mapped) r = true; else r = false; @@ -296,16 +300,8 @@ static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue) return false; } -static void amdgpu_userq_buffer_va_list_del(struct amdgpu_bo_va_mapping *mapping, - struct amdgpu_userq_va_cursor *va_cursor) -{ - atomic_set(&mapping->bo_va->userq_va_mapped, 0); - list_del(&va_cursor->list); - kfree(va_cursor); -} - -static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, - struct amdgpu_usermode_queue *queue) +static void amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, + struct amdgpu_usermode_queue *queue) { struct amdgpu_userq_va_cursor *va_cursor, *tmp; struct amdgpu_bo_va_mapping *mapping; @@ -315,15 +311,12 @@ static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev, list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) { mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, va_cursor->gpu_addr); - if (!mapping) { - return -EINVAL; - } - dev_dbg(adev->dev, "delete the userq:%p va:%llx\n", - queue, va_cursor->gpu_addr); - amdgpu_userq_buffer_va_list_del(mapping, va_cursor); + if (mapping) + dev_dbg(adev->dev, "delete the userq:%p va:%llx\n", + queue, va_cursor->gpu_addr); + list_del(&va_cursor->list); + kfree(va_cursor); } - - return 0; } static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue) @@ -332,23 +325,18 @@ static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue) struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; - bool found_hung_queue = false; - int r = 0; + int r; if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { r = userq_funcs->preempt(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; - found_hung_queue = true; + return r; } else { queue->state = AMDGPU_USERQ_STATE_PREEMPTED; } } - - if (found_hung_queue) - amdgpu_userq_detect_and_reset_queues(uq_mgr); - - return r; + return 0; } static int amdgpu_userq_restore_helper(struct amdgpu_usermode_queue *queue) @@ -377,24 +365,21 @@ static int amdgpu_userq_unmap_helper(struct amdgpu_usermode_queue *queue) struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; - bool found_hung_queue = false; - int r = 0; + int r; if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) || - (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) { + (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) { + r = userq_funcs->unmap(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; - found_hung_queue = true; + return r; } else { queue->state = AMDGPU_USERQ_STATE_UNMAPPED; } } - if (found_hung_queue) - amdgpu_userq_detect_and_reset_queues(uq_mgr); - - return r; + return 0; } static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue) @@ -403,19 +388,19 @@ static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue) struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *userq_funcs = adev->userq_funcs[queue->queue_type]; - int r = 0; + int r; if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) { r = userq_funcs->map(queue); if (r) { queue->state = AMDGPU_USERQ_STATE_HUNG; - amdgpu_userq_detect_and_reset_queues(uq_mgr); + return r; } else { queue->state = AMDGPU_USERQ_STATE_MAPPED; } } - return r; + return 0; } static void amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue) @@ -512,16 +497,20 @@ int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, goto free_obj; } + r = amdgpu_bo_pin(userq_obj->obj, AMDGPU_GEM_DOMAIN_GTT); + if (r) + goto unresv; + r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo); if (r) { drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r); - goto unresv; + goto unpin_bo; } r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr); if (r) { drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r); - goto unresv; + goto unpin_bo; } userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj); @@ -529,11 +518,13 @@ int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, memset(userq_obj->cpu_ptr, 0, size); return 0; +unpin_bo: + amdgpu_bo_unpin(userq_obj->obj); unresv: amdgpu_bo_unreserve(userq_obj->obj); - free_obj: amdgpu_bo_unref(&userq_obj->obj); + return r; } @@ -541,6 +532,7 @@ void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userq_obj *userq_obj) { amdgpu_bo_kunmap(userq_obj->obj); + amdgpu_bo_unpin(userq_obj->obj); amdgpu_bo_unref(&userq_obj->obj); } @@ -635,20 +627,12 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que amdgpu_bo_unreserve(vm->root.bo); mutex_lock(&uq_mgr->userq_mutex); - queue->hang_detect_fence = NULL; amdgpu_userq_wait_for_last_fence(queue); #if defined(CONFIG_DEBUG_FS) debugfs_remove_recursive(queue->debugfs_queue); #endif - amdgpu_userq_detect_and_reset_queues(uq_mgr); r = amdgpu_userq_unmap_helper(queue); - /*TODO: It requires a reset for userq hw unmap error*/ - if (r) { - drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n"); - queue->state = AMDGPU_USERQ_STATE_HUNG; - } - atomic_dec(&uq_mgr->userq_count[queue->queue_type]); amdgpu_userq_cleanup(queue); mutex_unlock(&uq_mgr->userq_mutex); @@ -724,14 +708,14 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) const struct amdgpu_userq_funcs *uq_funcs; struct amdgpu_usermode_queue *queue; struct amdgpu_db_info db_info; - bool skip_map_queue; - u32 qid; uint64_t index; - int r = 0; - int priority = - (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >> - AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; + int priority; + u32 qid; + int r; + priority = + (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) + >> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; r = amdgpu_userq_priority_permit(filp, priority); if (r) return r; @@ -744,40 +728,43 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) uq_funcs = adev->userq_funcs[args->in.ip_type]; if (!uq_funcs) { - drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n", - args->in.ip_type); r = -EINVAL; goto err_pm_runtime; } queue = kzalloc_obj(struct amdgpu_usermode_queue); if (!queue) { - drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n"); r = -ENOMEM; goto err_pm_runtime; } + kref_init(&queue->refcount); INIT_LIST_HEAD(&queue->userq_va_list); queue->doorbell_handle = args->in.doorbell_handle; queue->queue_type = args->in.ip_type; queue->vm = &fpriv->vm; queue->priority = priority; - - db_info.queue_type = queue->queue_type; - db_info.doorbell_handle = queue->doorbell_handle; - db_info.db_obj = &queue->db_obj; - db_info.doorbell_offset = args->in.doorbell_offset; - queue->userq_mgr = uq_mgr; + INIT_DELAYED_WORK(&queue->hang_detect_work, + amdgpu_userq_hang_detect_work); - /* Validate the userq virtual address.*/ - r = amdgpu_bo_reserve(fpriv->vm.root.bo, false); + mutex_init(&queue->fence_drv_lock); + xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); + r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv); if (r) goto free_queue; - if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va, args->in.queue_size) || - amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) || - amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) { + /* Make sure the queue can actually run with those virtual addresses. */ + r = amdgpu_bo_reserve(fpriv->vm.root.bo, false); + if (r) + goto free_fence_drv; + + if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va, + args->in.queue_size) || + amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, + AMDGPU_GPU_PAGE_SIZE) || + amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, + AMDGPU_GPU_PAGE_SIZE)) { r = -EINVAL; amdgpu_bo_unreserve(fpriv->vm.root.bo); goto clean_mapping; @@ -785,6 +772,10 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) amdgpu_bo_unreserve(fpriv->vm.root.bo); /* Convert relative doorbell offset into absolute doorbell index */ + db_info.queue_type = queue->queue_type; + db_info.doorbell_handle = queue->doorbell_handle; + db_info.db_obj = &queue->db_obj; + db_info.doorbell_offset = args->in.doorbell_offset; index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp); if (index == (uint64_t)-EINVAL) { drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n"); @@ -793,79 +784,64 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) } queue->doorbell_index = index; - xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); - r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv); - if (r) { - drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n"); - goto clean_mapping; - } - r = uq_funcs->mqd_create(queue, &args->in); if (r) { drm_file_err(uq_mgr->file, "Failed to create Queue\n"); - goto clean_fence_driver; + goto clean_mapping; } + /* Update VM owner at userq submit-time for page-fault attribution. */ + amdgpu_vm_set_task_info(&fpriv->vm); + + r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, + GFP_KERNEL)); + if (r) + goto clean_mqd; + amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); /* don't map the queue if scheduling is halted */ - if (adev->userq_halt_for_enforce_isolation && - ((queue->queue_type == AMDGPU_HW_IP_GFX) || - (queue->queue_type == AMDGPU_HW_IP_COMPUTE))) - skip_map_queue = true; - else - skip_map_queue = false; - if (!skip_map_queue) { + if (!adev->userq_halt_for_enforce_isolation || + ((queue->queue_type != AMDGPU_HW_IP_GFX) && + (queue->queue_type != AMDGPU_HW_IP_COMPUTE))) { r = amdgpu_userq_map_helper(queue); if (r) { drm_file_err(uq_mgr->file, "Failed to map Queue\n"); - goto clean_mqd; + mutex_unlock(&uq_mgr->userq_mutex); + goto clean_doorbell; } } - /* drop this refcount during queue destroy */ - kref_init(&queue->refcount); - - /* Wait for mode-1 reset to complete */ - down_read(&adev->reset_domain->sem); + atomic_inc(&uq_mgr->userq_count[queue->queue_type]); + mutex_unlock(&uq_mgr->userq_mutex); r = xa_alloc(&uq_mgr->userq_xa, &qid, queue, - XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL); - if (r) { - if (!skip_map_queue) - amdgpu_userq_unmap_helper(queue); - r = -ENOMEM; - goto clean_reset_domain; - } - - r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL)); + XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), + GFP_KERNEL); if (r) { - xa_erase(&uq_mgr->userq_xa, qid); - if (!skip_map_queue) - amdgpu_userq_unmap_helper(queue); - goto clean_reset_domain; + /* + * This drops the last reference which should take care of + * all cleanup. + */ + amdgpu_userq_put(queue); + return r; } - up_read(&adev->reset_domain->sem); amdgpu_debugfs_userq_init(filp, queue, qid); - amdgpu_userq_init_hang_detect_work(queue); - args->out.queue_id = qid; - atomic_inc(&uq_mgr->userq_count[queue->queue_type]); - mutex_unlock(&uq_mgr->userq_mutex); return 0; -clean_reset_domain: - up_read(&adev->reset_domain->sem); +clean_doorbell: + xa_erase_irq(&adev->userq_doorbell_xa, index); clean_mqd: - mutex_unlock(&uq_mgr->userq_mutex); uq_funcs->mqd_destroy(queue); -clean_fence_driver: - amdgpu_userq_fence_driver_free(queue); clean_mapping: amdgpu_bo_reserve(fpriv->vm.root.bo, true); amdgpu_userq_buffer_vas_list_cleanup(adev, queue); amdgpu_bo_unreserve(fpriv->vm.root.bo); + mutex_destroy(&queue->fence_drv_lock); +free_fence_drv: + amdgpu_userq_fence_driver_free(queue); free_queue: kfree(queue); err_pm_runtime: @@ -1187,7 +1163,7 @@ retry_lock: bo = range->bo; ret = amdgpu_ttm_tt_get_user_pages(bo, range); if (ret) - goto unlock_all; + goto free_ranges; } invalidated = true; @@ -1214,6 +1190,7 @@ retry_lock: unlock_all: drm_exec_fini(&exec); +free_ranges: xa_for_each(&xa, tmp_key, range) { if (!range) continue; @@ -1254,7 +1231,6 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) unsigned long queue_id; int ret = 0, r; - amdgpu_userq_detect_and_reset_queues(uq_mgr); /* Try to unmap all the queues in this process ctx */ xa_for_each(&uq_mgr->userq_xa, queue_id, queue) { r = amdgpu_userq_preempt_helper(queue); @@ -1262,29 +1238,16 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) ret = r; } - if (ret) + if (ret) { drm_file_err(uq_mgr->file, "Couldn't unmap all the queues, eviction failed ret=%d\n", ret); + amdgpu_reset_domain_schedule(uq_mgr->adev->reset_domain, + &uq_mgr->reset_work); + flush_work(&uq_mgr->reset_work); + } return ret; } -void amdgpu_userq_reset_work(struct work_struct *work) -{ - struct amdgpu_device *adev = container_of(work, struct amdgpu_device, - userq_reset_work); - struct amdgpu_reset_context reset_context; - - memset(&reset_context, 0, sizeof(reset_context)); - - reset_context.method = AMD_RESET_METHOD_NONE; - reset_context.reset_req_dev = adev; - reset_context.src = AMDGPU_RESET_SRC_USERQ; - set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/ - - amdgpu_device_gpu_recover(adev, NULL, &reset_context); -} - static void amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) { @@ -1318,9 +1281,24 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f userq_mgr->file = file_priv; INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker); + INIT_WORK(&userq_mgr->reset_work, amdgpu_userq_mgr_reset_work); return 0; } +void amdgpu_userq_mgr_cancel_reset_work(struct amdgpu_device *adev) +{ + struct xarray *xa = &adev->userq_doorbell_xa; + struct amdgpu_usermode_queue *queue; + unsigned long flags, queue_id; + + xa_lock_irqsave(xa, flags); + xa_for_each(xa, queue_id, queue) { + cancel_delayed_work(&queue->hang_detect_work); + cancel_work(&queue->userq_mgr->reset_work); + } + xa_unlock_irqrestore(xa, flags); +} + void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr) { cancel_delayed_work_sync(&userq_mgr->resume_work); @@ -1346,6 +1324,14 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) } xa_destroy(&userq_mgr->userq_xa); + + /* + * Drain any in-flight reset_work. By this point all queues are freed + * and userq_count is 0, so if reset_work starts now it exits early. + * We still need to wait in case it was already executing gpu_recover. + */ + cancel_work_sync(&userq_mgr->reset_work); + mutex_destroy(&userq_mgr->userq_mutex); } @@ -1364,7 +1350,6 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev) uqm = queue->userq_mgr; cancel_delayed_work_sync(&uqm->resume_work); guard(mutex)(&uqm->userq_mutex); - amdgpu_userq_detect_and_reset_queues(uqm); if (adev->in_s0ix) r = amdgpu_userq_preempt_helper(queue); else @@ -1423,7 +1408,6 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, if (((queue->queue_type == AMDGPU_HW_IP_GFX) || (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && (queue->xcp_id == idx)) { - amdgpu_userq_detect_and_reset_queues(uqm); r = amdgpu_userq_preempt_helper(queue); if (r) ret = r; @@ -1496,23 +1480,21 @@ void amdgpu_userq_pre_reset(struct amdgpu_device *adev) { const struct amdgpu_userq_funcs *userq_funcs; struct amdgpu_usermode_queue *queue; - struct amdgpu_userq_mgr *uqm; unsigned long queue_id; + /* TODO: We probably need a new lock for the queue state */ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) { - uqm = queue->userq_mgr; - cancel_delayed_work_sync(&uqm->resume_work); - if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { - amdgpu_userq_wait_for_last_fence(queue); - userq_funcs = adev->userq_funcs[queue->queue_type]; - userq_funcs->unmap(queue); - /* just mark all queues as hung at this point. - * if unmap succeeds, we could map again - * in amdgpu_userq_post_reset() if vram is not lost - */ - queue->state = AMDGPU_USERQ_STATE_HUNG; - amdgpu_userq_fence_driver_force_completion(queue); - } + if (queue->state != AMDGPU_USERQ_STATE_MAPPED) + continue; + + userq_funcs = adev->userq_funcs[queue->queue_type]; + userq_funcs->unmap(queue); + /* just mark all queues as hung at this point. + * if unmap succeeds, we could map again + * in amdgpu_userq_post_reset() if vram is not lost + */ + queue->state = AMDGPU_USERQ_STATE_HUNG; + amdgpu_userq_fence_driver_force_completion(queue); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h index 675fe6395ac8..49b33e2d6932 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h @@ -66,14 +66,31 @@ struct amdgpu_usermode_queue { struct amdgpu_userq_obj db_obj; struct amdgpu_userq_obj fw_obj; struct amdgpu_userq_obj wptr_obj; + + /** + * @fence_drv_lock: Protecting @fence_drv_xa. + */ + struct mutex fence_drv_lock; + + /** + * @fence_drv_xa: + * + * References to the external fence drivers returned by wait_ioctl. + * Dropped on the next signaled dma_fence or queue destruction. + */ struct xarray fence_drv_xa; struct amdgpu_userq_fence_driver *fence_drv; struct dma_fence *last_fence; u32 xcp_id; int priority; struct dentry *debugfs_queue; - struct delayed_work hang_detect_work; - struct dma_fence *hang_detect_fence; + + /** + * @hang_detect_work: + * + * Delayed work which runs when userq_fences time out. + */ + struct delayed_work hang_detect_work; struct kref refcount; struct list_head userq_va_list; @@ -105,6 +122,13 @@ struct amdgpu_userq_mgr { struct amdgpu_device *adev; struct delayed_work resume_work; struct drm_file *file; + + /** + * @reset_work: + * + * Reset work which is used when eviction fails. + */ + struct work_struct reset_work; atomic_t userq_count[AMDGPU_RING_TYPE_MAX]; }; @@ -123,6 +147,7 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, struct amdgpu_device *adev); +void amdgpu_userq_mgr_cancel_reset_work(struct amdgpu_device *adev); void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr); void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr); @@ -156,6 +181,7 @@ void amdgpu_userq_reset_work(struct work_struct *work); void amdgpu_userq_pre_reset(struct amdgpu_device *adev); int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost); void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue); +void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell); int amdgpu_userq_input_va_validate(struct amdgpu_device *adev, struct amdgpu_usermode_queue *queue, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index da39ac862f37..a41fb72dba94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -32,29 +32,9 @@ #include "amdgpu.h" #include "amdgpu_userq_fence.h" -static const struct dma_fence_ops amdgpu_userq_fence_ops; -static struct kmem_cache *amdgpu_userq_fence_slab; - #define AMDGPU_USERQ_MAX_HANDLES (1U << 16) -int amdgpu_userq_fence_slab_init(void) -{ - amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence", - sizeof(struct amdgpu_userq_fence), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!amdgpu_userq_fence_slab) - return -ENOMEM; - - return 0; -} - -void amdgpu_userq_fence_slab_fini(void) -{ - rcu_barrier(); - kmem_cache_destroy(amdgpu_userq_fence_slab); -} +static const struct dma_fence_ops amdgpu_userq_fence_ops; static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f) { @@ -141,6 +121,7 @@ amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq) userq->last_fence = NULL; amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa); xa_destroy(&userq->fence_drv_xa); + mutex_destroy(&userq->fence_drv_lock); /* Drop the queue's ownership reference to fence_drv explicitly */ amdgpu_userq_fence_driver_put(userq->fence_drv); } @@ -154,7 +135,14 @@ amdgpu_userq_fence_put_fence_drv_array(struct amdgpu_userq_fence *userq_fence) userq_fence->fence_drv_array_count = 0; } -void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) +/* + * Returns: + * -ENOENT when no fences were processes + * 1 when more fences are pending + * 0 when no fences are pending any more + */ +int +amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv) { struct amdgpu_userq_fence *userq_fence, *tmp; LIST_HEAD(to_be_signaled); @@ -162,9 +150,6 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d unsigned long flags; u64 rptr; - if (!fence_drv) - return; - spin_lock_irqsave(&fence_drv->fence_list_lock, flags); rptr = amdgpu_userq_fence_read(fence_drv); @@ -177,6 +162,9 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d &userq_fence->link); spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); + if (list_empty(&to_be_signaled)) + return -ENOENT; + list_for_each_entry_safe(userq_fence, tmp, &to_be_signaled, link) { fence = &userq_fence->base; list_del_init(&userq_fence->link); @@ -188,6 +176,8 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d dma_fence_put(fence); } + /* That doesn't need to be accurate so no locking */ + return list_empty(&fence_drv->fences) ? 0 : 1; } void amdgpu_userq_fence_driver_destroy(struct kref *ref) @@ -229,80 +219,84 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy); } -static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence) +static int amdgpu_userq_fence_alloc(struct amdgpu_usermode_queue *userq, + struct amdgpu_userq_fence **pfence) { - *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC); - return *userq_fence ? 0 : -ENOMEM; + struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv; + struct amdgpu_userq_fence *userq_fence; + void *entry; + + userq_fence = kmalloc(sizeof(*userq_fence), GFP_KERNEL); + if (!userq_fence) + return -ENOMEM; + + /* + * Get the next unused entry, since we fill from the start this can be + * used as size to allocate the array. + */ + mutex_lock(&userq->fence_drv_lock); + XA_STATE(xas, &userq->fence_drv_xa, 0); + + rcu_read_lock(); + do { + entry = xas_find_marked(&xas, ULONG_MAX, XA_FREE_MARK); + } while (xas_retry(&xas, entry)); + rcu_read_unlock(); + + userq_fence->fence_drv_array = kvmalloc_array(xas.xa_index, + sizeof(fence_drv), + GFP_KERNEL); + if (!userq_fence->fence_drv_array) { + mutex_unlock(&userq->fence_drv_lock); + kfree(userq_fence); + return -ENOMEM; + } + + userq_fence->fence_drv_array_count = xas.xa_index; + xa_extract(&userq->fence_drv_xa, (void **)userq_fence->fence_drv_array, + 0, ULONG_MAX, xas.xa_index, XA_PRESENT); + xa_destroy(&userq->fence_drv_xa); + + mutex_unlock(&userq->fence_drv_lock); + + amdgpu_userq_fence_driver_get(fence_drv); + userq_fence->fence_drv = fence_drv; + + *pfence = userq_fence; + return 0; } -static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, - struct amdgpu_userq_fence *userq_fence, - u64 seq, struct dma_fence **f) +static void amdgpu_userq_fence_init(struct amdgpu_usermode_queue *userq, + struct amdgpu_userq_fence *fence, + u64 seq) { - struct amdgpu_userq_fence_driver *fence_drv; - struct dma_fence *fence; + struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv; unsigned long flags; bool signaled = false; - fence_drv = userq->fence_drv; - if (!fence_drv) - return -EINVAL; - - spin_lock_init(&userq_fence->lock); - INIT_LIST_HEAD(&userq_fence->link); - fence = &userq_fence->base; - userq_fence->fence_drv = fence_drv; - - dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock, + spin_lock_init(&fence->lock); + dma_fence_init64(&fence->base, &amdgpu_userq_fence_ops, &fence->lock, fence_drv->context, seq); - amdgpu_userq_fence_driver_get(fence_drv); - dma_fence_get(fence); - - if (!xa_empty(&userq->fence_drv_xa)) { - struct amdgpu_userq_fence_driver *stored_fence_drv; - unsigned long index, count = 0; - int i = 0; - - xa_lock(&userq->fence_drv_xa); - xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) - count++; - - userq_fence->fence_drv_array = - kvmalloc_objs(struct amdgpu_userq_fence_driver *, count, - GFP_ATOMIC); - - if (userq_fence->fence_drv_array) { - xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) { - userq_fence->fence_drv_array[i] = stored_fence_drv; - __xa_erase(&userq->fence_drv_xa, index); - i++; - } - } - - userq_fence->fence_drv_array_count = i; - xa_unlock(&userq->fence_drv_xa); - } else { - userq_fence->fence_drv_array = NULL; - userq_fence->fence_drv_array_count = 0; - } + /* Make sure the fence is visible to the hang detect worker */ + dma_fence_put(userq->last_fence); + userq->last_fence = dma_fence_get(&fence->base); - /* Check if hardware has already processed the job */ + /* Check if hardware has already processed the fence */ spin_lock_irqsave(&fence_drv->fence_list_lock, flags); - if (!dma_fence_is_signaled(fence)) { - list_add_tail(&userq_fence->link, &fence_drv->fences); + if (!dma_fence_is_signaled(&fence->base)) { + dma_fence_get(&fence->base); + list_add_tail(&fence->link, &fence_drv->fences); } else { + INIT_LIST_HEAD(&fence->link); signaled = true; - dma_fence_put(fence); } spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); if (signaled) - amdgpu_userq_fence_put_fence_drv_array(userq_fence); - - *f = fence; - - return 0; + amdgpu_userq_fence_put_fence_drv_array(fence); + else + amdgpu_userq_start_hang_detect_work(userq); } static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f) @@ -342,7 +336,7 @@ static void amdgpu_userq_fence_free(struct rcu_head *rcu) amdgpu_userq_fence_driver_put(fence_drv); kvfree(userq_fence->fence_drv_array); - kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); + kfree(userq_fence); } static void amdgpu_userq_fence_release(struct dma_fence *f) @@ -376,56 +370,48 @@ static int amdgpu_userq_fence_read_wptr(struct amdgpu_device *adev, { struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo *bo; + struct drm_exec exec; u64 addr, *ptr; - int r; - - r = amdgpu_bo_reserve(queue->vm->root.bo, false); - if (r) - return r; + int ret; addr = queue->userq_prop->wptr_gpu_addr; addr &= AMDGPU_GMC_HOLE_MASK; - mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT); - if (!mapping) { - amdgpu_bo_unreserve(queue->vm->root.bo); - DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n"); - return -EINVAL; - } + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 2); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(queue->vm, &exec, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto lock_error; + + mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT); + if (!mapping) { + ret = -EINVAL; + goto lock_error; + } - bo = amdgpu_bo_ref(mapping->bo_va->base.bo); - amdgpu_bo_unreserve(queue->vm->root.bo); - r = amdgpu_bo_reserve(bo, true); - if (r) { - amdgpu_bo_unref(&bo); - DRM_ERROR("Failed to reserve userqueue wptr bo"); - return r; + ret = drm_exec_lock_obj(&exec, &mapping->bo_va->base.bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto lock_error; } - r = amdgpu_bo_kmap(bo, (void **)&ptr); - if (r) { + bo = mapping->bo_va->base.bo; + ret = amdgpu_bo_kmap(bo, (void **)&ptr); + if (ret) { DRM_ERROR("Failed mapping the userqueue wptr bo"); - goto map_error; + goto lock_error; } *wptr = le64_to_cpu(*ptr); amdgpu_bo_kunmap(bo); - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - + drm_exec_fini(&exec); return 0; -map_error: - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - - return r; -} - -static void amdgpu_userq_fence_cleanup(struct dma_fence *fence) -{ - dma_fence_put(fence); +lock_error: + drm_exec_fini(&exec); + return ret; } static void @@ -471,13 +457,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, const unsigned int num_read_bo_handles = args->num_bo_read_handles; struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr; + struct drm_gem_object **gobj_write, **gobj_read; u32 *syncobj_handles, num_syncobj_handles; - struct amdgpu_userq_fence *userq_fence; - struct amdgpu_usermode_queue *queue = NULL; - struct drm_syncobj **syncobj = NULL; - struct dma_fence *fence; + struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_fence *fence; + struct drm_syncobj **syncobj; struct drm_exec exec; + void __user *ptr; int r, i, entry; u64 wptr; @@ -489,13 +476,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, return -EINVAL; num_syncobj_handles = args->num_syncobj_handles; - syncobj_handles = memdup_array_user(u64_to_user_ptr(args->syncobj_handles), - num_syncobj_handles, sizeof(u32)); + ptr = u64_to_user_ptr(args->syncobj_handles); + syncobj_handles = memdup_array_user(ptr, num_syncobj_handles, + sizeof(u32)); if (IS_ERR(syncobj_handles)) return PTR_ERR(syncobj_handles); - /* Array of pointers to the looked up syncobjs */ - syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL); + syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), + GFP_KERNEL); if (!syncobj) { r = -ENOMEM; goto free_syncobj_handles; @@ -509,21 +497,17 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } } - r = drm_gem_objects_lookup(filp, - u64_to_user_ptr(args->bo_read_handles), - num_read_bo_handles, - &gobj_read); + ptr = u64_to_user_ptr(args->bo_read_handles); + r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read); if (r) goto free_syncobj; - r = drm_gem_objects_lookup(filp, - u64_to_user_ptr(args->bo_write_handles), - num_write_bo_handles, + ptr = u64_to_user_ptr(args->bo_write_handles); + r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles, &gobj_write); if (r) goto put_gobj_read; - /* Retrieve the user queue */ queue = amdgpu_userq_get(userq_mgr, args->queue_id); if (!queue) { r = -ENOENT; @@ -532,73 +516,61 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, r = amdgpu_userq_fence_read_wptr(adev, queue, &wptr); if (r) - goto put_gobj_write; + goto put_queue; - r = amdgpu_userq_fence_alloc(&userq_fence); + r = amdgpu_userq_fence_alloc(queue, &fence); if (r) - goto put_gobj_write; + goto put_queue; /* We are here means UQ is active, make sure the eviction fence is valid */ amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); - /* Create a new fence */ - r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence); - if (r) { - mutex_unlock(&userq_mgr->userq_mutex); - kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); - goto put_gobj_write; - } + /* Create the new fence */ + amdgpu_userq_fence_init(queue, fence, wptr); - dma_fence_put(queue->last_fence); - queue->last_fence = dma_fence_get(fence); - amdgpu_userq_start_hang_detect_work(queue); mutex_unlock(&userq_mgr->userq_mutex); + /* + * This needs to come after the fence is created since + * amdgpu_userq_ensure_ev_fence() can't be called while holding the resv + * locks. + */ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, (num_read_bo_handles + num_write_bo_handles)); - /* Lock all BOs with retry handling */ drm_exec_until_all_locked(&exec) { - r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); + r = drm_exec_prepare_array(&exec, gobj_read, + num_read_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) { - amdgpu_userq_fence_cleanup(fence); + if (r) goto exec_fini; - } - r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); + r = drm_exec_prepare_array(&exec, gobj_write, + num_write_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) { - amdgpu_userq_fence_cleanup(fence); + if (r) goto exec_fini; - } } - for (i = 0; i < num_read_bo_handles; i++) { - if (!gobj_read || !gobj_read[i]->resv) - continue; - - dma_resv_add_fence(gobj_read[i]->resv, fence, + /* And publish the new fence in the BOs and syncobj */ + for (i = 0; i < num_read_bo_handles; i++) + dma_resv_add_fence(gobj_read[i]->resv, &fence->base, DMA_RESV_USAGE_READ); - } - for (i = 0; i < num_write_bo_handles; i++) { - if (!gobj_write || !gobj_write[i]->resv) - continue; - - dma_resv_add_fence(gobj_write[i]->resv, fence, + for (i = 0; i < num_write_bo_handles; i++) + dma_resv_add_fence(gobj_write[i]->resv, &fence->base, DMA_RESV_USAGE_WRITE); - } - /* Add the created fence to syncobj/BO's */ for (i = 0; i < num_syncobj_handles; i++) - drm_syncobj_replace_fence(syncobj[i], fence); + drm_syncobj_replace_fence(syncobj[i], &fence->base); +exec_fini: /* drop the reference acquired in fence creation function */ - dma_fence_put(fence); + dma_fence_put(&fence->base); -exec_fini: drm_exec_fini(&exec); +put_queue: + amdgpu_userq_put(queue); put_gobj_write: for (i = 0; i < num_write_bo_handles; i++) drm_gem_object_put(gobj_write[i]); @@ -609,15 +581,11 @@ put_gobj_read: kvfree(gobj_read); free_syncobj: while (entry-- > 0) - if (syncobj[entry]) - drm_syncobj_put(syncobj[entry]); + drm_syncobj_put(syncobj[entry]); kfree(syncobj); free_syncobj_handles: kfree(syncobj_handles); - if (queue) - amdgpu_userq_put(queue); - return r; } @@ -892,8 +860,10 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp, * Otherwise, we would gather those references until we don't * have any more space left and crash. */ + mutex_lock(&waitq->fence_drv_lock); r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv, xa_limit_32b, GFP_KERNEL); + mutex_unlock(&waitq->fence_drv_lock); if (r) goto put_waitq; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index d56246ad8c26..0bd51616cef1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -58,15 +58,12 @@ struct amdgpu_userq_fence_driver { char timeline_name[TASK_COMM_LEN]; }; -int amdgpu_userq_fence_slab_init(void); -void amdgpu_userq_fence_slab_fini(void); - void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, struct amdgpu_userq_fence_driver **fence_drv_req); void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq); -void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); +int amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq); void amdgpu_userq_fence_driver_destroy(struct kref *ref); int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 115a7b269af3..fccd758b6699 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2002,7 +2002,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, * during user requests GEM unmap IOCTL except for forcing the unmap * from user space. */ - if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0)) + if (unlikely(bo_va->userq_va_mapped)) amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr); list_del(&mapping->list); @@ -3023,11 +3023,22 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, is_compute_context = vm->is_compute_context; - if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr >> PAGE_SHIFT, ts, write_fault)) { + if (is_compute_context) { + /* Unreserve root since svm_range_restore_pages might try to reserve it. */ + /* TODO: rework svm_range_restore_pages so that this isn't necessary. */ amdgpu_bo_unreserve(root); + + if (!svm_range_restore_pages(adev, pasid, vmid, + node_id, addr >> PAGE_SHIFT, ts, write_fault)) { + amdgpu_bo_unref(&root); + return true; + } amdgpu_bo_unref(&root); - return true; + + /* Re-acquire the VM lock, could be that the VM was freed in between. */ + vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid); + if (!vm) + return false; } addr /= AMDGPU_GPU_PAGE_SIZE; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index fd881388d612..f27f917e3cdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -562,6 +562,11 @@ static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr, amdgpu_ring_write(ring, 0); } + /* WA: Force sync after TRAP to avoid VPE1 fail to power off */ + if (ring->adev->vpe.collaborate_mode) { + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COLLAB_SYNC, 0)); + amdgpu_ring_write(ring, 0xabcd); + } } static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -968,7 +973,7 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = { .emit_frame_size = 5 + /* vpe_ring_init_cond_exec */ 6 + /* vpe_ring_emit_pipeline_sync */ - 10 + 10 + 10 + /* vpe_ring_emit_fence */ + 12 + 12 + 12 + /* vpe_ring_emit_fence */ /* vpe_ring_emit_vm_flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 8c82e90f871b..d40ab1e95480 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6523,15 +6523,7 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: CP EOP\n"); if (adev->enable_mes && doorbell_offset) { - struct amdgpu_usermode_queue *queue; - struct xarray *xa = &adev->userq_doorbell_xa; - unsigned long flags; - - xa_lock_irqsave(xa, flags); - queue = xa_load(xa, doorbell_offset); - if (queue) - amdgpu_userq_fence_driver_process(queue->fence_drv); - xa_unlock_irqrestore(xa, flags); + amdgpu_userq_process_fence_irq(adev, doorbell_offset); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 65c33823a688..c35372e21261 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -602,6 +602,13 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) "amdgpu/%s_pfp.bin", ucode_prefix); if (err) goto out; + + adev->gfx.rs64_enable = amdgpu_ucode_hdr_version( + (union amdgpu_firmware_header *) + adev->gfx.pfp_fw->data, 2, 0); + if (adev->gfx.rs64_enable) + dev_dbg(adev->dev, "CP RS64 enable\n"); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); @@ -4854,15 +4861,7 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: CP EOP\n"); if (adev->enable_mes && doorbell_offset) { - struct xarray *xa = &adev->userq_doorbell_xa; - struct amdgpu_usermode_queue *queue; - unsigned long flags; - - xa_lock_irqsave(xa, flags); - queue = xa_load(xa, doorbell_offset); - if (queue) - amdgpu_userq_fence_driver_process(queue->fence_drv); - xa_unlock_irqrestore(xa, flags); + amdgpu_userq_process_fence_irq(adev, doorbell_offset); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c index 68fd3c04134d..68db1bc73bc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c @@ -3643,16 +3643,7 @@ static int gfx_v12_1_eop_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: CP EOP\n"); if (adev->enable_mes && doorbell_offset) { - struct xarray *xa = &adev->userq_doorbell_xa; - struct amdgpu_usermode_queue *queue; - unsigned long flags; - - xa_lock_irqsave(xa, flags); - queue = xa_load(xa, doorbell_offset); - if (queue) - amdgpu_userq_fence_driver_process(queue->fence_drv); - - xa_unlock_irqrestore(xa, flags); + amdgpu_userq_process_fence_irq(adev, doorbell_offset); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 73223d97a87f..ac90d8e9d86a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1571,6 +1571,71 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } +/** + * gfx_v6_0_setup_tcc() - setup which TCCs are used + * + * @adev: amdgpu_device pointer + * + * Verify whether the current GPU has any TCCs disabled, + * which can happen when the GPU is harvested and some + * memory channels are disabled, reducing the memory bus width. + * For example, on the Radeon HD 7870 XT (Tahiti LE). + * + * If some TCCs are disabled, we need to make sure that + * the disabled TCCs are not used, and the remaining TCCs + * are used optimally. + * + * TCP_CHAN_STEER_LO/HI control which TCC is used by TCP channels. + * TCP_ADDR_CONFIG.NUM_TCC_BANKS controls how many channels are used. + * + * For optimal performance: + * - Rely on the CHAN_STEER from the golden registers table, + * only skip disabled TCCs but keep the mapping order. + * - Limit NUM_TCC_BANKS to number of active TCCs to avoid thrashing, + * which performs better than using the same TCC twice. + */ +static void gfx_v6_0_setup_tcc(struct amdgpu_device *adev) +{ + u32 i, tcc, tcp_addr_config, num_active_tcc = 0; + u64 chan_steer, patched_chan_steer = 0; + const u32 num_max_tcc = adev->gfx.config.max_texture_channel_caches; + const u32 dis_tcc_mask = + amdgpu_gfx_create_bitmask(num_max_tcc) & + (REG_GET_FIELD(RREG32(mmCGTS_TCC_DISABLE), + CGTS_TCC_DISABLE, TCC_DISABLE) | + REG_GET_FIELD(RREG32(mmCGTS_USER_TCC_DISABLE), + CGTS_USER_TCC_DISABLE, TCC_DISABLE)); + + /* When no TCC is disabled, the golden registers table already has optimal TCC setup */ + if (!dis_tcc_mask) + return; + + /* Each 4-bit nibble contains the index of a TCC used by all TCPs */ + chan_steer = RREG32(mmTCP_CHAN_STEER_LO) | ((u64)RREG32(mmTCP_CHAN_STEER_HI) << 32ull); + + /* Patch the TCP to TCC mapping to skip disabled TCCs */ + for (i = 0; i < num_max_tcc; ++i) { + tcc = (chan_steer >> (u64)(4 * i)) & 0xf; + + if (!((1 << tcc) & dis_tcc_mask)) { + /* Copy enabled TCC indices to the patched register value. */ + patched_chan_steer |= (u64)tcc << (u64)(4 * num_active_tcc); + ++num_active_tcc; + } + } + + WARN_ON(num_active_tcc != num_max_tcc - hweight32(dis_tcc_mask)); + + /* Patch number of TCCs used by TCPs */ + tcp_addr_config = REG_SET_FIELD(RREG32(mmTCP_ADDR_CONFIG), + TCP_ADDR_CONFIG, NUM_TCC_BANKS, + num_active_tcc - 1); + + WREG32(mmTCP_ADDR_CONFIG, tcp_addr_config); + WREG32(mmTCP_CHAN_STEER_HI, upper_32_bits(patched_chan_steer)); + WREG32(mmTCP_CHAN_STEER_LO, lower_32_bits(patched_chan_steer)); +} + static void gfx_v6_0_config_init(struct amdgpu_device *adev) { adev->gfx.config.double_offchip_lds_buf = 0; @@ -1729,6 +1794,7 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev) gfx_v6_0_tiling_mode_table_init(adev); gfx_v6_0_setup_rb(adev); + gfx_v6_0_setup_tcc(adev); gfx_v6_0_setup_spi(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 95be105671ec..86c7c2a429b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5660,9 +5660,6 @@ static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, { struct amdgpu_device *adev = ring->adev; - /* we only allocate 32bit for each seq wb address */ - BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - /* write fence seq to the "addr" */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 9fe8d10ab270..cffb1e6bab35 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -802,6 +802,7 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v2_0_dec_ring_get_rptr, .get_wptr = jpeg_v2_0_dec_ring_get_wptr, .set_wptr = jpeg_v2_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 20983f126b49..13a6e24c624a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -693,6 +693,7 @@ static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, @@ -724,6 +725,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 98f5e0622bc5..d0445df39d2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -594,6 +594,7 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v3_0_dec_ring_get_rptr, .get_wptr = jpeg_v3_0_dec_ring_get_wptr, .set_wptr = jpeg_v3_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 0bd83820dd20..6fd4238a8471 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -759,6 +759,7 @@ static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v4_0_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 82abe181c730..0c746580de11 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -1219,6 +1219,7 @@ static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 54fd9c800c40..a43582b9c876 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -804,6 +804,7 @@ static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v4_0_5_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_5_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_5_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index 46bf15dce2bd..72a4b2d0676f 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -680,6 +680,7 @@ static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index edecbfe66c79..250316704dfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -884,6 +884,7 @@ static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v5_0_1_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_1_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_1_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c index 285c459379c4..7a4ecea6b39a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c @@ -703,6 +703,7 @@ static const struct amd_ip_funcs jpeg_v5_0_2_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v5_0_2_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v5_0_2_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_2_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_2_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c index 1821dced936f..e7546816baba 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c @@ -661,6 +661,7 @@ static const struct amd_ip_funcs jpeg_v5_3_0_ip_funcs = { static const struct amdgpu_ring_funcs jpeg_v5_3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, + .no_user_fence = true, .get_rptr = jpeg_v5_3_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_3_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_3_0_dec_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 2fc39a6938f6..5b4121ddc78c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -31,89 +31,68 @@ #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE static int -mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo) -{ - int ret; - - ret = amdgpu_bo_reserve(bo, true); - if (ret) { - DRM_ERROR("Failed to reserve bo. ret %d\n", ret); - goto err_reserve_bo_failed; - } - - ret = amdgpu_ttm_alloc_gart(&bo->tbo); - if (ret) { - DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); - goto err_map_bo_gart_failed; - } - - amdgpu_bo_unreserve(bo); - bo = amdgpu_bo_ref(bo); - - return 0; - -err_map_bo_gart_failed: - amdgpu_bo_unreserve(bo); -err_reserve_bo_failed: - return ret; -} - -static int mes_userq_create_wptr_mapping(struct amdgpu_device *adev, struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue, uint64_t wptr) { struct amdgpu_bo_va_mapping *wptr_mapping; - struct amdgpu_vm *wptr_vm; struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj; + struct amdgpu_bo *obj; + struct amdgpu_vm *vm = queue->vm; + struct drm_exec exec; int ret; - wptr_vm = queue->vm; - ret = amdgpu_bo_reserve(wptr_vm->root.bo, false); - if (ret) - return ret; - wptr &= AMDGPU_GMC_HOLE_MASK; - wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT); - amdgpu_bo_unreserve(wptr_vm->root.bo); - if (!wptr_mapping) { - DRM_ERROR("Failed to lookup wptr bo\n"); - return -EINVAL; - } - wptr_obj->obj = wptr_mapping->bo_va->base.bo; - if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { - DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n"); - return -EINVAL; - } + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 2); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(vm, &exec, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto fail_lock; + + wptr_mapping = amdgpu_vm_bo_lookup_mapping(vm, wptr >> PAGE_SHIFT); + if (!wptr_mapping) { + ret = -EINVAL; + goto fail_lock; + } - ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj); - if (ret) { - DRM_ERROR("Failed to map wptr bo to GART\n"); - return ret; + obj = wptr_mapping->bo_va->base.bo; + ret = drm_exec_lock_obj(&exec, &obj->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto fail_lock; } - ret = amdgpu_bo_reserve(wptr_obj->obj, true); - if (ret) { - DRM_ERROR("Failed to reserve wptr bo\n"); - return ret; + wptr_obj->obj = amdgpu_bo_ref(wptr_mapping->bo_va->base.bo); + if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) { + ret = -EINVAL; + goto fail_map; } /* TODO use eviction fence instead of pinning. */ ret = amdgpu_bo_pin(wptr_obj->obj, AMDGPU_GEM_DOMAIN_GTT); if (ret) { - drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin wptr bo\n"); - goto unresv_bo; + DRM_ERROR("Failed to pin wptr bo. ret %d\n", ret); + goto fail_map; + } + + ret = amdgpu_ttm_alloc_gart(&wptr_obj->obj->tbo); + if (ret) { + DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret); + goto fail_map; } queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj); - amdgpu_bo_unreserve(wptr_obj->obj); + drm_exec_fini(&exec); return 0; -unresv_bo: - amdgpu_bo_unreserve(wptr_obj->obj); +fail_map: + amdgpu_bo_unref(&wptr_obj->obj); +fail_lock: + drm_exec_fini(&exec); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 44f0f23e1148..e64f2f6df9a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -889,7 +889,7 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se /* write the fence */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); @@ -899,7 +899,7 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se addr += 4; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); /* zero in first two bits */ - BUG_ON(addr & 0x3); + WARN_ON(addr & 0x3); amdgpu_ring_write(ring, lower_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, upper_32_bits(seq)); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 0f530bb8a9a3..8ca46e1e474e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1662,17 +1662,8 @@ static int sdma_v6_0_process_fence_irq(struct amdgpu_device *adev, u32 doorbell_offset = entry->src_data[0]; if (adev->enable_mes && doorbell_offset) { - struct amdgpu_usermode_queue *queue; - struct xarray *xa = &adev->userq_doorbell_xa; - unsigned long flags; - doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; - - xa_lock_irqsave(xa, flags); - queue = xa_load(xa, doorbell_offset); - if (queue) - amdgpu_userq_fence_driver_process(queue->fence_drv); - xa_unlock_irqrestore(xa, flags); + amdgpu_userq_process_fence_irq(adev, doorbell_offset); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 9ed817b69a3b..37191e2918d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1594,17 +1594,8 @@ static int sdma_v7_0_process_fence_irq(struct amdgpu_device *adev, u32 doorbell_offset = entry->src_data[0]; if (adev->enable_mes && doorbell_offset) { - struct xarray *xa = &adev->userq_doorbell_xa; - struct amdgpu_usermode_queue *queue; - unsigned long flags; - doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; - - xa_lock_irqsave(xa, flags); - queue = xa_load(xa, doorbell_offset); - if (queue) - amdgpu_userq_fence_driver_process(queue->fence_drv); - xa_unlock_irqrestore(xa, flags); + amdgpu_userq_process_fence_irq(adev, doorbell_offset); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index fea576a7f397..efb3fde919ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -242,6 +242,10 @@ static void uvd_v3_1_mc_resume(struct amdgpu_device *adev) uint64_t addr; uint32_t size; + /* When the keyselect is already set, don't perturb it. */ + if (RREG32(mmUVD_FW_START)) + return; + /* program the VCPU memory controller bits 0-27 */ addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3; @@ -284,6 +288,12 @@ static int uvd_v3_1_fw_validate(struct amdgpu_device *adev) int i; uint32_t keysel = adev->uvd.keyselect; + if (RREG32(mmUVD_FW_START) & UVD_FW_STATUS__PASS_MASK) { + dev_dbg(adev->dev, "UVD keyselect already set: 0x%x (on CPU: 0x%x)\n", + RREG32(mmUVD_FW_START), adev->uvd.keyselect); + return 0; + } + WREG32(mmUVD_FW_START, keysel); for (i = 0; i < 10; ++i) { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c index 5b7b46d242c6..93253db5e2de 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c @@ -42,9 +42,10 @@ #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" +#define VCE_V1_0_ALIGNMENT (32 * 1024) #define VCE_V1_0_FW_SIZE (256 * 1024) #define VCE_V1_0_STACK_SIZE (64 * 1024) -#define VCE_V1_0_DATA_SIZE (7808 * (AMDGPU_MAX_VCE_HANDLES + 1)) +#define VCE_V1_0_DATA_SIZE (ALIGN(7808 * (AMDGPU_MAX_VCE_HANDLES + 1), VCE_V1_0_ALIGNMENT)) #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev); @@ -177,7 +178,7 @@ static void vce_v1_0_init_cg(struct amdgpu_device *adev) } /** - * vce_v1_0_load_fw_signature - load firmware signature into VCPU BO + * vce_v1_0_load_fw() - load firmware signature into VCPU BO * * @adev: amdgpu_device pointer * @@ -185,21 +186,26 @@ static void vce_v1_0_init_cg(struct amdgpu_device *adev) * This function finds the signature appropriate for the current * ASIC and writes that into the VCPU BO. */ -static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev) +static int vce_v1_0_load_fw(struct amdgpu_device *adev) { const struct common_firmware_header *hdr; struct vce_v1_0_fw_signature *sign; - unsigned int ucode_offset; + u32 ucode_offset; + u32 ucode_size; uint32_t chip_id; u32 *cpu_addr; int i; hdr = (const struct common_firmware_header *)adev->vce.fw->data; ucode_offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + ucode_size = hdr->ucode_size_bytes - sizeof(struct vce_v1_0_fw_signature *); cpu_addr = adev->vce.cpu_addr; sign = (void *)adev->vce.fw->data + ucode_offset; + if (ucode_size > VCE_V1_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET) + return -EINVAL; + switch (adev->asic_type) { case CHIP_TAHITI: chip_id = 0x01000014; @@ -226,12 +232,14 @@ static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev) return -EINVAL; } + memset_io(&cpu_addr[0], 0, amdgpu_bo_size(adev->vce.vcpu_bo)); + cpu_addr += (256 - 64) / 4; memcpy_toio(&cpu_addr[0], &sign->val[i].nonce[0], 16); cpu_addr[4] = cpu_to_le32(le32_to_cpu(sign->length) + 64); memset_io(&cpu_addr[5], 0, 44); - memcpy_toio(&cpu_addr[16], &sign[1], hdr->ucode_size_bytes - sizeof(*sign)); + memcpy_toio(&cpu_addr[16], &sign[1], ucode_size); cpu_addr += (le32_to_cpu(sign->length) + 64) / 4; memcpy_toio(&cpu_addr[0], &sign->val[i].sigval[0], 16); @@ -312,18 +320,23 @@ static int vce_v1_0_mc_resume(struct amdgpu_device *adev) WREG32(mmVCE_VCPU_SCRATCH7, AMDGPU_MAX_VCE_HANDLES); offset = adev->vce.gpu_addr + AMDGPU_VCE_FIRMWARE_OFFSET; - size = VCE_V1_0_FW_SIZE; - WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); + size = VCE_V1_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET; + WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset); WREG32(mmVCE_VCPU_CACHE_SIZE0, size); offset += size; size = VCE_V1_0_STACK_SIZE; - WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff); + WARN_ON(!IS_ALIGNED(offset, VCE_V1_0_ALIGNMENT)); + WARN_ON(!IS_ALIGNED(size, VCE_V1_0_ALIGNMENT)); + WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset); WREG32(mmVCE_VCPU_CACHE_SIZE1, size); offset += size; size = VCE_V1_0_DATA_SIZE; - WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff); + WARN_ON(!IS_ALIGNED(offset, VCE_V1_0_ALIGNMENT)); + WARN_ON(!IS_ALIGNED(size, VCE_V1_0_ALIGNMENT)); + WARN_ON((offset + size - adev->vce.gpu_addr) > amdgpu_bo_size(adev->vce.vcpu_bo)); + WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset); WREG32(mmVCE_VCPU_CACHE_SIZE2, size); WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100); @@ -527,22 +540,31 @@ static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block) * To accomodate that, we put GART to the LOW address range * and reserve some GART pages where we map the VCPU BO, * so that it gets a 32-bit address. + * + * The BAR address is zero and we can't change it + * due to the firmware validation mechanism. + * It seems that it fails to initialize if the address is >= 128 MiB. */ static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev) { u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo); - u64 max_vcpu_bo_addr = 0xffffffff - bo_size; + u64 max_vcpu_bo_addr = 0x07ffffff - bo_size; u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE; u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo); u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID; u64 vce_gart_start_offs; int r; - r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, - &adev->vce.gart_node, num_pages, - DRM_MM_INSERT_LOW); - if (r) - return r; + if (adev->gmc.vram_start < adev->gmc.gart_start) + return amdgpu_bo_gpu_offset(adev->vce.vcpu_bo) <= max_vcpu_bo_addr ? 0 : -EINVAL; + + if (!drm_mm_node_allocated(&adev->vce.gart_node)) { + r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr, + &adev->vce.gart_node, num_pages, + DRM_MM_INSERT_LOW); + if (r) + return r; + } vce_gart_start_offs = amdgpu_gtt_node_to_byte_offset(&adev->vce.gart_node); @@ -553,8 +575,6 @@ static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev) amdgpu_gart_map_vram_range(adev, pa, adev->vce.gart_node.start, num_pages, flags, adev->gart.ptr); adev->vce.gpu_addr = adev->gmc.gart_start + vce_gart_start_offs; - if (adev->vce.gpu_addr > max_vcpu_bo_addr) - return -EINVAL; return 0; } @@ -574,10 +594,7 @@ static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - r = amdgpu_vce_resume(adev); - if (r) - return r; - r = vce_v1_0_load_fw_signature(adev); + r = vce_v1_0_load_fw(adev); if (r) return r; r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev); @@ -696,10 +713,7 @@ static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int r; - r = amdgpu_vce_resume(adev); - if (r) - return r; - r = vce_v1_0_load_fw_signature(adev); + r = vce_v1_0_load_fw(adev); if (r) return r; r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index db149eda6204..3a6fc8604108 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -37,9 +37,14 @@ #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" + +/* Use 24K to be safe. The FW supposedly only requires 23744 bytes. */ +#define VCE_V2_0_DATA_ENTRY_SIZE (24 * 1024) + #define VCE_V2_0_FW_SIZE (256 * 1024) #define VCE_V2_0_STACK_SIZE (64 * 1024) -#define VCE_V2_0_DATA_SIZE (23552 * AMDGPU_MAX_VCE_HANDLES) +#define VCE_V2_0_DATA_SIZE (VCE_V2_0_DATA_ENTRY_SIZE * (AMDGPU_MAX_VCE_HANDLES + 1)) + #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev); @@ -183,7 +188,7 @@ static void vce_v2_0_mc_resume(struct amdgpu_device *adev) WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8)); offset = AMDGPU_VCE_FIRMWARE_OFFSET; - size = VCE_V2_0_FW_SIZE; + size = VCE_V2_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET; WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); WREG32(mmVCE_VCPU_CACHE_SIZE0, size); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 03d79e464f04..c69f7d82060f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -574,7 +574,7 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx) } else WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8)); offset = AMDGPU_VCE_FIRMWARE_OFFSET; - size = VCE_V3_0_FW_SIZE; + size = VCE_V3_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET; WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); WREG32(mmVCE_VCPU_CACHE_SIZE0, size); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index e35fae9cdaf6..0442bfcfd384 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -2113,6 +2113,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, + .no_user_fence = true, .secure_submission_supported = true, .get_rptr = vcn_v2_0_dec_ring_get_rptr, .get_wptr = vcn_v2_0_dec_ring_get_wptr, @@ -2145,6 +2146,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v2_0_enc_ring_get_rptr, .get_wptr = vcn_v2_0_enc_ring_get_wptr, .set_wptr = vcn_v2_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 006a15451197..8b8184fe6764 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -1778,6 +1778,7 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, + .no_user_fence = true, .secure_submission_supported = true, .get_rptr = vcn_v2_5_dec_ring_get_rptr, .get_wptr = vcn_v2_5_dec_ring_get_wptr, @@ -1879,6 +1880,7 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v2_5_enc_ring_get_rptr, .get_wptr = vcn_v2_5_enc_ring_get_wptr, .set_wptr = vcn_v2_5_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 6fb4fcdbba4f..81bba3ec2a93 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1856,6 +1856,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0x3f, .nop = VCN_DEC_SW_CMD_NO_OP, + .no_user_fence = true, .secure_submission_supported = true, .get_rptr = vcn_v3_0_dec_ring_get_rptr, .get_wptr = vcn_v3_0_dec_ring_get_wptr, @@ -1972,6 +1973,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { uint32_t offset, size, *create; + uint64_t buf_end; if (msg[0] != RDECODE_MESSAGE_CREATE) continue; @@ -1979,7 +1981,8 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, offset = msg[1]; size = msg[2]; - if (size < 4 || offset + size > end - addr) { + if (size < 4 || check_add_overflow(offset, size, &buf_end) || + buf_end > end - addr) { DRM_ERROR("VCN message buffer exceeds BO bounds!\n"); r = -EINVAL; goto out; @@ -2036,6 +2039,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, + .no_user_fence = true, .secure_submission_supported = true, .get_rptr = vcn_v3_0_dec_ring_get_rptr, .get_wptr = vcn_v3_0_dec_ring_get_wptr, @@ -2138,6 +2142,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v3_0_enc_ring_get_rptr, .get_wptr = vcn_v3_0_enc_ring_get_wptr, .set_wptr = vcn_v3_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 5dec92691f73..ff7269bafae8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1889,6 +1889,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { uint32_t offset, size, *create; + uint64_t buf_end; if (msg[0] != RDECODE_MESSAGE_CREATE) continue; @@ -1896,7 +1897,8 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, offset = msg[1]; size = msg[2]; - if (size < 4 || offset + size > end - addr) { + if (size < 4 || check_add_overflow(offset, size, &buf_end) || + buf_end > end - addr) { DRM_ERROR("VCN message buffer exceeds BO bounds!\n"); r = -EINVAL; goto out; @@ -1994,6 +1996,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .extra_bytes = sizeof(struct amdgpu_vcn_rb_metadata), .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index ff3013b97abd..10e8fc2821f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1775,6 +1775,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v4_0_3_unified_ring_get_rptr, .get_wptr = vcn_v4_0_3_unified_ring_get_wptr, .set_wptr = vcn_v4_0_3_unified_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 1f6a22983c0d..1571cc5a148c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1483,6 +1483,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v4_0_5_unified_ring_get_rptr, .get_wptr = vcn_v4_0_5_unified_ring_get_wptr, .set_wptr = vcn_v4_0_5_unified_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 6109124f852e..d5f49fa33bee 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -1207,6 +1207,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v5_0_0_unified_ring_get_rptr, .get_wptr = vcn_v5_0_0_unified_ring_get_wptr, .set_wptr = vcn_v5_0_0_unified_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index c28c6aff17aa..54fbf8d73ca6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -1419,6 +1419,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v5_0_1_unified_ring_get_rptr, .get_wptr = vcn_v5_0_1_unified_ring_get_wptr, .set_wptr = vcn_v5_0_1_unified_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c index c3d3cc023058..bbc172db91a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c @@ -994,6 +994,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_2_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .no_user_fence = true, .get_rptr = vcn_v5_0_2_unified_ring_get_rptr, .get_wptr = vcn_v5_0_2_unified_ring_get_wptr, .set_wptr = vcn_v5_0_2_unified_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 55ea5145a28a..03b266b26738 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -25,6 +25,7 @@ #include <linux/err.h> #include <linux/fs.h> #include <linux/file.h> +#include <linux/overflow.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/uaccess.h> @@ -66,6 +67,21 @@ static const struct class kfd_class = { .name = kfd_dev_name, }; +/* + * Cache the address space of the chardev on first open so that the reset + * path can drop all userspace mappings of doorbell and MMIO ranges via + * unmap_mapping_range(). + */ +static struct address_space *kfd_dev_mapping; + +void kfd_dev_unmap_mapping_range(loff_t const holebegin, loff_t const holelen) +{ + struct address_space *mapping = READ_ONCE(kfd_dev_mapping); + + if (mapping) + unmap_mapping_range(mapping, holebegin, holelen, 1); +} + static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id) { struct kfd_process_device *pdd; @@ -132,6 +148,13 @@ static int kfd_open(struct inode *inode, struct file *filep) if (iminor(inode) != 0) return -ENODEV; + /* + * /dev/kfd is a single chardev so all opens share one inode. Cache + * its address_space on the first open for use by the reset path. + */ + if (!READ_ONCE(kfd_dev_mapping)) + cmpxchg(&kfd_dev_mapping, NULL, inode->i_mapping); + is_32bit_user_mode = in_compat_syscall(); if (is_32bit_user_mode) { @@ -1359,7 +1382,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); if (WARN_ON_ONCE(!peer_pdd)) continue; - kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); + kfd_flush_tlb(peer_pdd); } kfree(devices_arr); @@ -1454,7 +1477,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, if (WARN_ON_ONCE(!peer_pdd)) continue; if (flush_tlb) - kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); + kfd_flush_tlb(peer_pdd); /* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */ err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv); @@ -1695,6 +1718,16 @@ static int kfd_ioctl_smi_events(struct file *filep, return kfd_smi_event_open(pdd->dev, &args->anon_fd); } +static int kfd_ioctl_svm_validate(void *kdata, unsigned int usize) +{ + struct kfd_ioctl_svm_args *args = kdata; + size_t expected = struct_size(args, attrs, args->nattr); + + if (expected == SIZE_MAX || usize < expected) + return -EINVAL; + return 0; +} + #if IS_ENABLED(CONFIG_HSA_AMD_SVM) static int kfd_ioctl_set_xnack_mode(struct file *filep, @@ -3209,7 +3242,11 @@ static int kfd_ioctl_create_process(struct file *filep, struct kfd_process *p, v #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ - .cmd_drv = 0, .name = #ioctl} + .validate = NULL, .cmd_drv = 0, .name = #ioctl} + +#define AMDKFD_IOCTL_DEF_V(ioctl, _func, _validate, _flags) \ + [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ + .validate = _validate, .cmd_drv = 0, .name = #ioctl} /** Ioctl table */ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { @@ -3306,7 +3343,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS, kfd_ioctl_smi_events, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0), + AMDKFD_IOCTL_DEF_V(AMDKFD_IOC_SVM, kfd_ioctl_svm, + kfd_ioctl_svm_validate, 0), AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, kfd_ioctl_set_xnack_mode, 0), @@ -3431,6 +3469,12 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) memset(kdata, 0, usize); } + if (ioctl->validate) { + retcode = ioctl->validate(kdata, usize); + if (retcode) + goto err_i1; + } + retcode = func(filep, process, kdata); if (cmd & IOC_OUT) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 8ff97bf7d95a..b7f8f7ff8198 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1737,37 +1737,6 @@ bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entr return false; } -/* check if there is kfd process still uses adev */ -static bool kgd2kfd_check_device_idle(struct amdgpu_device *adev) -{ - struct kfd_process *p; - struct hlist_node *p_temp; - unsigned int temp; - struct kfd_node *dev; - - mutex_lock(&kfd_processes_mutex); - - if (hash_empty(kfd_processes_table)) { - mutex_unlock(&kfd_processes_mutex); - return true; - } - - /* check if there is device still use adev */ - hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) { - for (int i = 0; i < p->n_pdds; i++) { - dev = p->pdds[i]->dev; - if (dev->adev == adev) { - mutex_unlock(&kfd_processes_mutex); - return false; - } - } - } - - mutex_unlock(&kfd_processes_mutex); - - return true; -} - /** kgd2kfd_teardown_processes - gracefully tear down existing * kfd processes that use adev * @@ -1800,7 +1769,7 @@ void kgd2kfd_teardown_processes(struct amdgpu_device *adev) mutex_unlock(&kfd_processes_mutex); /* wait all kfd processes use adev terminate */ - while (!kgd2kfd_check_device_idle(adev)) + while (!!atomic_read(&adev->kfd.dev->kfd_processes_count)) cond_resched(); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ab3b2e7be9bd..e0a31e11f0ff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -475,6 +475,9 @@ static int allocate_doorbell(struct qcm_process_device *qpd, } else { /* For CP queues on SOC15 */ if (restore_id) { + if (*restore_id >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) + return -EINVAL; + /* make sure that ID is free */ if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap)) return -EINVAL; @@ -572,7 +575,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, qpd->vmid, qpd->page_table_base); /* invalidate the VM context after pasid and vmid mapping is set up */ - kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); + kfd_flush_tlb(qpd_to_pdd(qpd)); if (dqm->dev->kfd2kgd->set_scratch_backing_va) dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, @@ -610,7 +613,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, if (flush_texture_cache_nocpsch(q->device, qpd)) dev_err(dev, "Failed to flush TC\n"); - kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); + kfd_flush_tlb(qpd_to_pdd(qpd)); /* Release the vmid mapping */ set_pasid_vmid_mapping(dqm, 0, qpd->vmid); @@ -1284,7 +1287,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, dqm->dev->adev, qpd->vmid, qpd->page_table_base); - kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); + kfd_flush_tlb(pdd); } /* Take a safe reference to the mm_struct, which may otherwise @@ -1587,6 +1590,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, } if (restore_sdma_id) { + if (*restore_sdma_id >= get_num_sdma_queues(dqm)) + return -EINVAL; + /* Re-use existing sdma_id */ if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) { dev_err(dev, "SDMA queue already in use\n"); @@ -1613,6 +1619,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, return -ENOMEM; } if (restore_sdma_id) { + if (*restore_sdma_id >= get_num_xgmi_sdma_queues(dqm)) + return -EINVAL; + /* Re-use existing sdma_id */ if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) { dev_err(dev, "SDMA queue already in use\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index e8f97de9d6e4..f6d9d81003dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -364,11 +364,15 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, { struct v9_mqd *m; struct kfd_context_save_area_header header; + u32 cntl_stack_size; + u32 cntl_stack_offset; /* Control stack is located one page after MQD. */ void *mqd_ctl_stack = (void *)((uintptr_t)mqd + AMDGPU_GPU_PAGE_SIZE); m = get_mqd(mqd); + cntl_stack_size = min_t(u32, m->cp_hqd_cntl_stack_size, q->ctl_stack_size); + cntl_stack_offset = min_t(u32, m->cp_hqd_cntl_stack_offset, cntl_stack_size); *ctl_stack_used_size = m->cp_hqd_cntl_stack_size - m->cp_hqd_cntl_stack_offset; @@ -384,9 +388,10 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state))) return -EFAULT; - if (copy_to_user(ctl_stack + m->cp_hqd_cntl_stack_offset, - mqd_ctl_stack + m->cp_hqd_cntl_stack_offset, - *ctl_stack_used_size)) + *ctl_stack_used_size = cntl_stack_size - cntl_stack_offset; + + if (copy_to_user(ctl_stack + cntl_stack_offset, mqd_ctl_stack + cntl_stack_offset, + *ctl_stack_used_size)) return -EFAULT; return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 6e333bfa17d6..d5b07789eda4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -395,6 +395,7 @@ enum kfd_mempool { /* Character device interface */ int kfd_chardev_init(void); void kfd_chardev_exit(void); +void kfd_dev_unmap_mapping_range(loff_t const holebegin, loff_t const holelen); /** * enum kfd_unmap_queues_filter - Enum for queue filters. @@ -1047,10 +1048,13 @@ extern struct srcu_struct kfd_processes_srcu; typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p, void *data); +typedef int amdkfd_ioctl_validate_t(void *kdata, unsigned int usize); + struct amdkfd_ioctl_desc { unsigned int cmd; int flags; amdkfd_ioctl_t *func; + amdkfd_ioctl_validate_t *validate; unsigned int cmd_drv; const char *name; }; @@ -1551,13 +1555,13 @@ void kfd_signal_reset_event(struct kfd_node *dev); void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid); void kfd_signal_process_terminate_event(struct kfd_process *p); -static inline void kfd_flush_tlb(struct kfd_process_device *pdd, - enum TLB_FLUSH_TYPE type) +static inline void kfd_flush_tlb(struct kfd_process_device *pdd) { struct amdgpu_device *adev = pdd->dev->adev; struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); - amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask); + amdgpu_vm_flush_compute_tlb(adev, vm, TLB_FLUSH_HEAVYWEIGHT, + pdd->dev->xcc_mask); } static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index b120fdb0ef77..35ec67d9739b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1366,6 +1366,12 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, pr_debug("CPU[0x%llx 0x%llx] -> GPU[0x%llx 0x%llx]\n", start, last, gpu_start, gpu_end); + + if (!amdgpu_vm_ready(vm)) { + pr_debug("VM not ready, canceling unmap\n"); + return -EINVAL; + } + return amdgpu_vm_update_range(adev, vm, false, true, true, false, NULL, gpu_start, gpu_end, init_pte_value, 0, 0, NULL, NULL, fence); @@ -1418,7 +1424,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, if (r) break; } - kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT); + kfd_flush_tlb(pdd); } return r; @@ -1443,6 +1449,11 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms, last_start, last_start + npages - 1, readonly); + if (!amdgpu_vm_ready(vm)) { + pr_debug("VM not ready, canceling map\n"); + return -EINVAL; + } + for (i = offset; i < offset + npages; i++) { uint64_t gpu_start; uint64_t gpu_end; @@ -1560,7 +1571,7 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, } } - kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); + kfd_flush_tlb(pdd); } return r; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e96a12ff2d31..5fc5d5608506 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1903,7 +1903,11 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) goto error; } - init_data.asic_id.chip_family = adev->family; + /* special handling for early revisions of GC 11.5.4 */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 4)) + init_data.asic_id.chip_family = AMDGPU_FAMILY_GC_11_5_4; + else + init_data.asic_id.chip_family = adev->family; init_data.asic_id.pci_revision_id = adev->pdev->revision; init_data.asic_id.hw_internal_rev = adev->external_rev_id; @@ -9404,9 +9408,21 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, if (acrtc_state) { timing = &acrtc_state->stream->timing; - if (amdgpu_ip_version(adev, DCE_HWIP, 0) < - IP_VERSION(3, 5, 0) || - !(adev->flags & AMD_IS_APU)) { + if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= + IP_VERSION(3, 2, 0) && + !(adev->flags & AMD_IS_APU)) { + /* + * DGPUs NV3x and newer that support idle optimizations + * experience intermittent flip-done timeouts on cursor + * updates. Restore 5s offdelay behavior for now. + * + * Discussion on the issue: + * https://lore.kernel.org/amd-gfx/20260217191632.1243826-1-sysdadmin@m1k.cloud/ + */ + config.offdelay_ms = 5000; + config.disable_immediate = false; + } else if (amdgpu_ip_version(adev, DCE_HWIP, 0) < + IP_VERSION(3, 5, 0)) { /* * Older HW and DGPU have issues with instant off; * use a 2 frame offdelay. diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 3b8ae7798a93..a3cb05490dc9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -1032,6 +1032,45 @@ dm_helpers_read_acpi_edid(struct amdgpu_dm_connector *aconnector) return drm_edid_read_custom(connector, dm_helpers_probe_acpi_edid, connector); } +static const struct drm_edid * +dm_helpers_read_vbios_hardcoded_edid(struct dc_link *link, struct amdgpu_dm_connector *aconnector) +{ + struct dc_bios *bios = link->ctx->dc_bios; + struct embedded_panel_info info; + const struct drm_edid *edid; + enum bp_result r; + + if (!dc_is_embedded_signal(link->connector_signal) || + !bios->funcs->get_embedded_panel_info) + return NULL; + + memset(&info, 0, sizeof(info)); + r = bios->funcs->get_embedded_panel_info(bios, &info); + + if (r != BP_RESULT_OK) { + dm_error("Error when reading embedded panel info: %u\n", r); + return NULL; + } + + if (!info.fake_edid || !info.fake_edid_size) { + dm_error("Embedded panel info doesn't contain an EDID\n"); + return NULL; + } + + edid = drm_edid_alloc(info.fake_edid, info.fake_edid_size); + + if (!drm_edid_valid(edid)) { + dm_error("EDID from embedded panel info is invalid\n"); + drm_edid_free(edid); + return NULL; + } + + aconnector->base.display_info.width_mm = info.panel_width_mm; + aconnector->base.display_info.height_mm = info.panel_height_mm; + + return edid; +} + void populate_hdmi_info_from_connector(struct drm_hdmi_info *hdmi, struct dc_edid_caps *edid_caps) { edid_caps->scdc_present = hdmi->scdc.supported; @@ -1052,6 +1091,9 @@ enum dc_edid_status dm_helpers_read_local_edid( if (link->aux_mode) ddc = &aconnector->dm_dp_aux.aux.ddc; + else if (link->ddc_hw_inst == GPIO_DDC_LINE_UNKNOWN && + dc_is_embedded_signal(link->connector_signal)) + ddc = NULL; else ddc = &aconnector->i2c->base; @@ -1065,6 +1107,8 @@ enum dc_edid_status dm_helpers_read_local_edid( drm_edid = dm_helpers_read_acpi_edid(aconnector); if (drm_edid) drm_info(connector->dev, "Using ACPI provided EDID for %s\n", connector->name); + else if (!ddc) + drm_edid = dm_helpers_read_vbios_hardcoded_edid(link, aconnector); else drm_edid = drm_edid_read_ddc(connector, ddc); drm_edid_connector_update(connector, drm_edid); diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index dd362071a6c9..c307f42fe0b9 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -794,11 +794,13 @@ static enum bp_result bios_parser_external_encoder_control( static enum bp_result bios_parser_dac_load_detection( struct dc_bios *dcb, - enum engine_id engine_id) + enum engine_id engine_id, + struct graphics_object_id ext_enc_id) { struct bios_parser *bp = BP_FROM_DCB(dcb); struct dc_context *ctx = dcb->ctx; struct bp_load_detection_parameters bp_params = {0}; + struct bp_external_encoder_control ext_cntl = {0}; enum bp_result bp_result = BP_RESULT_UNSUPPORTED; uint32_t bios_0_scratch; uint32_t device_id_mask = 0; @@ -824,6 +826,13 @@ static enum bp_result bios_parser_dac_load_detection( bp_params.engine_id = engine_id; bp_result = bp->cmd_tbl.dac_load_detection(bp, &bp_params); + } else if (ext_enc_id.id) { + if (!bp->cmd_tbl.external_encoder_control) + return BP_RESULT_UNSUPPORTED; + + ext_cntl.action = EXTERNAL_ENCODER_CONTROL_DAC_LOAD_DETECT; + ext_cntl.encoder_id = ext_enc_id; + bp_result = bp->cmd_tbl.external_encoder_control(bp, &ext_cntl); } if (bp_result != BP_RESULT_OK) @@ -1304,6 +1313,60 @@ static enum bp_result bios_parser_get_embedded_panel_info( return BP_RESULT_FAILURE; } +static enum bp_result get_embedded_panel_extra_info( + struct bios_parser *bp, + struct embedded_panel_info *info, + const uint32_t table_offset) +{ + uint8_t *record = bios_get_image(&bp->base, table_offset, 1); + ATOM_PANEL_RESOLUTION_PATCH_RECORD *panel_res_record; + ATOM_FAKE_EDID_PATCH_RECORD *fake_edid_record; + + while (*record != ATOM_RECORD_END_TYPE) { + switch (*record) { + case LCD_MODE_PATCH_RECORD_MODE_TYPE: + record += sizeof(ATOM_PATCH_RECORD_MODE); + break; + case LCD_RTS_RECORD_TYPE: + record += sizeof(ATOM_LCD_RTS_RECORD); + break; + case LCD_CAP_RECORD_TYPE: + record += sizeof(ATOM_LCD_MODE_CONTROL_CAP); + break; + case LCD_FAKE_EDID_PATCH_RECORD_TYPE: + fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; + if (fake_edid_record->ucFakeEDIDLength) { + if (fake_edid_record->ucFakeEDIDLength == 128) + info->fake_edid_size = + fake_edid_record->ucFakeEDIDLength; + else + info->fake_edid_size = + fake_edid_record->ucFakeEDIDLength * 128; + + info->fake_edid = fake_edid_record->ucFakeEDIDString; + + record += struct_size(fake_edid_record, + ucFakeEDIDString, + info->fake_edid_size); + } else { + /* empty fake edid record must be 3 bytes long */ + record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; + } + break; + case LCD_PANEL_RESOLUTION_RECORD_TYPE: + panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; + info->panel_width_mm = panel_res_record->usHSize; + info->panel_height_mm = panel_res_record->usVSize; + record += sizeof(ATOM_PANEL_RESOLUTION_PATCH_RECORD); + break; + default: + return BP_RESULT_BADBIOSTABLE; + } + } + + return BP_RESULT_OK; +} + static enum bp_result get_embedded_panel_info_v1_2( struct bios_parser *bp, struct embedded_panel_info *info) @@ -1420,6 +1483,10 @@ static enum bp_result get_embedded_panel_info_v1_2( if (ATOM_PANEL_MISC_API_ENABLED & lvds->ucLVDS_Misc) info->lcd_timing.misc_info.API_ENABLED = true; + if (lvds->usExtInfoTableOffset) + return get_embedded_panel_extra_info(bp, info, + le16_to_cpu(lvds->usExtInfoTableOffset) + DATA_TABLES(LCD_Info)); + return BP_RESULT_OK; } @@ -1545,6 +1612,10 @@ static enum bp_result get_embedded_panel_info_v1_3( (uint32_t) (ATOM_PANEL_MISC_V13_GREY_LEVEL & lvds->ucLCD_Misc) >> ATOM_PANEL_MISC_V13_GREY_LEVEL_SHIFT; + if (lvds->usExtInfoTableOffset) + return get_embedded_panel_extra_info(bp, info, + le16_to_cpu(lvds->usExtInfoTableOffset) + DATA_TABLES(LCD_Info)); + return BP_RESULT_OK; } diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index a1c08e1cc411..c51c4b2c6fae 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -493,6 +493,10 @@ static enum bp_result get_gpio_i2c_info( - sizeof(struct atom_common_table_header)) / sizeof(struct atom_gpio_pin_assignment); + if (!bios_get_image(&bp->base, DATA_TABLES(gpio_pin_lut), + le16_to_cpu(header->table_header.structuresize))) + return BP_RESULT_BADBIOSTABLE; + pin = (struct atom_gpio_pin_assignment *) header->gpio_pin; for (table_index = 0; table_index < count; table_index++) { @@ -681,6 +685,11 @@ static enum bp_result bios_parser_get_gpio_pin_info( count = (le16_to_cpu(header->table_header.structuresize) - sizeof(struct atom_common_table_header)) / sizeof(struct atom_gpio_pin_assignment); + + if (!bios_get_image(&bp->base, DATA_TABLES(gpio_pin_lut), + le16_to_cpu(header->table_header.structuresize))) + return BP_RESULT_BADBIOSTABLE; + for (i = 0; i < count; ++i) { if (header->gpio_pin[i].gpio_id != gpio_id) continue; diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c index 8d2cf95ae739..e00dc05c2d9d 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c @@ -37,10 +37,13 @@ uint8_t *bios_get_image(struct dc_bios *bp, uint32_t offset, uint32_t size) { - if (bp->bios && offset + size < bp->bios_size) - return bp->bios + offset; - else + if (!bp->bios) return NULL; + + if (offset > bp->bios_size || size > bp->bios_size - offset) + return NULL; + + return bp->bios + offset; } #include "reg_helper.h" diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 419f894c87b0..b3530fbf32f7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -6071,7 +6071,11 @@ bool dc_process_dmub_aux_transfer_async(struct dc *dc, uint8_t action; union dmub_rb_cmd cmd = {0}; - ASSERT(payload->length <= 16); + if (link_index >= dc->link_count || !dc->links[link_index]) + return false; + + if (payload->length > sizeof(cmd.dp_aux_access.aux_control.dpaux.data)) + return false; cmd.dp_aux_access.header.type = DMUB_CMD__DP_AUX_ACCESS; cmd.dp_aux_access.header.payload_bytes = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7f55ba09b191..37714d4371fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1682,7 +1682,7 @@ struct dc_scratch_space { struct dc_link_training_overrides preferred_training_settings; struct dp_audio_test_data audio_test_data; - uint8_t ddc_hw_inst; + enum gpio_ddc_line ddc_hw_inst; uint8_t hpd_src; diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h index 6f96c5cf39fe..526f71616f94 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h @@ -102,7 +102,8 @@ struct dc_vbios_funcs { struct bp_external_encoder_control *cntl); enum bp_result (*dac_load_detection)( struct dc_bios *bios, - enum engine_id engine_id); + enum engine_id engine_id, + struct graphics_object_id ext_enc_id); enum bp_result (*transmitter_control)( struct dc_bios *bios, struct bp_transmitter_control *cntl); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 5f40ae9e3120..e15fd1454d3b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -1102,7 +1102,9 @@ void dce110_link_encoder_hw_init( ASSERT(result == BP_RESULT_OK); } - aux_initialize(enc110); + + if (enc110->aux_regs) + aux_initialize(enc110); /* reinitialize HPD. * hpd_initialize() will pass DIG_FE id to HW context. diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c index 6f2a0d5d963b..62fe5c3b18dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c @@ -40,8 +40,8 @@ #define FN(reg_name, field_name) \ mcif_wb30->mcif_wb_shift->field_name, mcif_wb30->mcif_wb_mask->field_name -#define MCIF_ADDR(addr) (((unsigned long long)addr & 0xffffffffff) + 0xFE) >> 8 -#define MCIF_ADDR_HIGH(addr) (unsigned long long)addr >> 40 +#define MCIF_ADDR(addr) ((uint32_t)((((unsigned long long)(addr) & 0xffffffffffULL) + 0xFEULL) >> 8)) +#define MCIF_ADDR_HIGH(addr) ((uint32_t)(((unsigned long long)(addr)) >> 40)) /* wbif programming guide: * 1. set up wbif parameter: diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c index a2c46350e44e..95f8b7c7d657 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c @@ -646,6 +646,9 @@ failure: enum gpio_ddc_line dal_ddc_get_line( const struct ddc *ddc) { + if (!ddc) + return GPIO_DDC_LINE_UNKNOWN; + return (enum gpio_ddc_line)dal_gpio_get_enum(ddc->pin_data); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 5273ca09fe12..f0abbb7c2cb2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -665,16 +665,45 @@ void dce110_update_info_frame(struct pipe_ctx *pipe_ctx) } static void -dce110_dac_encoder_control(struct pipe_ctx *pipe_ctx, bool enable) +dce110_external_encoder_control(enum bp_external_encoder_control_action action, + struct dc_link *link, + struct dc_crtc_timing *timing) { - struct dc_link *link = pipe_ctx->stream->link; + struct dc *dc = link->ctx->dc; struct dc_bios *bios = link->ctx->dc_bios; - struct bp_encoder_control encoder_control = {0}; + const struct dc_link_settings *link_settings = &link->cur_link_settings; + enum bp_result bp_result = BP_RESULT_OK; + struct bp_external_encoder_control ext_cntl = { + .action = action, + .connector_obj_id = link->link_enc->connector, + .encoder_id = link->ext_enc_id, + .lanes_number = link_settings->lane_count, + .link_rate = link_settings->link_rate, + + /* Use signal type of the real link encoder, ie. DP */ + .signal = link->connector_signal, + + /* We don't know the timing yet when executing the SETUP action, + * so use a reasonably high default value. It seems that ENABLE + * can change the actual pixel clock but doesn't work with higher + * pixel clocks than what SETUP was called with. + */ + .pixel_clock = timing ? timing->pix_clk_100hz / 10 : 300000, + .color_depth = timing ? timing->display_color_depth : COLOR_DEPTH_888, + }; + DC_LOGGER_INIT(dc->ctx); - encoder_control.action = enable ? ENCODER_CONTROL_ENABLE : ENCODER_CONTROL_DISABLE; - encoder_control.engine_id = link->link_enc->analog_engine; - encoder_control.pixel_clock = pipe_ctx->stream->timing.pix_clk_100hz / 10; - bios->funcs->encoder_control(bios, &encoder_control); + bp_result = bios->funcs->external_encoder_control(bios, &ext_cntl); + + if (bp_result != BP_RESULT_OK) + DC_LOG_ERROR("Failed to execute external encoder action: 0x%x\n", action); +} + +static void +dce110_prepare_ddc(struct dc_link *link) +{ + if (link->ext_enc_id.id) + dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_DDC_SETUP, link, NULL); } static bool @@ -684,7 +713,8 @@ dce110_dac_load_detect(struct dc_link *link) struct link_encoder *link_enc = link->link_enc; enum bp_result bp_result; - bp_result = bios->funcs->dac_load_detection(bios, link_enc->analog_engine); + bp_result = bios->funcs->dac_load_detection( + bios, link_enc->analog_engine, link->ext_enc_id); return bp_result == BP_RESULT_OK; } @@ -700,7 +730,6 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx) uint32_t early_control = 0; struct timing_generator *tg = pipe_ctx->stream_res.tg; - link_hwss->setup_stream_attribute(pipe_ctx); link_hwss->setup_stream_encoder(pipe_ctx); dc->hwss.update_info_frame(pipe_ctx); @@ -719,8 +748,8 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx) tg->funcs->set_early_control(tg, early_control); - if (dc_is_rgb_signal(pipe_ctx->stream->signal)) - dce110_dac_encoder_control(pipe_ctx, true); + if (link->ext_enc_id.id) + dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_ENABLE, link, timing); } static enum bp_result link_transmitter_control( @@ -1219,8 +1248,8 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) link_enc->transmitter - TRANSMITTER_UNIPHY_A); } - if (dc_is_rgb_signal(pipe_ctx->stream->signal)) - dce110_dac_encoder_control(pipe_ctx, false); + if (link->ext_enc_id.id) + dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_DISABLE, link, NULL); } void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, @@ -1603,22 +1632,6 @@ static enum dc_status dce110_enable_stream_timing( return DC_OK; } -static void -dce110_select_crtc_source(struct pipe_ctx *pipe_ctx) -{ - struct dc_link *link = pipe_ctx->stream->link; - struct dc_bios *bios = link->ctx->dc_bios; - struct bp_crtc_source_select crtc_source_select = {0}; - enum engine_id engine_id = link->link_enc->preferred_engine; - - if (dc_is_rgb_signal(pipe_ctx->stream->signal)) - engine_id = link->link_enc->analog_engine; - crtc_source_select.controller_id = CONTROLLER_ID_D0 + pipe_ctx->stream_res.tg->inst; - crtc_source_select.color_depth = pipe_ctx->stream->timing.display_color_depth; - crtc_source_select.engine_id = engine_id; - crtc_source_select.sink_signal = pipe_ctx->stream->signal; - bios->funcs->select_crtc_source(bios, &crtc_source_select); -} enum dc_status dce110_apply_single_controller_ctx_to_hw( struct pipe_ctx *pipe_ctx, @@ -1639,10 +1652,6 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( hws->funcs.disable_stream_gating(dc, pipe_ctx); } - if (pipe_ctx->stream->signal == SIGNAL_TYPE_RGB) { - dce110_select_crtc_source(pipe_ctx); - } - if (pipe_ctx->stream_res.audio != NULL) { struct audio_output audio_output = {0}; @@ -1722,8 +1731,7 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( pipe_ctx->stream_res.tg->funcs->set_static_screen_control( pipe_ctx->stream_res.tg, event_triggers, 2); - if (!dc_is_virtual_signal(pipe_ctx->stream->signal) && - !dc_is_rgb_signal(pipe_ctx->stream->signal)) + if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->dig_connect_to_otg( pipe_ctx->stream_res.stream_enc, pipe_ctx->stream_res.tg->inst); @@ -3376,6 +3384,15 @@ void dce110_enable_tmds_link_output(struct dc_link *link, link->phy_state.symclk_state = SYMCLK_ON_TX_ON; } +static void dce110_enable_analog_link_output( + struct dc_link *link, + uint32_t pix_clk_100hz) +{ + link->link_enc->funcs->enable_analog_output( + link->link_enc, + pix_clk_100hz); +} + void dce110_enable_dp_link_output( struct dc_link *link, const struct link_resource *link_res, @@ -3423,6 +3440,11 @@ void dce110_enable_dp_link_output( } } + if (link->ext_enc_id.id) { + dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_INIT, link, NULL); + dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_SETUP, link, NULL); + } + if (dc->link_srv->dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) { if (dc->clk_mgr->funcs->notify_link_rate_change) dc->clk_mgr->funcs->notify_link_rate_change(dc->clk_mgr, link); @@ -3513,8 +3535,10 @@ static const struct hw_sequencer_funcs dce110_funcs = { .enable_lvds_link_output = dce110_enable_lvds_link_output, .enable_tmds_link_output = dce110_enable_tmds_link_output, .enable_dp_link_output = dce110_enable_dp_link_output, + .enable_analog_link_output = dce110_enable_analog_link_output, .disable_link_output = dce110_disable_link_output, .dac_load_detect = dce110_dac_load_detect, + .prepare_ddc = dce110_prepare_ddc, }; static const struct hwseq_private_funcs dce110_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 7e7682d7dfc8..ae4c4ad05baa 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -568,7 +568,9 @@ static bool construct_phy(struct dc_link *link, goto ddc_create_fail; } - if (!link->ddc->ddc_pin) { + /* Embedded display connectors such as LVDS may not have DDC. */ + if (!link->ddc->ddc_pin && + !dc_is_embedded_signal(link->connector_signal)) { DC_ERROR("Failed to get I2C info for connector!\n"); goto ddc_create_fail; } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c index 6a25dcfcdf17..d2d56a1c4b8b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c @@ -753,7 +753,8 @@ static struct link_encoder *dce60_link_encoder_create( enc_init_data, &link_enc_feature, &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], + enc_init_data->channel == CHANNEL_ID_UNKNOWN ? + NULL : &link_enc_aux_regs[enc_init_data->channel - 1], enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c index 33be49b3c1b1..6c00497e9a01 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c @@ -760,7 +760,8 @@ static struct link_encoder *dce80_link_encoder_create( enc_init_data, &link_enc_feature, &link_enc_regs[link_regs_id], - &link_enc_aux_regs[enc_init_data->channel - 1], + enc_init_data->channel == CHANNEL_ID_UNKNOWN ? + NULL : &link_enc_aux_regs[enc_init_data->channel - 1], enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ? NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]); return &enc110->base; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 82f81b586986..3751f7a94a05 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -92,9 +92,14 @@ #include "dml/dcn32/dcn32_fpu.h" #include "dc_state_priv.h" +#include "dc_fpu.h" #include "dml2_0/dml2_wrapper.h" +#if !defined(DC_RUN_WITH_PREEMPTION_ENABLED) +#define DC_RUN_WITH_PREEMPTION_ENABLED(code) code +#endif + #define DC_LOGGER_INIT(logger) enum dcn32_clk_src_array_id { @@ -1684,7 +1689,8 @@ static void dcn32_enable_phantom_plane(struct dc *dc, if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) phantom_plane = prev_phantom_plane; else - phantom_plane = dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state); + DC_RUN_WITH_PREEMPTION_ENABLED(phantom_plane = + dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state)); if (!phantom_plane) continue; diff --git a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h index 38a77fa9b4af..a0f03fb67605 100644 --- a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h +++ b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h @@ -153,6 +153,10 @@ struct embedded_panel_info { uint32_t drr_enabled; uint32_t min_drr_refresh_rate; bool realtek_eDPToLVDS; + uint16_t panel_width_mm; + uint16_t panel_height_mm; + uint16_t fake_edid_size; + const uint8_t *fake_edid; }; struct dc_firmware_info { diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c index 731355bdb9bc..3650e7beeb67 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c @@ -1333,12 +1333,13 @@ static int ci_populate_all_memory_levels(struct pp_hwmgr *hwmgr) dev_id = adev->pdev->device; - if ((dpm_table->mclk_table.count >= 2) - && ((dev_id == 0x67B0) || (dev_id == 0x67B1))) { - smu_data->smc_state_table.MemoryLevel[1].MinVddci = - smu_data->smc_state_table.MemoryLevel[0].MinVddci; - smu_data->smc_state_table.MemoryLevel[1].MinMvdd = - smu_data->smc_state_table.MemoryLevel[0].MinMvdd; + if ((dpm_table->mclk_table.count >= 2) && + ((dev_id == 0x67B0) || (dev_id == 0x67B1)) && + (adev->pdev->revision == 0)) { + smu_data->smc_state_table.MemoryLevel[1].MinVddc = + smu_data->smc_state_table.MemoryLevel[0].MinVddc; + smu_data->smc_state_table.MemoryLevel[1].MinVddcPhases = + smu_data->smc_state_table.MemoryLevel[0].MinVddcPhases; } smu_data->smc_state_table.MemoryLevel[0].ActivityLevel = 0x1F; CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.MemoryLevel[0].ActivityLevel); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 7f386ff0c872..9d8b1227388f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -425,6 +425,7 @@ static int aldebaran_set_default_dpm_table(struct smu_context *smu) dpm_table->dpm_levels[0].enabled = true; dpm_table->dpm_levels[1].value = pptable->GfxclkFmax; dpm_table->dpm_levels[1].enabled = true; + dpm_table->flags |= SMU_DPM_TABLE_FINE_GRAINED; } else { dpm_table->count = 1; dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index cd0a23f432ff..0df8c05a7fce 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -1129,6 +1129,7 @@ static int smu_v13_0_6_set_default_dpm_table(struct smu_context *smu) /* gfxclk dpm table setup */ dpm_table = &dpm_context->dpm_tables.gfx_table; dpm_table->clk_type = SMU_GFXCLK; + dpm_table->flags = SMU_DPM_TABLE_FINE_GRAINED; if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT)) { /* In the case of gfxclk, only fine-grained dpm is honored. * Get min/max values from FW. diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c index c3cb36813806..940b43105817 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c @@ -435,10 +435,12 @@ int smu_v15_0_fini_smc_tables(struct smu_context *smu) smu_table->watermarks_table = NULL; smu_table->metrics_time = 0; + kfree(smu_dpm->dpm_policies); kfree(smu_dpm->dpm_context); kfree(smu_dpm->golden_dpm_context); kfree(smu_dpm->dpm_current_power_state); kfree(smu_dpm->dpm_request_power_state); + smu_dpm->dpm_policies = NULL; smu_dpm->dpm_context = NULL; smu_dpm->golden_dpm_context = NULL; smu_dpm->dpm_context_size = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 3d49e58794d2..90c7127beabf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -1370,7 +1370,7 @@ int smu_cmn_print_dpm_clk_levels(struct smu_context *smu, level_index = 1; } - if (!is_fine_grained) { + if (!is_fine_grained || count == 1) { for (i = 0; i < count; i++) { freq_match = !is_deep_sleep && smu_cmn_freqs_match( diff --git a/drivers/gpu/drm/bridge/chipone-icn6211.c b/drivers/gpu/drm/bridge/chipone-icn6211.c index 814713c5bea9..553a1df4688d 100644 --- a/drivers/gpu/drm/bridge/chipone-icn6211.c +++ b/drivers/gpu/drm/bridge/chipone-icn6211.c @@ -758,7 +758,9 @@ static int chipone_i2c_probe(struct i2c_client *client) dev_set_drvdata(dev, icn); i2c_set_clientdata(client, icn); - drm_bridge_add(&icn->bridge); + ret = devm_drm_bridge_add(dev, &icn->bridge); + if (ret) + return ret; return chipone_dsi_host_attach(icn); } diff --git a/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c b/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c index 441fd32dc91c..d64e328bf542 100644 --- a/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c +++ b/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c @@ -222,52 +222,58 @@ static const struct drm_bridge_funcs imx8qxp_pxl2dpi_bridge_funcs = { imx8qxp_pxl2dpi_bridge_atomic_get_output_bus_fmts, }; -static struct device_node * +static int imx8qxp_pxl2dpi_get_available_ep_from_port(struct imx8qxp_pxl2dpi *p2d, - u32 port_id) + u32 port_id, + struct device_node **ep) { - struct device_node *port, *ep; + struct device_node *port; + int ret = 0; int ep_cnt; + *ep = NULL; + port = of_graph_get_port_by_id(p2d->dev->of_node, port_id); if (!port) { DRM_DEV_ERROR(p2d->dev, "failed to get port@%u\n", port_id); - return ERR_PTR(-ENODEV); + return -ENODEV; } ep_cnt = of_get_available_child_count(port); if (ep_cnt == 0) { DRM_DEV_ERROR(p2d->dev, "no available endpoints of port@%u\n", port_id); - ep = ERR_PTR(-ENODEV); + ret = -ENODEV; goto out; } else if (ep_cnt > 1) { DRM_DEV_ERROR(p2d->dev, "invalid available endpoints of port@%u\n", port_id); - ep = ERR_PTR(-EINVAL); + ret = -EINVAL; goto out; } - ep = of_get_next_available_child(port, NULL); - if (!ep) { + *ep = of_get_next_available_child(port, NULL); + if (!*ep) { DRM_DEV_ERROR(p2d->dev, "failed to get available endpoint of port@%u\n", port_id); - ep = ERR_PTR(-ENODEV); + ret = -ENODEV; goto out; } out: of_node_put(port); - return ep; + return ret; } static int imx8qxp_pxl2dpi_find_next_bridge(struct imx8qxp_pxl2dpi *p2d) { - struct device_node *ep __free(device_node) = - imx8qxp_pxl2dpi_get_available_ep_from_port(p2d, 1); - if (IS_ERR(ep)) - return PTR_ERR(ep); + struct device_node *ep __free(device_node) = NULL; + int ret; + + ret = imx8qxp_pxl2dpi_get_available_ep_from_port(p2d, 1, &ep); + if (ret) + return ret; struct device_node *remote __free(device_node) = of_graph_get_remote_port_parent(ep); if (!remote || !of_device_is_available(remote)) { @@ -291,9 +297,9 @@ static int imx8qxp_pxl2dpi_set_pixel_link_sel(struct imx8qxp_pxl2dpi *p2d) struct of_endpoint endpoint; int ret; - ep = imx8qxp_pxl2dpi_get_available_ep_from_port(p2d, 0); - if (IS_ERR(ep)) - return PTR_ERR(ep); + ret = imx8qxp_pxl2dpi_get_available_ep_from_port(p2d, 0, &ep); + if (ret) + return ret; ret = of_graph_parse_endpoint(ep, &endpoint); if (ret) { diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c index 9246e9c15a6e..ed21f09cd19a 100644 --- a/drivers/gpu/drm/bridge/ite-it66121.c +++ b/drivers/gpu/drm/bridge/ite-it66121.c @@ -1559,6 +1559,11 @@ static int it66121_probe(struct i2c_client *client) return ret; } + ctx->gpio_reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); + if (IS_ERR(ctx->gpio_reset)) + return dev_err_probe(dev, PTR_ERR(ctx->gpio_reset), + "Failed to get reset GPIO\n"); + it66121_hw_reset(ctx); ctx->regmap = devm_regmap_init_i2c(client, &it66121_regmap_config); diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index c9e6505cbd88..2d02cc69f237 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -251,7 +251,6 @@ static void ge_b850v3_lvds_remove(void) goto out; drm_bridge_remove(&ge_b850v3_lvds_ptr->bridge); - ge_b850v3_lvds_ptr = NULL; out: mutex_unlock(&ge_b850v3_lvds_dev_mutex); @@ -261,6 +260,7 @@ static int ge_b850v3_register(void) { struct i2c_client *stdp4028_i2c = ge_b850v3_lvds_ptr->stdp4028_i2c; struct device *dev = &stdp4028_i2c->dev; + int ret; /* drm bridge initialization */ ge_b850v3_lvds_ptr->bridge.ops = DRM_BRIDGE_OP_DETECT | @@ -277,11 +277,15 @@ static int ge_b850v3_register(void) if (!stdp4028_i2c->irq) return 0; - return devm_request_threaded_irq(&stdp4028_i2c->dev, - stdp4028_i2c->irq, NULL, - ge_b850v3_lvds_irq_handler, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, - "ge-b850v3-lvds-dp", ge_b850v3_lvds_ptr); + ret = devm_request_threaded_irq(&stdp4028_i2c->dev, + stdp4028_i2c->irq, NULL, + ge_b850v3_lvds_irq_handler, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + "ge-b850v3-lvds-dp", ge_b850v3_lvds_ptr); + if (ret) + drm_bridge_remove(&ge_b850v3_lvds_ptr->bridge); + + return ret; } static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c) diff --git a/drivers/gpu/drm/bridge/tda998x_drv.c b/drivers/gpu/drm/bridge/tda998x_drv.c index d9b388165de1..6c427bc75896 100644 --- a/drivers/gpu/drm/bridge/tda998x_drv.c +++ b/drivers/gpu/drm/bridge/tda998x_drv.c @@ -1293,7 +1293,7 @@ static const struct drm_edid *tda998x_edid_read(struct tda998x_priv *priv, * can't handle signals gracefully. */ if (tda998x_edid_delay_wait(priv)) - return 0; + return NULL; if (priv->rev == TDA19988) reg_clear(priv, REG_TX4, TX4_PD_RAM); @@ -1762,7 +1762,7 @@ static const struct drm_bridge_funcs tda998x_bridge_funcs = { static int tda998x_get_audio_ports(struct tda998x_priv *priv, struct device_node *np) { - const u32 *port_data; + const __be32 *port_data; u32 size; int i; diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index c598b99673fc..e7db4e4ea700 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -831,7 +831,7 @@ static void fill_palette_332(struct drm_crtc *crtc, u16 r, u16 g, u16 b, } /** - * drm_crtc_fill_palette_332 - Programs a default palette for R332-like formats + * drm_crtc_fill_palette_332 - Programs a default palette for RGB332-like formats * @crtc: The displaying CRTC * @set_palette: Callback for programming the hardware gamma LUT * diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 985c283cf59f..675675480da4 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -697,6 +697,7 @@ static void drm_dev_init_release(struct drm_device *dev, void *res) mutex_destroy(&dev->master_mutex); mutex_destroy(&dev->clientlist_mutex); mutex_destroy(&dev->filelist_mutex); + mutex_destroy(&dev->gem_lru_mutex); } static int drm_dev_init(struct drm_device *dev, @@ -738,6 +739,7 @@ static int drm_dev_init(struct drm_device *dev, INIT_LIST_HEAD(&dev->vblank_event_list); spin_lock_init(&dev->event_lock); + mutex_init(&dev->gem_lru_mutex); mutex_init(&dev->filelist_mutex); mutex_init(&dev->clientlist_mutex); mutex_init(&dev->master_mutex); diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index a80a335f4148..1541fc8a9ac2 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -490,7 +490,7 @@ static void drm_fb_helper_memory_range_to_clip(struct fb_info *info, off_t off, * the number of horizontal pixels that need an update. */ off_t bit_off = (off % line_length) * 8; - off_t bit_end = (end % line_length) * 8; + off_t bit_end = bit_off + len * 8; x1 = bit_off / info->var.bits_per_pixel; x2 = DIV_ROUND_UP(bit_end, info->var.bits_per_pixel); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index d6424267260b..e3b8a1f353cb 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1019,7 +1019,7 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_gem_change_handle *args = data; - struct drm_gem_object *obj; + struct drm_gem_object *obj, *idrobj; int handle, ret; if (!drm_core_check_feature(dev, DRIVER_GEM)) @@ -1042,12 +1042,30 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, mutex_lock(&file_priv->prime.lock); spin_lock(&file_priv->table_lock); + + /* When create_tail allocs an obj idr, it needs to first alloc as NULL, + * then later replace with the correct object. This is not necessary + * here, because the only operations that could race are drm_prime + * bookkeeping, and we hold the prime lock. + */ ret = idr_alloc(&file_priv->object_idr, obj, handle, handle + 1, GFP_NOWAIT); - spin_unlock(&file_priv->table_lock); - if (ret < 0) - goto out_unlock; + if (ret < 0) { + spin_unlock(&file_priv->table_lock); + goto out_unlock; + } + + idrobj = idr_replace(&file_priv->object_idr, NULL, handle); + if (idrobj != obj) { + idr_replace(&file_priv->object_idr, idrobj, handle); + idr_remove(&file_priv->object_idr, args->new_handle); + spin_unlock(&file_priv->table_lock); + ret = -ENOENT; + goto out_unlock; + } + + spin_unlock(&file_priv->table_lock); if (obj->dma_buf) { ret = drm_prime_add_buf_handle(&file_priv->prime, obj->dma_buf, @@ -1066,7 +1084,9 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data, spin_lock(&file_priv->table_lock); idr_remove(&file_priv->object_idr, args->handle); + idrobj = idr_replace(&file_priv->object_idr, obj, handle); spin_unlock(&file_priv->table_lock); + WARN_ON(idrobj != NULL); out_unlock: mutex_unlock(&file_priv->prime.lock); @@ -1541,12 +1561,10 @@ EXPORT_SYMBOL(drm_gem_unlock_reservations); * drm_gem_lru_init - initialize a LRU * * @lru: The LRU to initialize - * @lock: The lock protecting the LRU */ void -drm_gem_lru_init(struct drm_gem_lru *lru, struct mutex *lock) +drm_gem_lru_init(struct drm_gem_lru *lru) { - lru->lock = lock; lru->count = 0; INIT_LIST_HEAD(&lru->list); } @@ -1571,14 +1589,10 @@ drm_gem_lru_remove_locked(struct drm_gem_object *obj) void drm_gem_lru_remove(struct drm_gem_object *obj) { - struct drm_gem_lru *lru = obj->lru; - - if (!lru) - return; - - mutex_lock(lru->lock); - drm_gem_lru_remove_locked(obj); - mutex_unlock(lru->lock); + mutex_lock(&obj->dev->gem_lru_mutex); + if (obj->lru) + drm_gem_lru_remove_locked(obj); + mutex_unlock(&obj->dev->gem_lru_mutex); } EXPORT_SYMBOL(drm_gem_lru_remove); @@ -1593,7 +1607,7 @@ EXPORT_SYMBOL(drm_gem_lru_remove); void drm_gem_lru_move_tail_locked(struct drm_gem_lru *lru, struct drm_gem_object *obj) { - lockdep_assert_held_once(lru->lock); + lockdep_assert_held_once(&obj->dev->gem_lru_mutex); if (obj->lru) drm_gem_lru_remove_locked(obj); @@ -1617,9 +1631,9 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail_locked); void drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj) { - mutex_lock(lru->lock); + mutex_lock(&obj->dev->gem_lru_mutex); drm_gem_lru_move_tail_locked(lru, obj); - mutex_unlock(lru->lock); + mutex_unlock(&obj->dev->gem_lru_mutex); } EXPORT_SYMBOL(drm_gem_lru_move_tail); @@ -1633,6 +1647,7 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail); * of the shrink callback to check for this (ie. dma_resv_test_signaled()) * or if necessary block until the buffer becomes idle. * + * @dev: DRM device the LRU belongs to * @lru: The LRU to scan * @nr_to_scan: The number of pages to try to reclaim * @remaining: The number of pages left to reclaim, should be initialized by caller @@ -1640,7 +1655,8 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail); * @ticket: Optional ww_acquire_ctx context to use for locking */ unsigned long -drm_gem_lru_scan(struct drm_gem_lru *lru, +drm_gem_lru_scan(struct drm_device *dev, + struct drm_gem_lru *lru, unsigned int nr_to_scan, unsigned long *remaining, bool (*shrink)(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket), @@ -1650,9 +1666,9 @@ drm_gem_lru_scan(struct drm_gem_lru *lru, struct drm_gem_object *obj; unsigned freed = 0; - drm_gem_lru_init(&still_in_lru, lru->lock); + drm_gem_lru_init(&still_in_lru); - mutex_lock(lru->lock); + mutex_lock(&dev->gem_lru_mutex); while (freed < nr_to_scan) { obj = list_first_entry_or_null(&lru->list, typeof(*obj), lru_node); @@ -1675,7 +1691,7 @@ drm_gem_lru_scan(struct drm_gem_lru *lru, * rest of the loop body, to reduce contention with other * code paths that need the LRU lock */ - mutex_unlock(lru->lock); + mutex_unlock(&dev->gem_lru_mutex); if (ticket) ww_acquire_init(ticket, &reservation_ww_class); @@ -1709,7 +1725,7 @@ drm_gem_lru_scan(struct drm_gem_lru *lru, tail: drm_gem_object_put(obj); - mutex_lock(lru->lock); + mutex_lock(&dev->gem_lru_mutex); } /* @@ -1721,7 +1737,7 @@ tail: list_splice_tail(&still_in_lru.list, &lru->list); lru->count += still_in_lru.count; - mutex_unlock(lru->lock); + mutex_unlock(&dev->gem_lru_mutex); return freed; } diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 9166c353f131..88808e972cc1 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -172,8 +172,8 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, } for (i = 0; i < info->num_planes; i++) { - unsigned int width = mode_cmd->width / (i ? info->hsub : 1); - unsigned int height = mode_cmd->height / (i ? info->vsub : 1); + unsigned int width = drm_format_info_plane_width(info, mode_cmd->width, i); + unsigned int height = drm_format_info_plane_height(info, mode_cmd->height, i); unsigned int min_size; objs[i] = drm_gem_object_lookup(file, mode_cmd->handles[i]); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index df4232d7e135..3cc50d697c89 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -116,16 +116,18 @@ int etnaviv_sched_push_job(struct etnaviv_gem_submit *submit) */ mutex_lock(&gpu->sched_lock); + ret = xa_alloc_cyclic(&gpu->user_fences, &submit->out_fence_id, + NULL, xa_limit_32b, &gpu->next_user_fence, + GFP_KERNEL); + if (ret < 0) + goto out_unlock; + drm_sched_job_arm(&submit->sched_job); submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished); - ret = xa_alloc_cyclic(&gpu->user_fences, &submit->out_fence_id, - submit->out_fence, xa_limit_32b, - &gpu->next_user_fence, GFP_KERNEL); - if (ret < 0) { - drm_sched_job_cleanup(&submit->sched_job); - goto out_unlock; - } + + xa_store(&gpu->user_fences, submit->out_fence_id, + submit->out_fence, GFP_KERNEL); /* the scheduler holds on to the job now */ kref_get(&submit->refcount); diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c index 29a8366513fa..e68c954ec3e6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_mic.c +++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c @@ -423,7 +423,9 @@ static int exynos_mic_probe(struct platform_device *pdev) mic->bridge.of_node = dev->of_node; - drm_bridge_add(&mic->bridge); + ret = devm_drm_bridge_add(dev, &mic->bridge); + if (ret) + goto err; pm_runtime_enable(dev); @@ -443,12 +445,8 @@ err: static void exynos_mic_remove(struct platform_device *pdev) { - struct exynos_mic *mic = platform_get_drvdata(pdev); - component_del(&pdev->dev, &exynos_mic_component_ops); pm_runtime_disable(&pdev->dev); - - drm_bridge_remove(&mic->bridge); } static const struct of_device_id exynos_mic_of_match[] = { diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c index 58d7e191fd56..403d21cbb3a2 100644 --- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c +++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c @@ -580,6 +580,7 @@ static int oaktrail_hdmi_get_modes(struct drm_connector *connector) } else { edid = (struct edid *)raw_edid; /* FIXME ? edid = drm_get_edid(connector, i2c_adap); */ + i2c_put_adapter(i2c_adap); } if (edid) { diff --git a/drivers/gpu/drm/gma500/oaktrail_lvds.c b/drivers/gpu/drm/gma500/oaktrail_lvds.c index 884d324f0044..e194d0cce067 100644 --- a/drivers/gpu/drm/gma500/oaktrail_lvds.c +++ b/drivers/gpu/drm/gma500/oaktrail_lvds.c @@ -293,7 +293,7 @@ void oaktrail_lvds_init(struct drm_device *dev, { struct gma_encoder *gma_encoder; struct gma_connector *gma_connector; - struct gma_i2c_chan *ddc_bus; + struct gma_i2c_chan *ddc_bus = NULL; struct drm_connector *connector; struct drm_encoder *encoder; struct drm_psb_private *dev_priv = to_drm_psb_private(dev); @@ -367,6 +367,8 @@ void oaktrail_lvds_init(struct drm_device *dev, if (edid == NULL && dev_priv->lpc_gpio_base) { ddc_bus = oaktrail_lvds_i2c_init(dev); if (!IS_ERR(ddc_bus)) { + if (i2c_adap) + i2c_put_adapter(i2c_adap); i2c_adap = &ddc_bus->base; edid = drm_get_edid(connector, i2c_adap); } @@ -421,7 +423,10 @@ out: err_unlock: mutex_unlock(&dev->mode_config.mutex); - gma_i2c_destroy(to_gma_i2c_chan(connector->ddc)); + if (!IS_ERR_OR_NULL(ddc_bus)) + gma_i2c_destroy(ddc_bus); + else if (i2c_adap) + i2c_put_adapter(i2c_adap); drm_encoder_cleanup(encoder); err_connector_cleanup: drm_connector_cleanup(connector); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index f6cd0a062090..9c7c357afb09 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -584,6 +584,7 @@ struct intel_connector { struct { u8 dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; + u8 intel_wa_dpcd; bool support; bool su_support; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 4955bd8b11d7..6ef2a0043cda 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -3119,8 +3119,13 @@ static void intel_dp_compute_vsc_colorimetry(const struct intel_crtc_state *crtc drm_WARN_ON(display->drm, vsc->bpc == 6 && vsc->pixelformat != DP_PIXELFORMAT_RGB); - /* all YCbCr are always limited range */ - vsc->dynamic_range = DP_DYNAMIC_RANGE_CTA; + /* All YCbCr formats are always limited range. */ + if (vsc->pixelformat == DP_PIXELFORMAT_RGB) + vsc->dynamic_range = crtc_state->limited_color_range ? + DP_DYNAMIC_RANGE_CTA : DP_DYNAMIC_RANGE_VESA; + else + vsc->dynamic_range = DP_DYNAMIC_RANGE_CTA; + vsc->content_type = DP_CONTENT_TYPE_NOT_DEFINED; } @@ -5298,7 +5303,7 @@ int intel_dp_as_sdp_unpack(struct drm_dp_as_sdp *as_sdp, as_sdp->length = sdp->sdp_header.HB3 & DP_ADAPTIVE_SYNC_SDP_LENGTH; as_sdp->mode = sdp->db[0] & DP_ADAPTIVE_SYNC_SDP_OPERATION_MODE; as_sdp->vtotal = (sdp->db[2] << 8) | sdp->db[1]; - as_sdp->target_rr = (u64)sdp->db[3] | ((u64)sdp->db[4] & 0x3); + as_sdp->target_rr = ((sdp->db[4] & 0x3) << 8) | sdp->db[3]; as_sdp->target_rr_divider = sdp->db[4] & 0x20 ? true : false; return 0; diff --git a/drivers/gpu/drm/i915/display/intel_dpcd.h b/drivers/gpu/drm/i915/display/intel_dpcd.h new file mode 100644 index 000000000000..4aea5326f2ed --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dpcd.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ + +#ifndef __INTEL_DPCD_H__ +#define __INTEL_DPCD_H__ + +#define INTEL_DPCD_INTEL_WA_REGISTER_CAPS 0x3f0 +# define INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_EARLYSCANLINE_SDP_SUPPORT_MASK REG_GENMASK(1, 0) +# define INTEL_DPCD_INTEL_WA_REGISTER_CAPS_FALL_BACK_TO_PSR1 0 +# define INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_WITH_EARLY_SCANLINE 1 +# define INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_WITHOUT_EARLY_SCANLINE 2 + +#endif /* __INTEL_DPCD_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c index 5390ceb21ca4..82f445c83158 100644 --- a/drivers/gpu/drm/i915/display/intel_plane.c +++ b/drivers/gpu/drm/i915/display/intel_plane.c @@ -373,7 +373,7 @@ intel_plane_color_copy_uapi_to_hw_state(struct intel_plane_state *plane_state, bool changed = false; int i = 0; - iter_colorop = plane_state->uapi.color_pipeline; + iter_colorop = from_plane_state->uapi.color_pipeline; while (iter_colorop) { for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) { diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 53c10ae76ab5..29904a037575 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -43,6 +43,7 @@ #include "intel_display_wa.h" #include "intel_dmc.h" #include "intel_dp.h" +#include "intel_dpcd.h" #include "intel_dp_aux.h" #include "intel_dsb.h" #include "intel_frontbuffer.h" @@ -716,8 +717,14 @@ static void _psr_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *co connector->dp.psr_caps.su_support ? "" : "not "); } - if (connector->dp.psr_caps.su_support) + if (connector->dp.psr_caps.su_support) { + ret = drm_dp_dpcd_read_byte(&intel_dp->aux, + INTEL_DPCD_INTEL_WA_REGISTER_CAPS, + &connector->dp.psr_caps.intel_wa_dpcd); + if (ret < 0) + return; _psr_compute_su_granularity(intel_dp, connector); + } } void intel_psr_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *connector) @@ -1358,9 +1365,35 @@ static bool psr2_granularity_check(struct intel_crtc_state *crtc_state, return true; } -static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_dp, - struct intel_crtc_state *crtc_state) +static bool apply_scanline_indication_wa(struct intel_crtc_state *crtc_state, + struct intel_connector *connector) +{ + struct intel_dp *intel_dp = intel_attached_dp(connector); + u8 early_scanline_support = connector->dp.psr_caps.intel_wa_dpcd & + INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_EARLYSCANLINE_SDP_SUPPORT_MASK; + + if (intel_dp->edp_dpcd[0] >= DP_EDP_15) + return true; + + switch (early_scanline_support) { + case INTEL_DPCD_INTEL_WA_REGISTER_CAPS_FALL_BACK_TO_PSR1: + crtc_state->req_psr2_sdp_prior_scanline = false; + return false; + case INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_WITH_EARLY_SCANLINE: + return true; + case INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_WITHOUT_EARLY_SCANLINE: + crtc_state->req_psr2_sdp_prior_scanline = false; + return true; + default: + MISSING_CASE(early_scanline_support); + return false; + } +} + +static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_crtc_state *crtc_state, + struct intel_connector *connector) { + struct intel_dp *intel_dp = intel_attached_dp(connector); struct intel_display *display = to_intel_display(intel_dp); const struct drm_display_mode *adjusted_mode = &crtc_state->uapi.adjusted_mode; u32 hblank_total, hblank_ns, req_ns; @@ -1379,7 +1412,8 @@ static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_d return false; crtc_state->req_psr2_sdp_prior_scanline = true; - return true; + + return apply_scanline_indication_wa(crtc_state, connector); } static int intel_psr_entry_setup_frames(struct intel_dp *intel_dp, @@ -1660,7 +1694,7 @@ static bool intel_sel_update_config_valid(struct intel_crtc_state *crtc_state, conn_state)) goto unsupported; - if (!_compute_psr2_sdp_prior_scanline_indication(intel_dp, crtc_state)) { + if (!_compute_psr2_sdp_prior_scanline_indication(crtc_state, connector)) { drm_dbg_kms(display->drm, "Selective update not enabled, SDP indication do not fit in hblank\n"); goto unsupported; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 984d0056c01c..adff482a6c9c 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -132,7 +132,8 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) rcu_read_lock(); /* protect the GEM context */ if (guilty) { i915_request_set_error_once(rq, -EIO); - __i915_request_skip(rq); + if (!i915_request_signaled(rq)) + __i915_request_skip(rq); banned = mark_guilty(rq); } else { i915_request_set_error_once(rq, -EAGAIN); diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 385a634c3ed0..d9be7a5a239c 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -750,9 +750,8 @@ static bool has_auxccs(struct drm_device *drm) { struct drm_i915_private *i915 = to_i915(drm); - return IS_GRAPHICS_VER(i915, 9, 12) || - IS_ALDERLAKE_P(i915) || - IS_METEORLAKE(i915); + return IS_GRAPHICS_VER(i915, 9, 12) && + !HAS_FLAT_CCS(i915); } static bool has_fenced_regions(struct drm_device *drm) diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.c b/drivers/gpu/drm/imagination/pvr_fw_trace.c index e154cb35f604..6193811ef7be 100644 --- a/drivers/gpu/drm/imagination/pvr_fw_trace.c +++ b/drivers/gpu/drm/imagination/pvr_fw_trace.c @@ -558,6 +558,6 @@ pvr_fw_trace_debugfs_init(struct pvr_device *pvr_dev, struct dentry *dir) &pvr_fw_trace_fops); } - debugfs_create_file("trace_mask", 0600, dir, fw_trace, + debugfs_create_file("trace_mask", 0600, dir, pvr_dev, &pvr_fw_trace_mask_fops); } diff --git a/drivers/gpu/drm/loongson/lsdc_drv.c b/drivers/gpu/drm/loongson/lsdc_drv.c index 1ece1ea42f78..34405073c4d4 100644 --- a/drivers/gpu/drm/loongson/lsdc_drv.c +++ b/drivers/gpu/drm/loongson/lsdc_drv.c @@ -293,7 +293,7 @@ static int lsdc_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) vga_client_register(pdev, lsdc_vga_set_decode); - drm_kms_helper_poll_init(ddev); + drmm_kms_helper_poll_init(ddev); if (loongson_vblank) { ret = drm_vblank_init(ddev, descp->num_of_crtc); diff --git a/drivers/gpu/drm/mediatek/mtk_cec.c b/drivers/gpu/drm/mediatek/mtk_cec.c index c7be530ca041..b8ccd6e55bed 100644 --- a/drivers/gpu/drm/mediatek/mtk_cec.c +++ b/drivers/gpu/drm/mediatek/mtk_cec.c @@ -240,7 +240,7 @@ static const struct of_device_id mtk_cec_of_ids[] = { }; MODULE_DEVICE_TABLE(of, mtk_cec_of_ids); -struct platform_driver mtk_cec_driver = { +static struct platform_driver mtk_cec_driver = { .probe = mtk_cec_probe, .remove = mtk_cec_remove, .driver = { diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c index 6358e1af69b4..2acbdb025d89 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c @@ -328,7 +328,7 @@ static const struct of_device_id mtk_hdmi_ddc_match[] = { }; MODULE_DEVICE_TABLE(of, mtk_hdmi_ddc_match); -struct platform_driver mtk_hdmi_ddc_driver = { +static struct platform_driver mtk_hdmi_ddc_driver = { .probe = mtk_hdmi_ddc_probe, .remove = mtk_hdmi_ddc_remove, .driver = { diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c index d937219fdb7e..31e81a6de6d8 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c @@ -389,7 +389,7 @@ static const struct of_device_id mtk_hdmi_ddc_v2_match[] = { }; MODULE_DEVICE_TABLE(of, mtk_hdmi_ddc_v2_match); -struct platform_driver mtk_hdmi_ddc_v2_driver = { +static struct platform_driver mtk_hdmi_ddc_v2_driver = { .probe = mtk_hdmi_ddc_v2_probe, .driver = { .name = "mediatek-hdmi-ddc-v2", diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_v2.c b/drivers/gpu/drm/mediatek/mtk_hdmi_v2.c index b5c738380dc2..a8eb6fd0908b 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi_v2.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi_v2.c @@ -50,7 +50,7 @@ enum mtk_hdmi_v2_clk_id { MTK_HDMI_V2_CLK_COUNT, }; -const char *const mtk_hdmi_v2_clk_names[MTK_HDMI_V2_CLK_COUNT] = { +static const char *const mtk_hdmi_v2_clk_names[MTK_HDMI_V2_CLK_COUNT] = { [MTK_HDMI_V2_CLK_HDMI_APB_SEL] = "bus", [MTK_HDMI_V2_CLK_HDCP_SEL] = "hdcp", [MTK_HDMI_V2_CLK_HDCP_24M_SEL] = "hdcp24m", diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index d5aba072f44c..7a3e3c2f5cf3 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -2621,7 +2621,6 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) struct platform_device *pdev = priv->gpu_pdev; struct adreno_platform_config *config = pdev->dev.platform_data; const struct adreno_info *info = config->info; - struct device_node *node; struct a6xx_gpu *a6xx_gpu; struct adreno_gpu *adreno_gpu; struct msm_gpu *gpu; @@ -2643,7 +2642,8 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = NULL; /* Check if there is a GMU phandle and set it up */ - node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0); + struct device_node *node __free(device_node) = + of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0); /* FIXME: How do we gracefully handle this? */ BUG_ON(!node); @@ -2690,7 +2690,6 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) ret = a6xx_gmu_wrapper_init(a6xx_gpu, node); else ret = a6xx_gmu_init(a6xx_gpu, node); - of_node_put(node); if (ret) { a6xx_destroy(&(a6xx_gpu->base.base)); return ERR_PTR(ret); @@ -2740,6 +2739,7 @@ const struct adreno_gpu_funcs a6xx_gpu_funcs = { .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, + .sysprof_setup = a6xx_gmu_sysprof_setup, }, .init = a6xx_gpu_init, .get_timestamp = a6xx_gmu_get_timestamp, @@ -2808,6 +2808,7 @@ const struct adreno_gpu_funcs a7xx_gpu_funcs = { .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, + .sysprof_setup = a6xx_gmu_sysprof_setup, }, .init = a6xx_gpu_init, .get_timestamp = a6xx_gmu_get_timestamp, diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c index 487c2736f2b3..186a73c0b99c 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c @@ -289,6 +289,8 @@ static int a8xx_hfi_send_perf_table(struct a6xx_gmu *gmu) (gmu->nr_gpu_freqs * num_gx_votes * sizeof(gmu->gx_arc_votes[0])) + (gmu->nr_gmu_freqs * num_cx_votes * sizeof(gmu->cx_arc_votes[0])); tbl = kzalloc(size, GFP_KERNEL); + if (!tbl) + return -ENOMEM; tbl->type = HFI_TABLE_GPU_PERF; /* First fill GX votes */ diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 4edfe80c5be7..fc38331ce640 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -17,7 +17,7 @@ MODULE_PARM_DESC(snapshot_debugbus, "Include debugbus sections in GPU devcoredum module_param_named(snapshot_debugbus, snapshot_debugbus, bool, 0600); int enable_preemption = -1; -MODULE_PARM_DESC(enable_preemption, "Enable preemption (A7xx only) (1=on , 0=disable, -1=auto (default))"); +MODULE_PARM_DESC(enable_preemption, "Enable preemption (A7xx+ only) (1=on , 0=disable, -1=auto (default))"); module_param(enable_preemption, int, 0600); bool disable_acd; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 66f80f2d12f9..03f96a1154e1 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -376,7 +376,7 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx, *value = adreno_gpu->info->gmem; return 0; case MSM_PARAM_GMEM_BASE: - if (adreno_gpu->info->family >= ADRENO_6XX_GEN4) + if (adreno_gpu->info->family >= ADRENO_6XX_GEN3) *value = 0; else *value = 0x100000; @@ -424,15 +424,21 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx, *value = vm->mm_range; return 0; case MSM_PARAM_HIGHEST_BANK_BIT: + if (!adreno_gpu->ubwc_config) + return UERR(ENOENT, drm, "no UBWC on this platform"); *value = adreno_gpu->ubwc_config->highest_bank_bit; return 0; case MSM_PARAM_RAYTRACING: *value = adreno_gpu->has_ray_tracing; return 0; case MSM_PARAM_UBWC_SWIZZLE: + if (!adreno_gpu->ubwc_config) + return UERR(ENOENT, drm, "no UBWC on this platform"); *value = adreno_gpu->ubwc_config->ubwc_swizzle; return 0; case MSM_PARAM_MACROTILE_MODE: + if (!adreno_gpu->ubwc_config) + return UERR(ENOENT, drm, "no UBWC on this platform"); *value = adreno_gpu->ubwc_config->macrotile_mode; return 0; case MSM_PARAM_UCHE_TRAP_BASE: diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_13_0_kaanapali.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_13_0_kaanapali.h index b7b06e45b529..06da1583fb1e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_13_0_kaanapali.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_13_0_kaanapali.h @@ -480,7 +480,7 @@ const struct dpu_mdss_cfg dpu_kaanapali_cfg = { .wb_count = ARRAY_SIZE(kaanapali_wb), .wb = kaanapali_wb, .cwb_count = ARRAY_SIZE(kaanapali_cwb), - .cwb = sm8650_cwb, + .cwb = kaanapali_cwb, .intf_count = ARRAY_SIZE(kaanapali_intf), .intf = kaanapali_intf, .vbif = &sm8650_vbif, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c index 6e8883dbfad4..590922c4f69b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c @@ -61,7 +61,7 @@ static int _dpu_format_populate_plane_sizes_ubwc( bool meta = MSM_FORMAT_IS_UBWC(fmt); if (MSM_FORMAT_IS_YUV(fmt)) { - unsigned int stride, sclines; + unsigned int stride, y_sclines, uv_sclines; unsigned int y_tile_width, y_tile_height; unsigned int y_meta_stride, y_meta_scanlines; unsigned int uv_meta_stride, uv_meta_scanlines; @@ -77,23 +77,25 @@ static int _dpu_format_populate_plane_sizes_ubwc( y_tile_width = 32; } - sclines = round_up(fb->height, 16); + y_sclines = round_up(fb->height, 16); + uv_sclines = round_up((fb->height+1)>>1, 16); y_tile_height = 4; } else { stride = round_up(fb->width, 128); y_tile_width = 32; - sclines = round_up(fb->height, 32); + y_sclines = round_up(fb->height, 32); + uv_sclines = round_up((fb->height+1)>>1, 32); y_tile_height = 8; } layout->plane_pitch[0] = stride; layout->plane_size[0] = round_up(layout->plane_pitch[0] * - sclines, DPU_UBWC_PLANE_SIZE_ALIGNMENT); + y_sclines, DPU_UBWC_PLANE_SIZE_ALIGNMENT); layout->plane_pitch[1] = stride; layout->plane_size[1] = round_up(layout->plane_pitch[1] * - sclines, DPU_UBWC_PLANE_SIZE_ALIGNMENT); + uv_sclines, DPU_UBWC_PLANE_SIZE_ALIGNMENT); if (!meta) return 0; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c index 7545c0293efb..6f2370c9dd98 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c @@ -5,6 +5,7 @@ #include <drm/drm_edid.h> #include <drm/drm_framebuffer.h> +#include <drm/drm_managed.h> #include "dpu_writeback.h" @@ -125,7 +126,7 @@ int dpu_writeback_init(struct drm_device *dev, struct drm_encoder *enc, struct dpu_wb_connector *dpu_wb_conn; int rc = 0; - dpu_wb_conn = devm_kzalloc(dev->dev, sizeof(*dpu_wb_conn), GFP_KERNEL); + dpu_wb_conn = drmm_kzalloc(dev, sizeof(*dpu_wb_conn), GFP_KERNEL); if (!dpu_wb_conn) return -ENOMEM; diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c index 427d3ee2b833..e603ab3817cd 100644 --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c @@ -5,11 +5,11 @@ #define pr_fmt(fmt) "[drm:%s:%d] " fmt, __func__, __LINE__ -#include <generated/utsrelease.h> +#include <linux/utsname.h> #include "msm_disp_snapshot.h" -static void msm_disp_state_dump_regs(u32 **reg, u32 aligned_len, void __iomem *base_addr) +static void msm_disp_state_dump_regs(u32 **reg, u32 len, void __iomem *base_addr) { u32 len_padded; u32 num_rows; @@ -19,11 +19,11 @@ static void msm_disp_state_dump_regs(u32 **reg, u32 aligned_len, void __iomem *b void __iomem *end_addr; int i; - len_padded = aligned_len * REG_DUMP_ALIGN; - num_rows = aligned_len / REG_DUMP_ALIGN; + len_padded = round_up(len, REG_DUMP_ALIGN); + num_rows = DIV_ROUND_UP(len, REG_DUMP_ALIGN); addr = base_addr; - end_addr = base_addr + aligned_len; + end_addr = base_addr + len; *reg = kvzalloc(len_padded, GFP_KERNEL); if (!*reg) @@ -48,8 +48,8 @@ static void msm_disp_state_dump_regs(u32 **reg, u32 aligned_len, void __iomem *b static void msm_disp_state_print_regs(const u32 *dump_addr, u32 len, void __iomem *base_addr, struct drm_printer *p) { + void __iomem *addr, *end_addr; int i; - void __iomem *addr; u32 num_rows; if (!dump_addr) { @@ -58,6 +58,7 @@ static void msm_disp_state_print_regs(const u32 *dump_addr, u32 len, } addr = base_addr; + end_addr = base_addr + len; num_rows = len / REG_DUMP_ALIGN; for (i = 0; i < num_rows; i++) { @@ -67,6 +68,17 @@ static void msm_disp_state_print_regs(const u32 *dump_addr, u32 len, dump_addr[i * 4 + 2], dump_addr[i * 4 + 3]); addr += REG_DUMP_ALIGN; } + + if (addr != end_addr) { + drm_printf(p, "0x%lx : %08x", + (unsigned long)(addr - base_addr), + dump_addr[i * 4]); + if (addr + 0x4 < end_addr) + drm_printf(p, " %08x", dump_addr[i * 4 + 1]); + if (addr + 0x8 < end_addr) + drm_printf(p, " %08x", dump_addr[i * 4 + 2]); + drm_printf(p, "\n"); + } } void msm_disp_state_print(struct msm_disp_state *state, struct drm_printer *p) @@ -79,7 +91,7 @@ void msm_disp_state_print(struct msm_disp_state *state, struct drm_printer *p) } drm_printf(p, "---\n"); - drm_printf(p, "kernel: " UTS_RELEASE "\n"); + drm_printf(p, "kernel: %s\n", init_utsname()->release); drm_printf(p, "module: " KBUILD_MODNAME "\n"); drm_printf(p, "dpu devcoredump\n"); drm_printf(p, "time: %ptSp\n", &state->time); @@ -185,7 +197,7 @@ void msm_disp_snapshot_add_block(struct msm_disp_state *disp_state, u32 len, va_end(va); INIT_LIST_HEAD(&new_blk->node); - new_blk->size = ALIGN(len, REG_DUMP_ALIGN); + new_blk->size = len; new_blk->base_addr = base_addr; msm_disp_state_dump_regs(&new_blk->state, new_blk->size, base_addr); diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 565d425f88b8..982abaaac00d 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -2033,6 +2033,7 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi) /* fixup base address by io offset */ msm_host->ctrl_base += cfg->io_offset; + msm_host->ctrl_size -= cfg->io_offset; ret = devm_regulator_bulk_get_const(&pdev->dev, cfg->num_regulators, cfg->regulator_data, diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 195f40e331e5..cc2bcd14b1c2 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -128,11 +128,10 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv, /* * Initialize the LRUs: */ - mutex_init(&priv->lru.lock); - drm_gem_lru_init(&priv->lru.unbacked, &priv->lru.lock); - drm_gem_lru_init(&priv->lru.pinned, &priv->lru.lock); - drm_gem_lru_init(&priv->lru.willneed, &priv->lru.lock); - drm_gem_lru_init(&priv->lru.dontneed, &priv->lru.lock); + drm_gem_lru_init(&priv->lru.unbacked); + drm_gem_lru_init(&priv->lru.pinned); + drm_gem_lru_init(&priv->lru.willneed); + drm_gem_lru_init(&priv->lru.dontneed); /* Initialize stall-on-fault */ spin_lock_init(&priv->fault_stall_lock); @@ -140,7 +139,7 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv, /* Teach lockdep about lock ordering wrt. shrinker: */ fs_reclaim_acquire(GFP_KERNEL); - might_lock(&priv->lru.lock); + might_lock(&ddev->gem_lru_mutex); fs_reclaim_release(GFP_KERNEL); if (priv->kms_init) { diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 6d847d593f1a..617b3c4b42c0 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -150,13 +150,6 @@ struct msm_drm_private { * DONTNEED state (ie. can be purged) */ struct drm_gem_lru dontneed; - - /** - * lock: - * - * Protects manipulation of all of the LRUs. - */ - struct mutex lock; } lru; struct notifier_block vmap_notifier; diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 2cb3ab04f125..efd3d3c9a449 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -177,11 +177,11 @@ static void update_lru_locked(struct drm_gem_object *obj) static void update_lru(struct drm_gem_object *obj) { - struct msm_drm_private *priv = obj->dev->dev_private; + struct drm_device *dev = obj->dev; - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); update_lru_locked(obj); - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); } static struct page **get_pages(struct drm_gem_object *obj) @@ -292,11 +292,11 @@ void msm_gem_pin_obj_locked(struct drm_gem_object *obj) static void pin_obj_locked(struct drm_gem_object *obj) { - struct msm_drm_private *priv = obj->dev->dev_private; + struct drm_device *dev = obj->dev; - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); msm_gem_pin_obj_locked(obj); - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); } struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj) @@ -487,16 +487,16 @@ int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct drm_gpuva *vma) void msm_gem_unpin_locked(struct drm_gem_object *obj) { - struct msm_drm_private *priv = obj->dev->dev_private; + struct drm_device *dev = obj->dev; struct msm_gem_object *msm_obj = to_msm_bo(obj); msm_gem_assert_locked(obj); - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); msm_obj->pin_count--; GEM_WARN_ON(msm_obj->pin_count < 0); update_lru_locked(obj); - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); } /* Special unpin path for use in fence-signaling path, avoiding the need @@ -507,10 +507,10 @@ void msm_gem_unpin_locked(struct drm_gem_object *obj) */ void msm_gem_unpin_active(struct drm_gem_object *obj) { - struct msm_drm_private *priv = obj->dev->dev_private; + struct drm_device *dev = obj->dev; struct msm_gem_object *msm_obj = to_msm_bo(obj); - GEM_WARN_ON(!mutex_is_locked(&priv->lru.lock)); + GEM_WARN_ON(!mutex_is_locked(&dev->gem_lru_mutex)); msm_obj->pin_count--; GEM_WARN_ON(msm_obj->pin_count < 0); @@ -797,12 +797,12 @@ void msm_gem_put_vaddr(struct drm_gem_object *obj) */ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv) { - struct msm_drm_private *priv = obj->dev->dev_private; + struct drm_device *dev = obj->dev; struct msm_gem_object *msm_obj = to_msm_bo(obj); msm_gem_lock(obj); - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); if (msm_obj->madv != __MSM_MADV_PURGED) msm_obj->madv = madv; @@ -814,7 +814,7 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv) */ update_lru_locked(obj); - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); msm_gem_unlock(obj); @@ -824,7 +824,6 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv) void msm_gem_purge(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; - struct msm_drm_private *priv = obj->dev->dev_private; struct msm_gem_object *msm_obj = to_msm_bo(obj); msm_gem_assert_locked(obj); @@ -839,10 +838,10 @@ void msm_gem_purge(struct drm_gem_object *obj) put_pages(obj); - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); /* A one-way transition: */ msm_obj->madv = __MSM_MADV_PURGED; - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); drm_gem_free_mmap_offset(obj); diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 31fa51a44f86..9d2788f79ace 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -43,8 +43,7 @@ msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) } static bool -with_vm_locks(struct ww_acquire_ctx *ticket, - void (*fn)(struct drm_gem_object *obj), +with_vm_locks(void (*fn)(struct drm_gem_object *obj), struct drm_gem_object *obj) { /* @@ -52,7 +51,7 @@ with_vm_locks(struct ww_acquire_ctx *ticket, * success paths */ struct drm_gpuvm_bo *vm_bo, *last_locked = NULL; - int ret = 0; + bool locked = true; drm_gem_for_each_gpuvm_bo (vm_bo, obj) { struct dma_resv *resv = drm_gpuvm_resv(vm_bo->vm); @@ -60,23 +59,14 @@ with_vm_locks(struct ww_acquire_ctx *ticket, if (resv == obj->resv) continue; - ret = dma_resv_lock(resv, ticket); - - /* - * Since we already skip the case when the VM and obj - * share a resv (ie. _NO_SHARE objs), we don't expect - * to hit a double-locking scenario... which the lock - * unwinding cannot really cope with. - */ - WARN_ON(ret == -EALREADY); - /* - * Don't bother with slow-lock / backoff / retry sequence, - * if we can't get the lock just give up and move on to - * the next object. + * dma_resv_lock can't be used due to acquiring 'ticket' before the + * fs_reclaim lock, which is held in shrinker context */ - if (ret) + if (!dma_resv_trylock(resv)) { + locked = false; goto out_unlock; + } /* * Hold a ref to prevent the vm_bo from being freed @@ -108,11 +98,11 @@ out_unlock: } } - return ret == 0; + return locked; } static bool -purge(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket) +purge(struct drm_gem_object *obj, struct ww_acquire_ctx *unused) { if (!is_purgeable(to_msm_bo(obj))) return false; @@ -120,11 +110,11 @@ purge(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket) if (msm_gem_active(obj)) return false; - return with_vm_locks(ticket, msm_gem_purge, obj); + return with_vm_locks(msm_gem_purge, obj); } static bool -evict(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket) +evict(struct drm_gem_object *obj, struct ww_acquire_ctx *unused) { if (is_unevictable(to_msm_bo(obj))) return false; @@ -132,7 +122,7 @@ evict(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket) if (msm_gem_active(obj)) return false; - return with_vm_locks(ticket, msm_gem_evict, obj); + return with_vm_locks(msm_gem_evict, obj); } static bool @@ -164,7 +154,6 @@ static unsigned long msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { struct msm_drm_private *priv = shrinker->private_data; - struct ww_acquire_ctx ticket; struct { struct drm_gem_lru *lru; bool (*shrink)(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket); @@ -185,11 +174,14 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) for (unsigned i = 0; (nr > 0) && (i < ARRAY_SIZE(stages)); i++) { if (!stages[i].cond) continue; + /* + * 'ticket' not needed on trylock paths + */ stages[i].freed = - drm_gem_lru_scan(stages[i].lru, nr, + drm_gem_lru_scan(priv->dev, stages[i].lru, nr, &stages[i].remaining, stages[i].shrink, - &ticket); + NULL); nr -= stages[i].freed; freed += stages[i].freed; remaining += stages[i].remaining; @@ -255,7 +247,7 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) unsigned long remaining = 0; for (idx = 0; lrus[idx] && unmapped < vmap_shrink_limit; idx++) { - unmapped += drm_gem_lru_scan(lrus[idx], + unmapped += drm_gem_lru_scan(priv->dev, lrus[idx], vmap_shrink_limit - unmapped, &remaining, vmap_shrink, diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 26ea8a28be47..3c6bc90c3d48 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -352,7 +352,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit) static int submit_pin_objects(struct msm_gem_submit *submit) { - struct msm_drm_private *priv = submit->dev->dev_private; + struct drm_device *dev = submit->dev; int i, ret = 0; for (i = 0; i < submit->nr_bos; i++) { @@ -381,11 +381,11 @@ static int submit_pin_objects(struct msm_gem_submit *submit) * get_pages() which could trigger reclaim.. and if we held the LRU lock * could trigger deadlock with the shrinker). */ - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); for (i = 0; i < submit->nr_bos; i++) { msm_gem_pin_obj_locked(submit->bos[i].obj); } - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); submit->bos_pinned = true; diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 1a952b171ed7..c4cfe036066b 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -702,7 +702,7 @@ static struct dma_fence * msm_vma_job_run(struct drm_sched_job *_job) { struct msm_vm_bind_job *job = to_msm_vm_bind_job(_job); - struct msm_drm_private *priv = job->vm->drm->dev_private; + struct drm_device *dev = job->vm->drm; struct msm_gem_vm *vm = to_msm_vm(job->vm); struct drm_gem_object *obj; int ret = vm->unusable ? -EINVAL : 0; @@ -745,13 +745,13 @@ msm_vma_job_run(struct drm_sched_job *_job) if (ret) msm_gem_vm_unusable(job->vm); - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); job_foreach_bo (obj, job) { msm_gem_unpin_active(obj); } - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); /* VM_BIND ops are synchronous, so no fence to wait on: */ return NULL; @@ -1305,7 +1305,7 @@ vm_bind_job_pin_objects(struct msm_vm_bind_job *job) return PTR_ERR(pages); } - struct msm_drm_private *priv = job->vm->drm->dev_private; + struct drm_device *dev = job->vm->drm; /* * A second loop while holding the LRU lock (a) avoids acquiring/dropping @@ -1314,10 +1314,10 @@ vm_bind_job_pin_objects(struct msm_vm_bind_job *job) * get_pages() which could trigger reclaim.. and if we held the LRU lock * could trigger deadlock with the shrinker). */ - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); job_foreach_bo (obj, job) msm_gem_pin_obj_locked(obj); - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); job->bos_pinned = true; diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 930e54d1b0a7..3f3925b11eea 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -13,11 +13,11 @@ #include "msm_gpu_trace.h" //#include "adreno/adreno_gpu.h" -#include <generated/utsrelease.h> #include <linux/string_helpers.h> #include <linux/devcoredump.h> #include <linux/sched/task.h> #include <linux/sched/mm.h> +#include <linux/utsname.h> /* * Power Management: @@ -196,7 +196,7 @@ static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset, p = drm_coredump_printer(&iter); drm_printf(&p, "---\n"); - drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "kernel: %s\n", init_utsname()->release); drm_printf(&p, "module: " KBUILD_MODNAME "\n"); drm_printf(&p, "time: %ptSp\n", &state->time); if (state->comm) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 7d449e5202c5..058c71c82cf5 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -677,7 +677,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, int prot) { struct msm_iommu *iommu = to_msm_iommu(mmu); - size_t ret; + ssize_t ret; WARN_ON(off != 0); @@ -686,7 +686,8 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, iova |= GENMASK_ULL(63, 49); ret = iommu_map_sgtable(iommu->domain, iova, sgt, prot); - WARN_ON(!ret); + if (ret < 0) + return ret; return (ret == len) ? 0 : -EINVAL; } diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 30ddb5351e98..2d6b930b766e 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -16,13 +16,13 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) struct msm_gem_submit *submit = to_msm_submit(job); struct msm_fence_context *fctx = submit->ring->fctx; struct msm_gpu *gpu = submit->gpu; - struct msm_drm_private *priv = gpu->dev->dev_private; + struct drm_device *dev = gpu->dev; unsigned nr_cmds = submit->nr_cmds; int i; msm_fence_init(submit->hw_fence, fctx); - mutex_lock(&priv->lru.lock); + mutex_lock(&dev->gem_lru_mutex); for (i = 0; i < submit->nr_bos; i++) { struct drm_gem_object *obj = submit->bos[i].obj; @@ -32,7 +32,7 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) submit->bos_pinned = false; - mutex_unlock(&priv->lru.lock); + mutex_unlock(&dev->gem_lru_mutex); /* TODO move submit path over to using a per-ring lock.. */ mutex_lock(&gpu->lock); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 72848ed80df7..b101e14f841e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2513,6 +2513,7 @@ static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", .bar = { 0x00000001, tu102_bar_new }, + .bios = { 0x00000001, nvkm_bios_new }, .devinit = { 0x00000001, ga100_devinit_new }, .fault = { 0x00000001, tu102_fault_new }, .fb = { 0x00000001, ga100_fb_new }, @@ -2529,7 +2530,6 @@ nv170_chipset = { .vfn = { 0x00000001, ga100_vfn_new }, .ce = { 0x000003ff, ga100_ce_new }, .fifo = { 0x00000001, ga100_fifo_new }, - .sec2 = { 0x00000001, tu102_sec2_new }, }; static const struct nvkm_device_chip @@ -3341,7 +3341,6 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x166: device->chip = &nv166_chipset; break; case 0x167: device->chip = &nv167_chipset; break; case 0x168: device->chip = &nv168_chipset; break; - case 0x170: device->chip = &nv170_chipset; break; case 0x172: device->chip = &nv172_chipset; break; case 0x173: device->chip = &nv173_chipset; break; case 0x174: device->chip = &nv174_chipset; break; @@ -3361,6 +3360,14 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x1b6: device->chip = &nv1b6_chipset; break; case 0x1b7: device->chip = &nv1b7_chipset; break; default: + if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) { + switch (device->chipset) { + case 0x170: device->chip = &nv170_chipset; break; + default: + break; + } + } + if (!device->chip) { nvdev_error(device, "unknown chipset (%08x)\n", boot0); ret = -ENODEV; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c index fdd820eeef81..27a13aeccd3c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c @@ -41,11 +41,15 @@ ga100_gsp_flcn = { static const struct nvkm_gsp_func ga100_gsp = { .flcn = &ga100_gsp_flcn, + .fwsec = &tu102_gsp_fwsec, .sig_section = ".fwsignature_ga100", .booter.ctor = tu102_gsp_booter_ctor, + .fwsec_sb.ctor = tu102_gsp_fwsec_sb_ctor, + .fwsec_sb.dtor = tu102_gsp_fwsec_sb_dtor, + .dtor = r535_gsp_dtor, .oneinit = tu102_gsp_oneinit, .init = tu102_gsp_init, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c index dd82c76b8b9a..19cb269e7a26 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c @@ -318,13 +318,8 @@ tu102_gsp_oneinit(struct nvkm_gsp *gsp) if (ret) return ret; - /* - * Calculate FB layout. FRTS is a memory region created by the FWSEC-FRTS firmware. - * FWSEC comes from VBIOS. So on systems with no VBIOS (e.g. GA100), the FRTS does - * not exist. Therefore, use the existence of VBIOS to determine whether to reserve - * an FRTS region. - */ - gsp->fb.wpr2.frts.size = device->bios ? 0x100000 : 0; + /* Calculate FB layout. */ + gsp->fb.wpr2.frts.size = 0x100000; gsp->fb.wpr2.frts.addr = ALIGN_DOWN(gsp->fb.bios.addr, 0x20000) - gsp->fb.wpr2.frts.size; gsp->fb.wpr2.boot.size = gsp->boot.fw.size; @@ -348,12 +343,9 @@ tu102_gsp_oneinit(struct nvkm_gsp *gsp) if (ret) return ret; - /* Only boot FWSEC-FRTS if it actually exists */ - if (gsp->fb.wpr2.frts.size) { - ret = nvkm_gsp_fwsec_frts(gsp); - if (WARN_ON(ret)) - return ret; - } + ret = nvkm_gsp_fwsec_frts(gsp); + if (WARN_ON(ret)) + return ret; /* Reset GSP into RISC-V mode. */ ret = gsp->func->reset(gsp); diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index d6863b28ddc5..d592f4f4b939 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -208,6 +208,7 @@ config DRM_PANEL_HIMAX_HX83121A depends on OF depends on DRM_MIPI_DSI depends on BACKLIGHT_CLASS_DEVICE + select DRM_DISPLAY_DSC_HELPER select DRM_KMS_HELPER help Say Y here if you want to enable support for Himax HX83121A-based diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index d5fe105bdbdd..658ce64c71eb 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -1324,6 +1324,8 @@ static int boe_panel_disable(struct drm_panel *panel) mipi_dsi_dcs_set_display_off_multi(&ctx); mipi_dsi_dcs_enter_sleep_mode_multi(&ctx); + boe->dsi->mode_flags |= MIPI_DSI_MODE_LPM; + mipi_dsi_msleep(&ctx, 150); return ctx.accum_err; diff --git a/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c index 4f8d6d8c07e4..dbdb7e3cb7b6 100644 --- a/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c +++ b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c @@ -98,9 +98,7 @@ static int feiyang_enable(struct drm_panel *panel) /* T12 (video & logic signal rise + backlight rise) T12 >= 200ms */ msleep(200); - mipi_dsi_dcs_set_display_on(ctx->dsi); - - return 0; + return mipi_dsi_dcs_set_display_on(ctx->dsi); } static int feiyang_disable(struct drm_panel *panel) diff --git a/drivers/gpu/drm/panel/panel-himax-hx83102.c b/drivers/gpu/drm/panel/panel-himax-hx83102.c index 8b2a68ee851e..a5e5c9ea7a73 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx83102.c +++ b/drivers/gpu/drm/panel/panel-himax-hx83102.c @@ -937,6 +937,8 @@ static int hx83102_disable(struct drm_panel *panel) mipi_dsi_dcs_set_display_off_multi(&dsi_ctx); mipi_dsi_dcs_enter_sleep_mode_multi(&dsi_ctx); + dsi->mode_flags |= MIPI_DSI_MODE_LPM; + mipi_dsi_msleep(&dsi_ctx, 150); return dsi_ctx.accum_err; diff --git a/drivers/gpu/drm/panel/panel-himax-hx83121a.c b/drivers/gpu/drm/panel/panel-himax-hx83121a.c index ebe643ba4184..bed79aa06f46 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx83121a.c +++ b/drivers/gpu/drm/panel/panel-himax-hx83121a.c @@ -596,8 +596,8 @@ static int himax_probe(struct mipi_dsi_device *dsi) ctx = devm_drm_panel_alloc(dev, struct himax, panel, &himax_panel_funcs, DRM_MODE_CONNECTOR_DSI); - if (!ctx) - return -ENOMEM; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); ret = devm_regulator_bulk_get_const(&dsi->dev, ARRAY_SIZE(himax_supplies), diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 711f5101aa04..074c0995ddc2 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -390,6 +390,8 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, true, timeout); if (!ret) ret = timeout ? -ETIMEDOUT : -EBUSY; + else if (ret > 0) + ret = 0; drm_gem_object_put(gem_obj); diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index 2bbb1168a3ff..1e6a2392d7c6 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -118,12 +118,13 @@ qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Complete initialization. */ ret = drm_dev_register(&qdev->ddev, ent->driver_data); if (ret) - goto modeset_cleanup; + goto poll_fini; drm_client_setup(&qdev->ddev, NULL); return 0; -modeset_cleanup: +poll_fini: + drm_kms_helper_poll_fini(&qdev->ddev); qxl_modeset_fini(qdev); unload: qxl_device_fini(qdev); @@ -154,6 +155,7 @@ qxl_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); + drm_kms_helper_poll_fini(dev); drm_dev_unregister(dev); drm_atomic_helper_shutdown(dev); if (pci_is_vga(pdev) && pdev->revision < 5) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 22321eb95b7d..703848fac189 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -2461,7 +2461,8 @@ static void ci_register_patching_mc_arb(struct radeon_device *rdev, if (patch && ((rdev->pdev->device == 0x67B0) || - (rdev->pdev->device == 0x67B1))) { + (rdev->pdev->device == 0x67B1)) && + (rdev->pdev->revision == 0)) { if ((memory_clock > 100000) && (memory_clock <= 125000)) { tmp2 = (((0x31 * engine_clock) / 125000) - 1) & 0xff; *dram_timimg2 &= ~0x00ff0000; @@ -3304,7 +3305,8 @@ static int ci_populate_all_memory_levels(struct radeon_device *rdev) pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1; if ((dpm_table->mclk_table.count >= 2) && - ((rdev->pdev->device == 0x67B0) || (rdev->pdev->device == 0x67B1))) { + ((rdev->pdev->device == 0x67B0) || (rdev->pdev->device == 0x67B1)) && + (rdev->pdev->revision == 0)) { pi->smc_state_table.MemoryLevel[1].MinVddc = pi->smc_state_table.MemoryLevel[0].MinVddc; pi->smc_state_table.MemoryLevel[1].MinVddcPhases = @@ -4493,7 +4495,8 @@ static int ci_register_patching_mc_seq(struct radeon_device *rdev, if (patch && ((rdev->pdev->device == 0x67B0) || - (rdev->pdev->device == 0x67B1))) { + (rdev->pdev->device == 0x67B1)) && + (rdev->pdev->revision == 0)) { for (i = 0; i < table->last; i++) { if (table->last >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) return -EINVAL; diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 3142ef4da7f4..9196f85db9ce 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -312,8 +312,10 @@ static int evergreen_surface_check(struct radeon_cs_parser *p, case ARRAY_2D_TILED_THIN1: return evergreen_surface_check_2d(p, surf, prefix); default: - dev_warn(p->dev, "%s:%d %s invalid array mode %d\n", - __func__, __LINE__, prefix, surf->mode); + if (prefix) { + dev_warn(p->dev, "%s:%d %s invalid array mode %d\n", + __func__, __LINE__, prefix, surf->mode); + } return -EINVAL; } return -EINVAL; diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c index b7397827889c..360a88ca8f0c 100644 --- a/drivers/gpu/drm/sti/sti_hda.c +++ b/drivers/gpu/drm/sti/sti_hda.c @@ -741,6 +741,7 @@ static int sti_hda_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct sti_hda *hda; struct resource *res; + int ret; DRM_INFO("%s\n", __func__); @@ -779,7 +780,9 @@ static int sti_hda_probe(struct platform_device *pdev) return PTR_ERR(hda->clk_hddac); } - drm_bridge_add(&hda->bridge); + ret = devm_drm_bridge_add(dev, &hda->bridge); + if (ret) + return ret; platform_set_drvdata(pdev, hda); @@ -788,10 +791,7 @@ static int sti_hda_probe(struct platform_device *pdev) static void sti_hda_remove(struct platform_device *pdev) { - struct sti_hda *hda = platform_get_drvdata(pdev); - component_del(&pdev->dev, &sti_hda_ops); - drm_bridge_remove(&hda->bridge); } static const struct of_device_id hda_of_match[] = { diff --git a/drivers/gpu/drm/sysfb/ofdrm.c b/drivers/gpu/drm/sysfb/ofdrm.c index d38ba70f4e0d..247cf13c80a0 100644 --- a/drivers/gpu/drm/sysfb/ofdrm.c +++ b/drivers/gpu/drm/sysfb/ofdrm.c @@ -350,6 +350,7 @@ static void ofdrm_pci_release(void *data) struct pci_dev *pcidev = data; pci_disable_device(pcidev); + pci_dev_put(pcidev); } static int ofdrm_device_init_pci(struct ofdrm_device *odev) @@ -375,6 +376,7 @@ static int ofdrm_device_init_pci(struct ofdrm_device *odev) if (ret) { drm_err(dev, "pci_enable_device(%s) failed: %d\n", dev_name(&pcidev->dev), ret); + pci_dev_put(pcidev); return ret; } ret = devm_add_action_or_reset(&pdev->dev, ofdrm_pci_release, pcidev); diff --git a/drivers/gpu/drm/tiny/appletbdrm.c b/drivers/gpu/drm/tiny/appletbdrm.c index 3bae91d7eefe..278bb23fe4c8 100644 --- a/drivers/gpu/drm/tiny/appletbdrm.c +++ b/drivers/gpu/drm/tiny/appletbdrm.c @@ -353,7 +353,7 @@ static int appletbdrm_primary_plane_helper_atomic_check(struct drm_plane *plane, frames_size + sizeof(struct appletbdrm_fb_request_footer), 16); - appletbdrm_state->request = kzalloc(request_size, GFP_KERNEL); + appletbdrm_state->request = kvzalloc(request_size, GFP_KERNEL); if (!appletbdrm_state->request) return -ENOMEM; @@ -543,7 +543,7 @@ static void appletbdrm_primary_plane_destroy_state(struct drm_plane *plane, { struct appletbdrm_plane_state *appletbdrm_state = to_appletbdrm_plane_state(state); - kfree(appletbdrm_state->request); + kvfree(appletbdrm_state->request); kfree(appletbdrm_state->response); __drm_gem_destroy_shadow_plane_state(&appletbdrm_state->base); diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c index 222e4ae1abbd..5d8dc5efec77 100644 --- a/drivers/gpu/drm/tiny/bochs.c +++ b/drivers/gpu/drm/tiny/bochs.c @@ -761,25 +761,21 @@ static int bochs_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent ret = pcim_enable_device(pdev); if (ret) - goto err_free_dev; + return ret; pci_set_drvdata(pdev, dev); ret = bochs_load(bochs); if (ret) - goto err_free_dev; + return ret; ret = drm_dev_register(dev, 0); if (ret) - goto err_free_dev; + return ret; drm_client_setup(dev, NULL); return ret; - -err_free_dev: - drm_dev_put(dev); - return ret; } static void bochs_pci_remove(struct pci_dev *pdev) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d85f0a37ac35..bcd76f6bb7f0 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -739,7 +739,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo, may_evict = (force_space && place->mem_type != TTM_PL_SYSTEM); ret = ttm_resource_alloc(bo, place, res, force_space ? &limit_pool : NULL); if (ret) { - if (ret != -ENOSPC && ret != -EAGAIN) { + if (ret != -ENOSPC) { dmem_cgroup_pool_state_put(limit_pool); return ret; } @@ -1177,17 +1177,13 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) bdev->funcs->swap_notify(bo); if (ttm_tt_is_populated(tt)) { - spin_lock(&bdev->lru_lock); - ttm_resource_del_bulk_move(bo->resource, bo); - spin_unlock(&bdev->lru_lock); - ret = ttm_tt_swapout(bdev, tt, swapout_walk->gfp_flags); - - spin_lock(&bdev->lru_lock); - if (ret) - ttm_resource_add_bulk_move(bo->resource, bo); - ttm_resource_move_to_lru_tail(bo->resource); - spin_unlock(&bdev->lru_lock); + if (!ret) { + spin_lock(&bdev->lru_lock); + ttm_resource_del_bulk_move_unevictable(bo->resource, bo); + ttm_resource_move_to_lru_tail(bo->resource); + spin_unlock(&bdev->lru_lock); + } } out: diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index f83b7d5ec6c6..3e3c201a0222 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -1112,19 +1112,14 @@ long ttm_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, if (lret < 0) return lret; - if (bo->bulk_move) { - spin_lock(&bdev->lru_lock); - ttm_resource_del_bulk_move(bo->resource, bo); - spin_unlock(&bdev->lru_lock); - } - lret = ttm_tt_backup(bdev, bo->ttm, (struct ttm_backup_flags) {.purge = flags.purge, .writeback = flags.writeback}); - if (lret <= 0 && bo->bulk_move) { + if (lret > 0) { spin_lock(&bdev->lru_lock); - ttm_resource_add_bulk_move(bo->resource, bo); + ttm_resource_del_bulk_move_unevictable(bo->resource, bo); + ttm_resource_move_to_lru_tail(bo->resource); spin_unlock(&bdev->lru_lock); } diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 26a3689e5fd9..278bbe7a11ad 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -206,6 +206,14 @@ error_free: return NULL; } +static void __free_pages_gpu_account(struct page *p, unsigned int order, + bool reclaim) +{ + mod_lruvec_page_state(p, reclaim ? NR_GPU_RECLAIM : NR_GPU_ACTIVE, + -(1 << order)); + __free_pages(p, order); +} + /* Reset the caching and pages of size 1 << order */ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching, unsigned int order, struct page *p, bool reclaim) @@ -223,9 +231,7 @@ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching, #endif if (!pool || !ttm_pool_uses_dma_alloc(pool)) { - mod_lruvec_page_state(p, reclaim ? NR_GPU_RECLAIM : NR_GPU_ACTIVE, - -(1 << order)); - __free_pages(p, order); + __free_pages_gpu_account(p, order, reclaim); return; } @@ -606,7 +612,7 @@ static int ttm_pool_restore_commit(struct ttm_pool_tt_restore *restore, */ ttm_pool_split_for_swap(restore->pool, p); copy_highpage(restore->alloced_page + i, p); - __free_pages(p, 0); + __free_pages_gpu_account(p, 0, false); } restore->restored_pages++; @@ -1068,7 +1074,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, if (flags->purge) { shrunken += num_pages; page->private = 0; - __free_pages(page, order); + __free_pages_gpu_account(page, order, false); memset(tt->pages + i, 0, num_pages * sizeof(*tt->pages)); } @@ -1109,7 +1115,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, } handle = shandle; tt->pages[i] = ttm_backup_handle_to_page_ptr(handle); - put_page(page); + __free_pages_gpu_account(page, 0, false); shrunken++; } diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 9f36631d48b6..154d6739256f 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -292,6 +292,19 @@ void ttm_resource_del_bulk_move(struct ttm_resource *res, ttm_lru_bulk_move_del(bo->bulk_move, res); } +/* + * Remove a resource from its bulk_move, bypassing the unevictable check. + * Use only when the resource is known to still be tracked in the range despite + * the BO having just become unevictable; asserts that this is the case. + */ +void ttm_resource_del_bulk_move_unevictable(struct ttm_resource *res, + struct ttm_buffer_object *bo) +{ + WARN_ON_ONCE(!ttm_resource_unevictable(res, bo)); + if (bo->bulk_move) + ttm_lru_bulk_move_del(bo->bulk_move, res); +} + /* Move a resource to the LRU or bulk tail */ void ttm_resource_move_to_lru_tail(struct ttm_resource *res) { @@ -385,8 +398,11 @@ int ttm_resource_alloc(struct ttm_buffer_object *bo, if (man->cg) { ret = dmem_cgroup_try_charge(man->cg, bo->base.size, &pool, ret_limit_pool); - if (ret) + if (ret) { + if (ret == -EAGAIN) + ret = -ENOSPC; return ret; + } } ret = man->func->alloc(man, bo, place, res_ptr); diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index 08a0e9480d70..17950fe3a0ec 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -285,13 +285,12 @@ static struct urb *udl_get_urb_locked(struct udl_device *udl, long timeout) return unode->urb; } -#define GET_URB_TIMEOUT HZ struct urb *udl_get_urb(struct udl_device *udl) { struct urb *urb; spin_lock_irq(&udl->urbs.lock); - urb = udl_get_urb_locked(udl, GET_URB_TIMEOUT); + urb = udl_get_urb_locked(udl, HZ * 2); spin_unlock_irq(&udl->urbs.lock); return urb; } diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c index 231e829bd709..1ca073a4ecb2 100644 --- a/drivers/gpu/drm/udl/udl_modeset.c +++ b/drivers/gpu/drm/udl/udl_modeset.c @@ -21,6 +21,7 @@ #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_modeset_helper_vtables.h> +#include <drm/drm_print.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -342,8 +343,10 @@ static void udl_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_atom return; urb = udl_get_urb(udl); - if (!urb) + if (!urb) { + drm_err_ratelimited(dev, "get urb failed when enabling crtc\n"); goto out; + } buf = (char *)urb->transfer_buffer; buf = udl_vidreg_lock(buf); diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 1855ef5b3b5f..94bf628dc91c 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -126,20 +126,6 @@ v3d_performance_query_info_free(struct v3d_performance_query_info *query_info, } static void -v3d_cpu_job_free(struct drm_sched_job *sched_job) -{ - struct v3d_cpu_job *job = to_cpu_job(sched_job); - - v3d_timestamp_query_info_free(&job->timestamp_query, - job->timestamp_query.count); - - v3d_performance_query_info_free(&job->performance_query, - job->performance_query.count); - - v3d_job_cleanup(&job->base); -} - -static void v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job) { struct v3d_perfmon *perfmon = v3d->global_perfmon; @@ -830,7 +816,7 @@ static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = { static const struct drm_sched_backend_ops v3d_cpu_sched_ops = { .run_job = v3d_cpu_job_run, - .free_job = v3d_cpu_job_free + .free_job = v3d_sched_job_free }; static int diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index ee4512db294b..3ddd53b6f437 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -123,6 +123,24 @@ v3d_render_job_free(struct kref *ref) v3d_job_free(ref); } +static void +v3d_cpu_job_free(struct kref *ref) +{ + struct v3d_cpu_job *job = container_of(ref, struct v3d_cpu_job, + base.refcount); + + v3d_timestamp_query_info_free(&job->timestamp_query, + job->timestamp_query.count); + + v3d_performance_query_info_free(&job->performance_query, + job->performance_query.count); + + if (job->indirect_csd.indirect) + drm_gem_object_put(job->indirect_csd.indirect); + + v3d_job_free(ref); +} + void v3d_job_cleanup(struct v3d_job *job) { if (!job) @@ -1302,7 +1320,7 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data, trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type); ret = v3d_job_init(v3d, file_priv, &cpu_job->base, - v3d_job_free, 0, &se, V3D_CPU); + v3d_cpu_job_free, 0, &se, V3D_CPU); if (ret) { v3d_job_deallocate((void *)&cpu_job); goto fail; @@ -1385,8 +1403,6 @@ fail: v3d_job_cleanup((void *)csd_job); v3d_job_cleanup(clean_job); v3d_put_multisync_post_deps(&se); - kvfree(cpu_job->timestamp_query.queries); - kvfree(cpu_job->performance_query.queries); return ret; } diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index f17660a71a3e..2f3531950aa4 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -317,6 +317,7 @@ virtio_gpu_array_from_handles(struct drm_file *drm_file, u32 *handles, u32 nents void virtio_gpu_array_add_obj(struct virtio_gpu_object_array *objs, struct drm_gem_object *obj); int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs); +int virtio_gpu_lock_one_resv_uninterruptible(struct virtio_gpu_object_array *objs); void virtio_gpu_array_unlock_resv(struct virtio_gpu_object_array *objs); void virtio_gpu_array_add_fence(struct virtio_gpu_object_array *objs, struct dma_fence *fence); diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index f22dc5c21cd4..435d37d36034 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -238,6 +238,23 @@ int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs) return ret; } +int virtio_gpu_lock_one_resv_uninterruptible(struct virtio_gpu_object_array *objs) +{ + int ret; + + if (objs->nents != 1) + return -EINVAL; + + dma_resv_lock(objs->objs[0]->resv, NULL); + + ret = dma_resv_reserve_fences(objs->objs[0]->resv, 1); + if (ret) { + virtio_gpu_array_unlock_resv(objs); + return ret; + } + return 0; +} + void virtio_gpu_array_unlock_resv(struct virtio_gpu_object_array *objs) { if (objs->nents == 1) { diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index a126d1b25f46..652352424744 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -215,7 +215,10 @@ static void virtio_gpu_resource_flush(struct drm_plane *plane, if (!objs) return; virtio_gpu_array_add_obj(objs, vgfb->base.obj[0]); - virtio_gpu_array_lock_resv(objs); + if (virtio_gpu_lock_one_resv_uninterruptible(objs)) { + virtio_gpu_array_put_free(objs); + return; + } virtio_gpu_cmd_resource_flush(vgdev, bo->hw_res_handle, x, y, width, height, objs, vgplane_st->fence); @@ -459,7 +462,10 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane, if (!objs) return; virtio_gpu_array_add_obj(objs, vgfb->base.obj[0]); - virtio_gpu_array_lock_resv(objs); + if (virtio_gpu_lock_one_resv_uninterruptible(objs)) { + virtio_gpu_array_put_free(objs); + return; + } virtio_gpu_cmd_transfer_to_host_2d (vgdev, 0, plane->state->crtc_w, diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 49de1c22a469..03242e8b3d87 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -88,6 +88,7 @@ xe-y += xe_bb.o \ xe_irq.o \ xe_late_bind_fw.o \ xe_lrc.o \ + xe_mem_pool.o \ xe_migrate.o \ xe_mmio.o \ xe_mmio_gem.o \ diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 29c72aa4b0d2..33494b86205d 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -37,9 +37,17 @@ static bool intel_hdcp_gsc_check_status(struct drm_device *drm) struct xe_device *xe = to_xe_device(drm); struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_gt *gt = tile->media_gt; - struct xe_gsc *gsc = >->uc.gsc; + struct xe_gsc *gsc; + + if (!gt) { + drm_dbg_kms(&xe->drm, + "not checking GSC status for HDCP2.x: media GT not present or disabled\n"); + return false; + } + + gsc = >->uc.gsc; - if (!gsc || !xe_uc_fw_is_available(&gsc->fw)) { + if (!xe_uc_fw_is_available(&gsc->fw)) { drm_dbg_kms(&xe->drm, "GSC Components not ready for HDCP2.x\n"); return false; diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 4ebaa0888a43..353fe0bd49bf 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -152,10 +152,11 @@ #define XEHPG_INSTDONE_GEOM_SVGUNIT XE_REG_MCR(0x666c) -#define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED) +#define CACHE_MODE_1 XE_REG_MCR(0x7004, XE_REG_OPTION_MASKED) #define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) #define COMMON_SLICE_CHICKEN1 XE_REG(0x7010, XE_REG_OPTION_MASKED) +#define XEHP_COMMON_SLICE_CHICKEN1 XE_REG_MCR(0x7010, XE_REG_OPTION_MASKED) #define DISABLE_BOTTOM_CLIP_RECTANGLE_TEST REG_BIT(14) #define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED) @@ -178,6 +179,7 @@ #define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108) #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) +#define XEHP_COMMON_SLICE_CHICKEN4 XE_REG_MCR(0x7300, XE_REG_OPTION_MASKED) #define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) #define HW_FILTERING REG_BIT(5) @@ -583,7 +585,7 @@ #define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) #define LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE REG_BIT(35 - 32) -#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0) +#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0, XE_REG_OPTION_MASKED) #define CPSS_AWARE_DIS REG_BIT(3) #define SARB_CHICKEN1 XE_REG_MCR(0xe90c) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index a7c2dc7f224c..6b518858538f 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -897,10 +897,10 @@ void xe_bo_set_purgeable_state(struct xe_bo *bo, new_state == XE_MADV_PURGEABLE_PURGED); /* Once purged, always purged - cannot transition out */ - xe_assert(xe, !(bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED && + xe_assert(xe, !(bo->purgeable.state == XE_MADV_PURGEABLE_PURGED && new_state != XE_MADV_PURGEABLE_PURGED)); - bo->madv_purgeable = new_state; + bo->purgeable.state = new_state; xe_bo_set_purgeable_shrinker(bo, new_state); } @@ -2322,8 +2322,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, } /* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */ - if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) + if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) { + xe_bo_free(bo); return ERR_PTR(-EINVAL); + } if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) && !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) && @@ -2342,8 +2344,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, alignment = SZ_4K >> PAGE_SHIFT; } - if (type == ttm_bo_type_device && aligned_size != size) + if (type == ttm_bo_type_device && aligned_size != size) { + xe_bo_free(bo); return ERR_PTR(-EINVAL); + } if (!bo) { bo = xe_bo_alloc(); @@ -2364,7 +2368,7 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo, INIT_LIST_HEAD(&bo->vram_userfault_link); /* Initialize purge advisory state */ - bo->madv_purgeable = XE_MADV_PURGEABLE_WILLNEED; + bo->purgeable.state = XE_MADV_PURGEABLE_WILLNEED; drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 68dea7d25a6b..6340317f7d2e 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -251,7 +251,7 @@ static inline bool xe_bo_is_protected(const struct xe_bo *bo) static inline bool xe_bo_is_purged(struct xe_bo *bo) { xe_bo_assert_held(bo); - return bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED; + return bo->purgeable.state == XE_MADV_PURGEABLE_PURGED; } /** @@ -268,11 +268,95 @@ static inline bool xe_bo_is_purged(struct xe_bo *bo) static inline bool xe_bo_madv_is_dontneed(struct xe_bo *bo) { xe_bo_assert_held(bo); - return bo->madv_purgeable == XE_MADV_PURGEABLE_DONTNEED; + return bo->purgeable.state == XE_MADV_PURGEABLE_DONTNEED; } void xe_bo_set_purgeable_state(struct xe_bo *bo, enum xe_madv_purgeable_state new_state); +/** + * xe_bo_willneed_get_locked() - Acquire a WILLNEED holder on a BO + * @bo: Buffer object + * + * Increments willneed_count and, on a 0->1 transition, promotes the BO + * from DONTNEED to WILLNEED. PURGED is terminal and is never modified. + * + * Caller must hold the BO's dma-resv lock. + */ +static inline void xe_bo_willneed_get_locked(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + + /* Imported BOs are owned externally; do not track purgeability. */ + if (drm_gem_is_imported(&bo->ttm.base)) + return; + + if (bo->purgeable.willneed_count++ == 0 && xe_bo_madv_is_dontneed(bo)) + xe_bo_set_purgeable_state(bo, XE_MADV_PURGEABLE_WILLNEED); +} + +/** + * xe_bo_willneed_put_locked() - Release a WILLNEED holder on a BO + * @bo: Buffer object + * + * Decrements willneed_count and, on a 1->0 transition, marks the BO + * DONTNEED only if it still has VMAs (implying all active VMAs are + * DONTNEED). If the last VMA is being removed, preserve the current BO + * state to match the previous VMA-walk semantics. + * + * PURGED is terminal and the BO state is never modified. + * + * Caller must hold the BO's dma-resv lock. + */ +static inline void xe_bo_willneed_put_locked(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + + if (drm_gem_is_imported(&bo->ttm.base)) + return; + + xe_assert(xe_bo_device(bo), bo->purgeable.willneed_count > 0); + if (--bo->purgeable.willneed_count == 0 && bo->purgeable.vma_count > 0 && + !xe_bo_is_purged(bo)) + xe_bo_set_purgeable_state(bo, XE_MADV_PURGEABLE_DONTNEED); +} + +/** + * xe_bo_vma_count_inc_locked() - Account a new VMA on a BO + * @bo: Buffer object + * + * Increments vma_count. + * + * Caller must hold the BO's dma-resv lock. + */ +static inline void xe_bo_vma_count_inc_locked(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + + if (drm_gem_is_imported(&bo->ttm.base)) + return; + + bo->purgeable.vma_count++; +} + +/** + * xe_bo_vma_count_dec_locked() - Account a VMA removal on a BO + * @bo: Buffer object + * + * Decrements vma_count. + * + * Caller must hold the BO's dma-resv lock. + */ +static inline void xe_bo_vma_count_dec_locked(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + + if (drm_gem_is_imported(&bo->ttm.base)) + return; + + xe_assert(xe_bo_device(bo), bo->purgeable.vma_count > 0); + bo->purgeable.vma_count--; +} + static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo) { if (likely(bo)) { diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index ff8317bfc1ae..077e35b4cdce 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -18,6 +18,7 @@ #include "xe_ggtt_types.h" struct xe_device; +struct xe_mem_pool_node; struct xe_vm; #define XE_BO_MAX_PLACEMENTS 3 @@ -88,7 +89,7 @@ struct xe_bo { bool ccs_cleared; /** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */ - struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT]; + struct xe_mem_pool_node *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT]; /** * @cpu_caching: CPU caching mode. Currently only used for userspace @@ -110,10 +111,32 @@ struct xe_bo { u64 min_align; /** - * @madv_purgeable: user space advise on BO purgeability, protected - * by BO's dma-resv lock. + * @purgeable: Purgeability state and accounting. + * + * All fields are protected by the BO's dma-resv lock. */ - u32 madv_purgeable; + struct { + /** + * @purgeable.state: BO purgeability state + * (WILLNEED/DONTNEED/PURGED). + */ + u32 state; + + /** + * @purgeable.vma_count: Number of VMAs currently mapping this BO. + */ + u32 vma_count; + + /** + * @purgeable.willneed_count: Number of active WILLNEED holders. + * + * Counts WILLNEED VMAs plus active dma-buf exports for + * non-imported BOs. The BO flips to DONTNEED on a 1->0 + * transition only when VMAs still exist; if the last VMA is + * removed, the previous BO state is preserved. + */ + u32 willneed_count; + } purgeable; }; #endif diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index 7f9602b3363d..8a920e58245c 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -193,6 +193,18 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, return 0; } +static void xe_dma_buf_release(struct dma_buf *dmabuf) +{ + struct drm_gem_object *obj = dmabuf->priv; + struct xe_bo *bo = gem_to_xe_bo(obj); + + xe_bo_lock(bo, false); + xe_bo_willneed_put_locked(bo); + xe_bo_unlock(bo); + + drm_gem_dmabuf_release(dmabuf); +} + static const struct dma_buf_ops xe_dmabuf_ops = { .attach = xe_dma_buf_attach, .detach = xe_dma_buf_detach, @@ -200,7 +212,7 @@ static const struct dma_buf_ops xe_dmabuf_ops = { .unpin = xe_dma_buf_unpin, .map_dma_buf = xe_dma_buf_map, .unmap_dma_buf = xe_dma_buf_unmap, - .release = drm_gem_dmabuf_release, + .release = xe_dma_buf_release, .begin_cpu_access = xe_dma_buf_begin_cpu_access, .mmap = drm_gem_dmabuf_mmap, .vmap = drm_gem_dmabuf_vmap, @@ -241,26 +253,33 @@ struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags) ret = -EINVAL; goto out_unlock; } + + xe_bo_willneed_get_locked(bo); xe_bo_unlock(bo); ret = ttm_bo_setup_export(&bo->ttm, &ctx); if (ret) - return ERR_PTR(ret); + goto out_put; buf = drm_gem_prime_export(obj, flags); - if (!IS_ERR(buf)) - buf->ops = &xe_dmabuf_ops; + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto out_put; + } + buf->ops = &xe_dmabuf_ops; return buf; +out_put: + xe_bo_lock(bo, false); + xe_bo_willneed_put_locked(bo); out_unlock: xe_bo_unlock(bo); return ERR_PTR(ret); } static struct drm_gem_object * -xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, - struct dma_buf *dma_buf) +xe_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) { struct dma_resv *resv = dma_buf->resv; struct xe_device *xe = to_xe_device(dev); @@ -281,7 +300,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, if (ret) break; - bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size, + bo = xe_bo_init_locked(xe, NULL, NULL, resv, NULL, dma_buf->size, 0, /* Will require 1way or 2way for vm_bind */ ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec); drm_exec_retry_on_contention(&exec); @@ -332,7 +351,6 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, const struct dma_buf_attach_ops *attach_ops; struct dma_buf_attachment *attach; struct drm_gem_object *obj; - struct xe_bo *bo; if (dma_buf->ops == &xe_dmabuf_ops) { obj = dma_buf->priv; @@ -348,13 +366,15 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, } /* - * Don't publish the bo until we have a valid attachment, and a - * valid attachment needs the bo address. So pre-create a bo before - * creating the attachment and publish. + * This needs to happen before the attach, since it will create a new + * attachment for this, and add it to the list of attachments, at which + * point it is globally visible, and at any point the export side can + * call into on invalidate_mappings callback, which require a working + * object. */ - bo = xe_bo_alloc(); - if (IS_ERR(bo)) - return ERR_CAST(bo); + obj = xe_dma_buf_create_obj(dev, dma_buf); + if (IS_ERR(obj)) + return obj; attach_ops = &xe_dma_buf_attach_ops; #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) @@ -362,26 +382,15 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev, attach_ops = test->attach_ops; #endif - attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base); + attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, obj); if (IS_ERR(attach)) { - obj = ERR_CAST(attach); - goto out_err; + xe_bo_put(gem_to_xe_bo(obj)); + return ERR_CAST(attach); } - /* Errors here will take care of freeing the bo. */ - obj = xe_dma_buf_init_obj(dev, bo, dma_buf); - if (IS_ERR(obj)) - return obj; - - get_dma_buf(dma_buf); obj->import_attach = attach; return obj; - -out_err: - xe_bo_free(bo); - - return obj; } #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index c34408cfd292..dddcdd0bb7a3 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -869,14 +869,14 @@ static int xe_eu_stall_stream_close(struct inode *inode, struct file *file) struct xe_eu_stall_data_stream *stream = file->private_data; struct xe_gt *gt = stream->gt; - drm_dev_put(>->tile->xe->drm); - mutex_lock(>->eu_stall->stream_lock); xe_eu_stall_disable_locked(stream); xe_eu_stall_data_buf_destroy(stream); xe_eu_stall_stream_free(stream); mutex_unlock(>->eu_stall->stream_lock); + drm_dev_put(>->tile->xe->drm); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index b287d0e0e60a..071b8c41df43 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -1405,7 +1405,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (q->vm && q->hwe->hw_engine_group) { err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q); if (err) - goto put_exec_queue; + goto kill_exec_queue; } } @@ -1416,12 +1416,15 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, /* user id alloc must always be last in ioctl to prevent UAF */ err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); if (err) - goto kill_exec_queue; + goto del_hw_engine_group; args->exec_queue_id = id; return 0; +del_hw_engine_group: + if (q->vm && q->hwe && q->hwe->hw_engine_group) + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); kill_exec_queue: xe_exec_queue_kill(q); delete_queue_group: @@ -1760,7 +1763,7 @@ void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q, void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q, unsigned int type) { - xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || + xe_assert(gt_to_xe(q->gt), type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); dma_fence_put(q->tlb_inval[type].last_fence); diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index e5c234f3d795..aab59dc647fb 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -166,7 +166,7 @@ static int query_compatibility_version(struct xe_gsc *gsc) &rd_offset); if (err) { xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err); - return err; + goto out_bo; } compat->major = version_query_rd(xe, &bo->vmap, rd_offset, proj_major); @@ -482,8 +482,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) EXEC_QUEUE_FLAG_PERMANENT, 0); if (IS_ERR(q)) { xe_gt_err(gt, "Failed to create queue for GSC submission\n"); - err = PTR_ERR(q); - goto out_bo; + return PTR_ERR(q); } wq = alloc_ordered_workqueue("gsc-ordered-wq", 0); @@ -506,8 +505,6 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) out_q: xe_exec_queue_put(q); -out_bo: - xe_bo_unpin_map_no_vm(bo); return err; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c index 87a164efcc33..01fe03b9efe8 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c @@ -385,10 +385,10 @@ static int pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid, void *buf if (xe_gt_is_media_type(gt)) for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) - regs[n] = xe_mmio_read32(>->mmio, MED_VF_SW_FLAG(n)); + regs[n] = xe_mmio_read32(&mmio, MED_VF_SW_FLAG(n)); else for (n = 0; n < VF_SW_FLAG_COUNT; n++) - regs[n] = xe_mmio_read32(>->mmio, VF_SW_FLAG(n)); + regs[n] = xe_mmio_read32(&mmio, VF_SW_FLAG(n)); return 0; } @@ -407,10 +407,10 @@ static int pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid, if (xe_gt_is_media_type(gt)) for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++) - xe_mmio_write32(>->mmio, MED_VF_SW_FLAG(n), regs[n]); + xe_mmio_write32(&mmio, MED_VF_SW_FLAG(n), regs[n]); else for (n = 0; n < VF_SW_FLAG_COUNT; n++) - xe_mmio_write32(>->mmio, VF_SW_FLAG(n), regs[n]); + xe_mmio_write32(&mmio, VF_SW_FLAG(n), regs[n]); return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c index 7d532bded02a..a85ba4435378 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c @@ -114,8 +114,10 @@ int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 * VFs with no events are not printed. * * This function can only be called on PF. + * + * Return: always 0 */ -void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p) +int xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p) { unsigned int n, total_vfs = xe_gt_sriov_pf_get_totalvfs(gt); const struct xe_gt_sriov_monitor *data; @@ -144,4 +146,6 @@ void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p #undef __format #undef __value } + + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h index 7ca9351a271b..0b8f088d3a16 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h @@ -13,7 +13,7 @@ struct drm_printer; struct xe_gt; void xe_gt_sriov_pf_monitor_flr(struct xe_gt *gt, u32 vfid); -void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p); #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 8989c8e1be95..0cd9d77f3351 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -1137,13 +1137,15 @@ void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) } /** - * xe_gt_sriov_vf_print_config - Print VF self config. + * xe_gt_sriov_vf_print_config() - Print VF self config. * @gt: the &xe_gt * @p: the &drm_printer * * This function is for VF use only. + * + * Return: always 0. */ -void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) +int xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) { struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; struct xe_device *xe = gt_to_xe(gt); @@ -1170,16 +1172,20 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "GuC contexts:\t%u\n", config->num_ctxs); drm_printf(p, "GuC doorbells:\t%u\n", config->num_dbs); + + return 0; } /** - * xe_gt_sriov_vf_print_runtime - Print VF's runtime regs received from PF. + * xe_gt_sriov_vf_print_runtime() - Print VF's runtime regs received from PF. * @gt: the &xe_gt * @p: the &drm_printer * * This function is for VF use only. + * + * Return: always 0. */ -void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) +int xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) { struct vf_runtime_reg *vf_regs = gt->sriov.vf.runtime.regs; unsigned int size = gt->sriov.vf.runtime.num_regs; @@ -1188,16 +1194,20 @@ void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) for (; size--; vf_regs++) drm_printf(p, "%#x = %#x\n", vf_regs->offset, vf_regs->value); + + return 0; } /** - * xe_gt_sriov_vf_print_version - Print VF ABI versions. + * xe_gt_sriov_vf_print_version() - Print VF ABI versions. * @gt: the &xe_gt * @p: the &drm_printer * * This function is for VF use only. + * + * Return: always 0. */ -void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) +int xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); struct xe_uc_fw_version *guc_version = >->sriov.vf.guc_version; @@ -1227,6 +1237,8 @@ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) GUC_RELAY_VERSION_LATEST_MAJOR, GUC_RELAY_VERSION_LATEST_MINOR); drm_printf(p, "\thandshake:\t%u.%u\n", pf_version->major, pf_version->minor); + + return 0; } static bool vf_post_migration_shutdown(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index a6f7127521a5..79878f21b1da 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -35,9 +35,9 @@ bool xe_gt_sriov_vf_sched_groups_enabled(struct xe_gt *gt); u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg); void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val); -void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p); -void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p); -void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p); int xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt); int xe_vf_migration_fixups_complete_count(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 8b55cf25a75f..fffb5d631b69 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -145,6 +145,13 @@ struct xe_gt { /** @info.has_indirect_ring_state: GT has indirect ring state support */ u8 has_indirect_ring_state:1; /** + * @info.has_xe2_blt_instructions: GT supports Xe2-style MEM_SET + * and MEM_COPY blitter functionality. Note that despite the + * name, some Xe1 platforms may also support this "Xe2-style" + * feature. + */ + u8 has_xe2_blt_instructions:1; + /** * @info.num_geometry_xecore_fuse_regs: Number of 32b-bit fuse * registers the geometry XeCore mask spans. */ diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 81b5f01b1f65..2b835d48b565 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -512,12 +512,9 @@ static void guc_golden_lrc_init(struct xe_guc_ads *ads) * that starts after the execlists LRC registers. This is * required to allow the GuC to restore just the engine state * when a watchdog reset occurs. - * We calculate the engine state size by removing the size of - * what comes before it in the context image (which is identical - * on all engines). */ ads_blob_write(ads, ads.eng_state_size[guc_class], - real_size - xe_lrc_skip_size(xe)); + xe_lrc_engine_state_size(gt, class)); ads_blob_write(ads, ads.golden_context_lrca[guc_class], addr_ggtt); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index a145234f662b..912182dc7704 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -261,22 +261,10 @@ static void guc_submit_sw_fini(struct drm_device *drm, void *arg) static void guc_submit_fini(void *arg) { struct xe_guc *guc = arg; - - /* Forcefully kill any remaining exec queues */ - xe_guc_ct_stop(&guc->ct); - guc_submit_reset_prepare(guc); - xe_guc_softreset(guc); - xe_guc_submit_stop(guc); - xe_uc_fw_sanitize(&guc->fw); - xe_guc_submit_pause_abort(guc); -} - -static void guc_submit_wedged_fini(void *arg) -{ - struct xe_guc *guc = arg; struct xe_exec_queue *q; unsigned long index; + /* Drop any wedged queue refs */ mutex_lock(&guc->submission_state.lock); xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { if (exec_queue_wedged(q)) { @@ -286,6 +274,14 @@ static void guc_submit_wedged_fini(void *arg) } } mutex_unlock(&guc->submission_state.lock); + + /* Forcefully kill any remaining exec queues */ + xe_guc_ct_stop(&guc->ct); + guc_submit_reset_prepare(guc); + xe_guc_softreset(guc); + xe_guc_submit_stop(guc); + xe_uc_fw_sanitize(&guc->fw); + xe_guc_submit_pause_abort(guc); } static const struct xe_exec_queue_ops guc_exec_queue_ops; @@ -1320,10 +1316,8 @@ static void disable_scheduling_deregister(struct xe_guc *guc, void xe_guc_submit_wedge(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); - struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; unsigned long index; - int err; xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); @@ -1335,15 +1329,6 @@ void xe_guc_submit_wedge(struct xe_guc *guc) return; if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) { - err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, - guc_submit_wedged_fini, guc); - if (err) { - xe_gt_err(gt, "Failed to register clean-up on wedged.mode=%s; " - "Although device is wedged.\n", - xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)); - return; - } - mutex_lock(&guc->submission_state.lock); xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) if (xe_exec_queue_get_unless_zero(q)) @@ -1688,6 +1673,14 @@ static void guc_exec_queue_fini(struct xe_exec_queue *q) struct xe_guc_exec_queue *ge = q->guc; struct xe_guc *guc = exec_queue_to_guc(q); + if (xe_exec_queue_is_multi_queue_secondary(q)) { + struct xe_exec_queue_group *group = q->multi_queue.group; + + mutex_lock(&group->list_lock); + list_del(&q->multi_queue.link); + mutex_unlock(&group->list_lock); + } + release_guc_id(guc, q); xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); @@ -1709,14 +1702,6 @@ static void __guc_exec_queue_destroy_async(struct work_struct *w) guard(xe_pm_runtime)(guc_to_xe(guc)); trace_xe_exec_queue_destroy(q); - if (xe_exec_queue_is_multi_queue_secondary(q)) { - struct xe_exec_queue_group *group = q->multi_queue.group; - - mutex_lock(&group->list_lock); - list_del(&q->multi_queue.link); - mutex_unlock(&group->list_lock); - } - /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&ge->sched.base.work_tdr); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 9d12a0d2f0b5..4af9f0d7c6f3 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -746,9 +746,16 @@ size_t xe_lrc_reg_size(struct xe_device *xe) return 80 * sizeof(u32); } -size_t xe_lrc_skip_size(struct xe_device *xe) +/** + * xe_lrc_engine_state_size() - Get size of the engine state within LRC + * @gt: the &xe_gt struct instance + * @class: Hardware engine class + * + * Returns: Size of the engine state + */ +size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class) { - return LRC_PPHWSP_SIZE + xe_lrc_reg_size(xe); + return xe_gt_lrc_hang_replay_size(gt, class) - xe_lrc_reg_size(gt_to_xe(gt)); } static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) @@ -1214,7 +1221,7 @@ static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc, if (xe_gt_WARN_ON(lrc->gt, max_len < 3)) return -ENOSPC; - *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1); + *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_LRM_CS_MMIO | MI_LRI_NUM_REGS(1); *cmd++ = CS_DEBUG_MODE2(0).addr; *cmd++ = REG_MASKED_FIELD_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE); diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index e7c975f9e2d9..5440663183f6 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -130,7 +130,7 @@ u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc); struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc); size_t xe_lrc_reg_size(struct xe_device *xe); -size_t xe_lrc_skip_size(struct xe_device *xe); +size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class); void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, diff --git a/drivers/gpu/drm/xe/xe_mem_pool.c b/drivers/gpu/drm/xe/xe_mem_pool.c new file mode 100644 index 000000000000..d5e24d6aa88d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mem_pool.c @@ -0,0 +1,403 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2026 Intel Corporation + */ + +#include <linux/kernel.h> + +#include <drm/drm_managed.h> + +#include "instructions/xe_mi_commands.h" +#include "xe_bo.h" +#include "xe_device_types.h" +#include "xe_map.h" +#include "xe_mem_pool.h" +#include "xe_mem_pool_types.h" +#include "xe_tile_printk.h" + +/** + * struct xe_mem_pool - DRM MM pool for sub-allocating memory from a BO on an + * XE tile. + * + * The XE memory pool is a DRM MM manager that provides sub-allocation of memory + * from a backing buffer object (BO) on a specific XE tile. It is designed to + * manage memory for GPU workloads, allowing for efficient allocation and + * deallocation of memory regions within the BO. + * + * The memory pool maintains a primary BO that is pinned in the GGTT and mapped + * into the CPU address space for direct access. Optionally, it can also maintain + * a shadow BO that can be used for atomic updates to the primary BO's contents. + * + * The API provided by the memory pool allows clients to allocate and free memory + * regions, retrieve GPU and CPU addresses, and synchronize data between the + * primary and shadow BOs as needed. + */ +struct xe_mem_pool { + /** @base: Range allocator over [0, @size) in bytes */ + struct drm_mm base; + /** @bo: Active pool BO (GGTT-pinned, CPU-mapped). */ + struct xe_bo *bo; + /** @shadow: Shadow BO for atomic command updates. */ + struct xe_bo *shadow; + /** @swap_guard: Timeline guard updating @bo and @shadow */ + struct mutex swap_guard; + /** @cpu_addr: CPU virtual address of the active BO. */ + void *cpu_addr; + /** @is_iomem: Indicates if the BO mapping is I/O memory. */ + bool is_iomem; +}; + +static struct xe_mem_pool *node_to_pool(struct xe_mem_pool_node *node) +{ + return container_of(node->sa_node.mm, struct xe_mem_pool, base); +} + +static struct xe_tile *pool_to_tile(struct xe_mem_pool *pool) +{ + return pool->bo->tile; +} + +static void fini_pool_action(struct drm_device *drm, void *arg) +{ + struct xe_mem_pool *pool = arg; + + if (pool->is_iomem) + kvfree(pool->cpu_addr); + + drm_mm_takedown(&pool->base); +} + +static int pool_shadow_init(struct xe_mem_pool *pool) +{ + struct xe_tile *tile = pool->bo->tile; + struct xe_device *xe = tile_to_xe(tile); + struct xe_bo *shadow; + int ret; + + xe_assert(xe, !pool->shadow); + + ret = drmm_mutex_init(&xe->drm, &pool->swap_guard); + if (ret) + return ret; + + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&pool->swap_guard); + fs_reclaim_release(GFP_KERNEL); + } + shadow = xe_managed_bo_create_pin_map(xe, tile, + xe_bo_size(pool->bo), + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); + if (IS_ERR(shadow)) + return PTR_ERR(shadow); + + pool->shadow = shadow; + + return 0; +} + +/** + * xe_mem_pool_init() - Initialize memory pool. + * @tile: the &xe_tile where allocate. + * @size: number of bytes to allocate. + * @guard: the size of the guard region at the end of the BO that is not + * sub-allocated, in bytes. + * @flags: flags to use to create shadow pool. + * + * Initializes a memory pool for sub-allocating memory from a backing BO on the + * specified XE tile. The backing BO is pinned in the GGTT and mapped into + * the CPU address space for direct access. Optionally, a shadow BO can also be + * initialized for atomic updates to the primary BO's contents. + * + * Returns: a pointer to the &xe_mem_pool, or an error pointer on failure. + */ +struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size, + u32 guard, int flags) +{ + struct xe_device *xe = tile_to_xe(tile); + struct xe_mem_pool *pool; + struct xe_bo *bo; + u32 managed_size; + int ret; + + xe_tile_assert(tile, size > guard); + managed_size = size - guard; + + pool = drmm_kzalloc(&xe->drm, sizeof(*pool), GFP_KERNEL); + if (!pool) + return ERR_PTR(-ENOMEM); + + bo = xe_managed_bo_create_pin_map(xe, tile, size, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_PINNED_NORESTORE); + if (IS_ERR(bo)) { + xe_tile_err(tile, "Failed to prepare %uKiB BO for mem pool (%pe)\n", + size / SZ_1K, bo); + return ERR_CAST(bo); + } + pool->bo = bo; + pool->is_iomem = bo->vmap.is_iomem; + + if (pool->is_iomem) { + pool->cpu_addr = kvzalloc(size, GFP_KERNEL); + if (!pool->cpu_addr) + return ERR_PTR(-ENOMEM); + } else { + pool->cpu_addr = bo->vmap.vaddr; + } + + if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY) { + ret = pool_shadow_init(pool); + + if (ret) + goto out_err; + } + + drm_mm_init(&pool->base, 0, managed_size); + ret = drmm_add_action_or_reset(&xe->drm, fini_pool_action, pool); + if (ret) + return ERR_PTR(ret); + + return pool; + +out_err: + if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY) + xe_tile_err(tile, + "Failed to initialize shadow BO for mem pool (%d)\n", ret); + if (bo->vmap.is_iomem) + kvfree(pool->cpu_addr); + return ERR_PTR(ret); +} + +/** + * xe_mem_pool_sync() - Copy the entire contents of the main pool to shadow pool. + * @pool: the memory pool containing the primary and shadow BOs. + * + * Copies the entire contents of the primary pool to the shadow pool. This must + * be done after xe_mem_pool_init() with the XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY + * flag to ensure that the shadow pool has the same initial contents as the primary + * pool. After this initial synchronization, clients can choose to synchronize the + * shadow pool with the primary pool on a node basis using + * xe_mem_pool_sync_shadow_locked() as needed. + * + * Return: None. + */ +void xe_mem_pool_sync(struct xe_mem_pool *pool) +{ + struct xe_tile *tile = pool_to_tile(pool); + struct xe_device *xe = tile_to_xe(tile); + + xe_tile_assert(tile, pool->shadow); + + xe_map_memcpy_to(xe, &pool->shadow->vmap, 0, + pool->cpu_addr, xe_bo_size(pool->bo)); +} + +/** + * xe_mem_pool_swap_shadow_locked() - Swap the primary BO with the shadow BO. + * @pool: the memory pool containing the primary and shadow BOs. + * + * Swaps the primary buffer object with the shadow buffer object in the mem + * pool. This allows for atomic updates to the contents of the primary BO + * by first writing to the shadow BO and then swapping it with the primary BO. + * Swap_guard must be held to ensure synchronization with any concurrent swap + * operations. + * + * Return: None. + */ +void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool) +{ + struct xe_tile *tile = pool_to_tile(pool); + + xe_tile_assert(tile, pool->shadow); + lockdep_assert_held(&pool->swap_guard); + + swap(pool->bo, pool->shadow); + if (!pool->bo->vmap.is_iomem) + pool->cpu_addr = pool->bo->vmap.vaddr; +} + +/** + * xe_mem_pool_sync_shadow_locked() - Copy node from primary pool to shadow pool. + * @node: the node allocated in the memory pool. + * + * Copies the specified batch buffer from the primary pool to the shadow pool. + * Swap_guard must be held to ensure synchronization with any concurrent swap + * operations. + * + * Return: None. + */ +void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node) +{ + struct xe_mem_pool *pool = node_to_pool(node); + struct xe_tile *tile = pool_to_tile(pool); + struct xe_device *xe = tile_to_xe(tile); + struct drm_mm_node *sa_node = &node->sa_node; + + xe_tile_assert(tile, pool->shadow); + lockdep_assert_held(&pool->swap_guard); + + xe_map_memcpy_to(xe, &pool->shadow->vmap, + sa_node->start, + pool->cpu_addr + sa_node->start, + sa_node->size); +} + +/** + * xe_mem_pool_gpu_addr() - Retrieve GPU address of memory pool. + * @pool: the memory pool + * + * Returns: GGTT address of the memory pool. + */ +u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool) +{ + return xe_bo_ggtt_addr(pool->bo); +} + +/** + * xe_mem_pool_cpu_addr() - Retrieve CPU address of manager pool. + * @pool: the memory pool + * + * Returns: CPU virtual address of memory pool. + */ +void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool) +{ + return pool->cpu_addr; +} + +/** + * xe_mem_pool_bo_swap_guard() - Retrieve the mutex used to guard swap + * operations on a memory pool. + * @pool: the memory pool + * + * Returns: Swap guard mutex or NULL if shadow pool is not created. + */ +struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool) +{ + if (!pool->shadow) + return NULL; + + return &pool->swap_guard; +} + +/** + * xe_mem_pool_bo_flush_write() - Copy the data from the sub-allocation + * to the GPU memory. + * @node: the node allocated in the memory pool to flush. + */ +void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node) +{ + struct xe_mem_pool *pool = node_to_pool(node); + struct xe_tile *tile = pool_to_tile(pool); + struct xe_device *xe = tile_to_xe(tile); + struct drm_mm_node *sa_node = &node->sa_node; + + if (!pool->bo->vmap.is_iomem) + return; + + xe_map_memcpy_to(xe, &pool->bo->vmap, sa_node->start, + pool->cpu_addr + sa_node->start, + sa_node->size); +} + +/** + * xe_mem_pool_bo_sync_read() - Copy the data from GPU memory to the + * sub-allocation. + * @node: the node allocated in the memory pool to read back. + */ +void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node) +{ + struct xe_mem_pool *pool = node_to_pool(node); + struct xe_tile *tile = pool_to_tile(pool); + struct xe_device *xe = tile_to_xe(tile); + struct drm_mm_node *sa_node = &node->sa_node; + + if (!pool->bo->vmap.is_iomem) + return; + + xe_map_memcpy_from(xe, pool->cpu_addr + sa_node->start, + &pool->bo->vmap, sa_node->start, sa_node->size); +} + +/** + * xe_mem_pool_alloc_node() - Allocate a new node for use with xe_mem_pool. + * + * Returns: node structure or an ERR_PTR(-ENOMEM). + */ +struct xe_mem_pool_node *xe_mem_pool_alloc_node(void) +{ + struct xe_mem_pool_node *node = kzalloc_obj(*node); + + if (!node) + return ERR_PTR(-ENOMEM); + + return node; +} + +/** + * xe_mem_pool_insert_node() - Insert a node into the memory pool. + * @pool: the memory pool to insert into + * @node: the node to insert + * @size: the size of the node to be allocated in bytes. + * + * Inserts a node into the specified memory pool using drm_mm for + * allocation. + * + * Returns: 0 on success or a negative error code on failure. + */ +int xe_mem_pool_insert_node(struct xe_mem_pool *pool, + struct xe_mem_pool_node *node, u32 size) +{ + if (!pool) + return -EINVAL; + + return drm_mm_insert_node(&pool->base, &node->sa_node, size); +} + +/** + * xe_mem_pool_free_node() - Free a node allocated from the memory pool. + * @node: the node to free + * + * Returns: None. + */ +void xe_mem_pool_free_node(struct xe_mem_pool_node *node) +{ + if (!node) + return; + + drm_mm_remove_node(&node->sa_node); + kfree(node); +} + +/** + * xe_mem_pool_node_cpu_addr() - Retrieve CPU address of the node. + * @node: the node allocated in the memory pool + * + * Returns: CPU virtual address of the node. + */ +void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node) +{ + struct xe_mem_pool *pool = node_to_pool(node); + + return xe_mem_pool_cpu_addr(pool) + node->sa_node.start; +} + +/** + * xe_mem_pool_dump() - Dump the state of the DRM MM manager for debugging. + * @pool: the memory pool info be dumped. + * @p: The DRM printer to use for output. + * + * Only the drm managed region is dumped, not the state of the BOs or any other + * pool information. + * + * Returns: None. + */ +void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p) +{ + drm_mm_print(&pool->base, p); +} diff --git a/drivers/gpu/drm/xe/xe_mem_pool.h b/drivers/gpu/drm/xe/xe_mem_pool.h new file mode 100644 index 000000000000..89cd2555fe91 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mem_pool.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ +#ifndef _XE_MEM_POOL_H_ +#define _XE_MEM_POOL_H_ + +#include <linux/sizes.h> +#include <linux/types.h> + +#include <drm/drm_mm.h> +#include "xe_mem_pool_types.h" + +struct drm_printer; +struct xe_mem_pool; +struct xe_tile; + +struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size, + u32 guard, int flags); +void xe_mem_pool_sync(struct xe_mem_pool *pool); +void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool); +void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node); +u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool); +void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool); +struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool); +void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node); +void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node); +struct xe_mem_pool_node *xe_mem_pool_alloc_node(void); +int xe_mem_pool_insert_node(struct xe_mem_pool *pool, + struct xe_mem_pool_node *node, u32 size); +void xe_mem_pool_free_node(struct xe_mem_pool_node *node); +void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node); +void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/xe/xe_mem_pool_types.h b/drivers/gpu/drm/xe/xe_mem_pool_types.h new file mode 100644 index 000000000000..d5e926c93351 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_mem_pool_types.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2026 Intel Corporation + */ + +#ifndef _XE_MEM_POOL_TYPES_H_ +#define _XE_MEM_POOL_TYPES_H_ + +#include <drm/drm_mm.h> + +#define XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY BIT(0) + +/** + * struct xe_mem_pool_node - Sub-range allocations from mem pool. + */ +struct xe_mem_pool_node { + /** @sa_node: drm_mm_node for this allocation. */ + struct drm_mm_node sa_node; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index 811e07136efb..579af47edc61 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -427,13 +427,25 @@ static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector, return __memirq_received(memirq, vector, offset, name, true); } +static void memirq_assume_received(struct xe_memirq *memirq, const char *source, + u16 offset, const char *status) +{ + memirq_debug(memirq, "ASSUME %s %s(%u)\n", source, status, offset); +} + static void memirq_dispatch_engine(struct xe_memirq *memirq, struct iosys_map *status, struct xe_hw_engine *hwe) { memirq_debug(memirq, "STATUS %s %*ph\n", hwe->name, 16, status->vaddr); - if (memirq_received(memirq, status, ilog2(GT_MI_USER_INTERRUPT), hwe->name)) - xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT); + /* + * The programming note says to assume that GT_MI_USER_INTERRUPT is always + * set. Check and clear related status byte just for a debug. + */ + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEMIRQ) && + !memirq_received(memirq, status, ilog2(GT_MI_USER_INTERRUPT), hwe->name)) + memirq_assume_received(memirq, hwe->name, ilog2(GT_MI_USER_INTERRUPT), "USER"); + xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT); } static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *status, @@ -443,8 +455,14 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat memirq_debug(memirq, "STATUS %s %*ph\n", name, 16, status->vaddr); - if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name)) - xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST); + /* + * The programming note says to assume that GUC_INTR_GUC2HOST is always + * set. Check and clear related status byte just for a debug. + */ + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEMIRQ) && + !memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name)) + memirq_assume_received(memirq, name, ilog2(GUC_INTR_GUC2HOST), "GUC2HOST"); + xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST); /* * This is a software interrupt that must be cleared after it's consumed diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index fc918b4fba54..a22413f892a0 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -29,6 +29,7 @@ #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_map.h" +#include "xe_mem_pool.h" #include "xe_mocs.h" #include "xe_printk.h" #include "xe_pt.h" @@ -1166,11 +1167,12 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, u32 batch_size, batch_size_allocated; struct xe_device *xe = gt_to_xe(gt); struct xe_res_cursor src_it, ccs_it; + struct xe_mem_pool *bb_pool; struct xe_sriov_vf_ccs_ctx *ctx; - struct xe_sa_manager *bb_pool; u64 size = xe_bo_size(src_bo); - struct xe_bb *bb = NULL; + struct xe_mem_pool_node *bb; u64 src_L0, src_L0_ofs; + struct xe_bb xe_bb_tmp; u32 src_L0_pt; int err; @@ -1208,18 +1210,18 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, size -= src_L0; } - bb = xe_bb_alloc(gt); + bb = xe_mem_pool_alloc_node(); if (IS_ERR(bb)) return PTR_ERR(bb); bb_pool = ctx->mem.ccs_bb_pool; - scoped_guard(mutex, xe_sa_bo_swap_guard(bb_pool)) { - xe_sa_bo_swap_shadow(bb_pool); + scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) { + xe_mem_pool_swap_shadow_locked(bb_pool); - err = xe_bb_init(bb, bb_pool, batch_size); + err = xe_mem_pool_insert_node(bb_pool, bb, batch_size * sizeof(u32)); if (err) { xe_gt_err(gt, "BB allocation failed.\n"); - xe_bb_free(bb, NULL); + kfree(bb); return err; } @@ -1227,6 +1229,7 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, size = xe_bo_size(src_bo); batch_size = 0; + xe_bb_tmp = (struct xe_bb){ .cs = xe_mem_pool_node_cpu_addr(bb), .len = 0 }; /* * Emit PTE and copy commands here. * The CCS copy command can only support limited size. If the size to be @@ -1255,24 +1258,27 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE)); batch_size += EMIT_COPY_CCS_DW; - emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src); + emit_pte(m, &xe_bb_tmp, src_L0_pt, false, true, &src_it, src_L0, src); - emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src); + emit_pte(m, &xe_bb_tmp, ccs_pt, false, false, &ccs_it, ccs_size, src); - bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); - flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt, + xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len, + flush_flags); + flush_flags = xe_migrate_ccs_copy(m, &xe_bb_tmp, src_L0_ofs, src_is_pltt, src_L0_ofs, dst_is_pltt, src_L0, ccs_ofs, true); - bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags); + xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len, + flush_flags); size -= src_L0; } - xe_assert(xe, (batch_size_allocated == bb->len)); + xe_assert(xe, (batch_size_allocated == xe_bb_tmp.len)); + xe_assert(xe, bb->sa_node.size == xe_bb_tmp.len * sizeof(u32)); src_bo->bb_ccs[read_write] = bb; xe_sriov_vf_ccs_rw_update_bb_addr(ctx); - xe_sa_bo_sync_shadow(bb->bo); + xe_mem_pool_sync_shadow_locked(bb); } return 0; @@ -1297,10 +1303,10 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q, void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo, enum xe_sriov_vf_ccs_rw_ctxs read_write) { - struct xe_bb *bb = src_bo->bb_ccs[read_write]; + struct xe_mem_pool_node *bb = src_bo->bb_ccs[read_write]; struct xe_device *xe = xe_bo_device(src_bo); + struct xe_mem_pool *bb_pool; struct xe_sriov_vf_ccs_ctx *ctx; - struct xe_sa_manager *bb_pool; u32 *cs; xe_assert(xe, IS_SRIOV_VF(xe)); @@ -1308,17 +1314,17 @@ void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo, ctx = &xe->sriov.vf.ccs.contexts[read_write]; bb_pool = ctx->mem.ccs_bb_pool; - guard(mutex) (xe_sa_bo_swap_guard(bb_pool)); - xe_sa_bo_swap_shadow(bb_pool); - - cs = xe_sa_bo_cpu_addr(bb->bo); - memset(cs, MI_NOOP, bb->len * sizeof(u32)); - xe_sriov_vf_ccs_rw_update_bb_addr(ctx); + scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) { + xe_mem_pool_swap_shadow_locked(bb_pool); - xe_sa_bo_sync_shadow(bb->bo); + cs = xe_mem_pool_node_cpu_addr(bb); + memset(cs, MI_NOOP, bb->sa_node.size); + xe_sriov_vf_ccs_rw_update_bb_addr(ctx); - xe_bb_free(bb, NULL); - src_bo->bb_ccs[read_write] = NULL; + xe_mem_pool_sync_shadow_locked(bb); + xe_mem_pool_free_node(bb); + src_bo->bb_ccs[read_write] = NULL; + } } /** @@ -1518,23 +1524,9 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb, bb->len += len; } -static bool has_service_copy_support(struct xe_gt *gt) -{ - /* - * What we care about is whether the architecture was designed with - * service copy functionality (specifically the new MEM_SET / MEM_COPY - * instructions) so check the architectural engine list rather than the - * actual list since these instructions are usable on BCS0 even if - * all of the actual service copy engines (BCS1-BCS8) have been fused - * off. - */ - return gt->info.engine_mask & GENMASK(XE_HW_ENGINE_BCS8, - XE_HW_ENGINE_BCS1); -} - static u32 emit_clear_cmd_len(struct xe_gt *gt) { - if (has_service_copy_support(gt)) + if (gt->info.has_xe2_blt_instructions) return PVC_MEM_SET_CMD_LEN_DW; else return XY_FAST_COLOR_BLT_DW; @@ -1543,7 +1535,7 @@ static u32 emit_clear_cmd_len(struct xe_gt *gt) static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, u32 size, u32 pitch, bool is_vram) { - if (has_service_copy_support(gt)) + if (gt->info.has_xe2_blt_instructions) emit_clear_link_copy(gt, bb, src_ofs, size, pitch); else emit_clear_main_copy(gt, bb, src_ofs, size, pitch, diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 6337e671c97a..d908f4e03906 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -2032,8 +2032,10 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) return -ENOENT; - if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) - return -EOPNOTSUPP; + if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) { + ret = -EOPNOTSUPP; + goto err_exec_q; + } } /* diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 01673d2b2464..c2ecd27ec770 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -118,6 +118,7 @@ static const struct xe_graphics_desc graphics_xe2 = { static const struct xe_graphics_desc graphics_xe3p_lpg = { XE2_GFX_FEATURES, + .has_indirect_ring_state = 1, .multi_queue_engine_class_mask = BIT(XE_ENGINE_CLASS_COPY) | BIT(XE_ENGINE_CLASS_COMPUTE), .num_geometry_xecore_fuse_regs = 3, .num_compute_xecore_fuse_regs = 3, @@ -851,6 +852,15 @@ static struct xe_gt *alloc_primary_gt(struct xe_tile *tile, gt->info.num_compute_xecore_fuse_regs = graphics_desc->num_compute_xecore_fuse_regs; /* + * Even if the service copy engines wind up being fused off, their + * presence in the IP descriptor indicates that the platform supports + * Xe2-style MEM_SET and MEM_COPY functionality. + */ + if (graphics_desc->hw_engine_mask & GENMASK(XE_HW_ENGINE_BCS8, + XE_HW_ENGINE_BCS1)) + gt->info.has_xe2_blt_instructions = true; + + /* * Before media version 13, the media IP was part of the primary GT * so we need to add the media engines to the primary GT's engine list. */ diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 80577e4b7437..8cc313182968 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -226,7 +226,7 @@ void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent, } range_start = reg & REG_GENMASK(25, range_bit); - range_end = range_start | REG_GENMASK(range_bit, 0); + range_end = range_start | REG_GENMASK(range_bit - 1, 0); switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) { case RING_FORCE_TO_NONPRIV_ACCESS_RW: diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c index 6c4b16409cc9..150a241110fb 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c @@ -149,10 +149,11 @@ pf_migration_consume(struct xe_device *xe, unsigned int vfid) for_each_gt(gt, xe, gt_id) { data = xe_gt_sriov_pf_migration_save_consume(gt, vfid); - if (data && PTR_ERR(data) != EAGAIN) + if (!data) + continue; + if (!IS_ERR(data) || PTR_ERR(data) != -EAGAIN) return data; - if (PTR_ERR(data) == -EAGAIN) - more_data = true; + more_data = true; } if (!more_data) diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c index db023fb66a27..09b99fb2608b 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c @@ -14,9 +14,9 @@ #include "xe_guc.h" #include "xe_guc_submit.h" #include "xe_lrc.h" +#include "xe_mem_pool.h" #include "xe_migrate.h" #include "xe_pm.h" -#include "xe_sa.h" #include "xe_sriov_printk.h" #include "xe_sriov_vf.h" #include "xe_sriov_vf_ccs.h" @@ -141,43 +141,47 @@ static u64 get_ccs_bb_pool_size(struct xe_device *xe) static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx) { + struct xe_mem_pool *pool; struct xe_device *xe = tile_to_xe(tile); - struct xe_sa_manager *sa_manager; + u32 *pool_cpu_addr, *last_dw_addr; u64 bb_pool_size; - int offset, err; + int err; bb_pool_size = get_ccs_bb_pool_size(xe); xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n", ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M); - sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16, - XE_SA_BO_MANAGER_FLAG_SHADOW); - - if (IS_ERR(sa_manager)) { - xe_sriov_err(xe, "Suballocator init failed with error: %pe\n", - sa_manager); - err = PTR_ERR(sa_manager); + pool = xe_mem_pool_init(tile, bb_pool_size, sizeof(u32), + XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY); + if (IS_ERR(pool)) { + xe_sriov_err(xe, "xe_mem_pool_init failed with error: %pe\n", + pool); + err = PTR_ERR(pool); return err; } - offset = 0; - xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP, - bb_pool_size); - xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP, - bb_pool_size); + pool_cpu_addr = xe_mem_pool_cpu_addr(pool); + memset(pool_cpu_addr, 0, bb_pool_size); - offset = bb_pool_size - sizeof(u32); - xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END); - xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END); + last_dw_addr = pool_cpu_addr + (bb_pool_size / sizeof(u32)) - 1; + *last_dw_addr = MI_BATCH_BUFFER_END; - ctx->mem.ccs_bb_pool = sa_manager; + /** + * Sync the main copy and shadow copy so that the shadow copy is + * replica of main copy. We sync only BBs after init part. So, we + * need to make sure the main pool and shadow copy are in sync after + * this point. This is needed as GuC may read the BB commands from + * shadow copy. + */ + xe_mem_pool_sync(pool); + ctx->mem.ccs_bb_pool = pool; return 0; } static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx) { - u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool); + u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool); struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); u32 dw[10], i = 0; @@ -388,7 +392,7 @@ err_ret: #define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32)) void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx) { - u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool); + u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool); struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q); struct xe_device *xe = gt_to_xe(ctx->mig_q->gt); @@ -412,8 +416,8 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo) struct xe_device *xe = xe_bo_device(bo); enum xe_sriov_vf_ccs_rw_ctxs ctx_id; struct xe_sriov_vf_ccs_ctx *ctx; + struct xe_mem_pool_node *bb; struct xe_tile *tile; - struct xe_bb *bb; int err = 0; xe_assert(xe, IS_VF_CCS_READY(xe)); @@ -445,7 +449,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo) { struct xe_device *xe = xe_bo_device(bo); enum xe_sriov_vf_ccs_rw_ctxs ctx_id; - struct xe_bb *bb; + struct xe_mem_pool_node *bb; xe_assert(xe, IS_VF_CCS_READY(xe)); @@ -471,8 +475,8 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo) */ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p) { - struct xe_sa_manager *bb_pool; enum xe_sriov_vf_ccs_rw_ctxs ctx_id; + struct xe_mem_pool *bb_pool; if (!IS_VF_CCS_READY(xe)) return; @@ -485,7 +489,7 @@ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p) drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read"); drm_printf(p, "-------------------------\n"); - drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool)); + xe_mem_pool_dump(bb_pool, p); drm_puts(p, "\n"); } } diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h index 22c499943d2a..6fc8f97ef3f4 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h @@ -17,9 +17,6 @@ enum xe_sriov_vf_ccs_rw_ctxs { XE_SRIOV_VF_CCS_CTX_COUNT }; -struct xe_migrate; -struct xe_sa_manager; - /** * struct xe_sriov_vf_ccs_ctx - VF CCS migration context data. */ @@ -33,7 +30,7 @@ struct xe_sriov_vf_ccs_ctx { /** @mem: memory data */ struct { /** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */ - struct xe_sa_manager *ccs_bb_pool; + struct xe_mem_pool *ccs_bb_pool; } mem; }; diff --git a/drivers/gpu/drm/xe/xe_tile_types.h b/drivers/gpu/drm/xe/xe_tile_types.h index 33932fd547d7..0048100ccb72 100644 --- a/drivers/gpu/drm/xe/xe_tile_types.h +++ b/drivers/gpu/drm/xe/xe_tile_types.h @@ -106,8 +106,6 @@ struct xe_tile { struct xe_lmtt lmtt; } pf; struct { - /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ - struct xe_ggtt_node *ggtt_balloon[2]; /** @sriov.vf.self_config: VF configuration data */ struct xe_tile_sriov_vf_selfconfig self_config; } vf; diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index f8de6a4bf189..fcb6698abc6e 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -97,7 +97,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { { XE_RTP_NAME("Tuning: Set STLB Bank Hash Mode to 4KB"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, XE_RTP_END_VERSION_UNDEFINED), IS_INTEGRATED), - XE_RTP_ACTIONS(FIELD_SET(XEHP_GAMSTLB_CTRL, BANK_HASH_MODE, + XE_RTP_ACTIONS(FIELD_SET(GAMSTLB_CTRL, BANK_HASH_MODE, BANK_HASH_4KB_MODE)) }, }; @@ -129,7 +129,7 @@ static const struct xe_rtp_entry_sr engine_tunings[] = { static const struct xe_rtp_entry_sr lrc_tunings[] = { { XE_RTP_NAME("Tuning: Windower HW Filtering"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3599), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, HW_FILTERING)) + XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN4, HW_FILTERING)) }, /* DG2 */ diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 56e2db50bb36..ab6cc1f0a789 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1120,6 +1120,25 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, xe_bo_assert_held(bo); + /* + * Reject only WILLNEED mappings on DONTNEED/PURGED BOs. This + * gates new vm_bind ioctls (user supplies WILLNEED) while + * still allowing partial-unbind / remap splits whose new VMAs + * inherit the parent's DONTNEED attr. It must also run before + * xe_bo_willneed_get_locked() below so a 0->1 holder bump + * cannot silently promote DONTNEED back to WILLNEED. + */ + if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) { + if (xe_bo_madv_is_dontneed(bo)) { + xe_vma_free(vma); + return ERR_PTR(-EBUSY); + } + if (xe_bo_is_purged(bo)) { + xe_vma_free(vma); + return ERR_PTR(-EINVAL); + } + } + vm_bo = drm_gpuvm_bo_obtain_locked(vma->gpuva.vm, &bo->ttm.base); if (IS_ERR(vm_bo)) { xe_vma_free(vma); @@ -1131,6 +1150,10 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->gpuva.gem.offset = bo_offset_or_userptr; drm_gpuva_link(&vma->gpuva, vm_bo); drm_gpuvm_bo_put(vm_bo); + + xe_bo_vma_count_inc_locked(bo); + if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) + xe_bo_willneed_get_locked(bo); } else /* userptr or null */ { if (!is_null && !is_cpu_addr_mirror) { struct xe_userptr_vma *uvma = to_userptr_vma(vma); @@ -1208,7 +1231,10 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) xe_bo_assert_held(bo); drm_gpuva_unlink(&vma->gpuva); - xe_bo_recompute_purgeable_state(bo); + + xe_bo_vma_count_dec_locked(bo); + if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) + xe_bo_willneed_put_locked(bo); } xe_vm_assert_held(vm); @@ -3016,7 +3042,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, * @res_evict: Allow evicting resources during validation * @validate: Perform BO validation * @request_decompress: Request BO decompression - * @check_purged: Reject operation if BO is purged + * @check_purged: Reject operation if BO is DONTNEED or PURGED */ struct xe_vma_lock_and_validate_flags { u32 res_evict : 1; @@ -3030,6 +3056,7 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); + bool validate_bo = flags.validate; int err = 0; if (bo) { @@ -3044,7 +3071,11 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, err = -EINVAL; /* BO already purged */ } - if (!err && flags.validate) + /* Don't validate the BO for DONTNEED/PURGED remap remnants. */ + if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_WILLNEED) + validate_bo = false; + + if (!err && validate_bo) err = xe_bo_validate(bo, vm, xe_vm_allow_vm_eviction(vm) && flags.res_evict, exec); @@ -3152,7 +3183,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, op->map.immediate, .request_decompress = op->map.request_decompress, - .check_purged = true, + .check_purged = false, }); break; case DRM_GPUVA_OP_REMAP: @@ -3174,7 +3205,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, .res_evict = res_evict, .validate = true, .request_decompress = false, - .check_purged = true, + .check_purged = false, }); if (!err && op->remap.next) err = vma_lock_and_validate(exec, op->remap.next, @@ -3182,7 +3213,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, .res_evict = res_evict, .validate = true, .request_decompress = false, - .check_purged = true, + .check_purged = false, }); break; case DRM_GPUVA_OP_UNMAP: @@ -3211,9 +3242,11 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, } /* - * Prefetch attempts to migrate BO's backing store without - * repopulating it first. Purged BOs have no backing store - * to migrate, so reject the operation. + * PREFETCH is the only op that still gates on BO purge state. + * MAP/REMAP handle this inside xe_vma_create() so partial + * unbind on a DONTNEED BO still works. PREFETCH skips + * xe_vma_create() and would migrate a BO with no backing + * store, so reject DONTNEED/PURGED here. */ err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.prefetch.va), @@ -3658,6 +3691,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || + XE_IOCTL_DBG(xe, !IS_DGFX(xe) && coh_mode == XE_COH_NONE && + is_cpu_addr_mirror) || XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) && (op == DRM_XE_VM_BIND_OP_MAP_USERPTR || is_cpu_addr_mirror) && @@ -4156,7 +4191,8 @@ int xe_vm_get_property_ioctl(struct drm_device *drm, void *data, int ret = 0; if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] || - args->reserved[2]))) + args->reserved[2] || args->extensions || + args->pad))) return -EINVAL; vm = xe_vm_lookup(xef, args->vm_id); diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c index 66f00d3f5c07..c4fb29004195 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.c +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c @@ -186,147 +186,6 @@ static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm, } /** - * xe_bo_is_dmabuf_shared() - Check if BO is shared via dma-buf - * @bo: Buffer object - * - * Prevent marking imported or exported dma-bufs as purgeable. - * For imported BOs, Xe doesn't own the backing store and cannot - * safely reclaim pages (exporter or other devices may still be - * using them). For exported BOs, external devices may have active - * mappings we cannot track. - * - * Return: true if BO is imported or exported, false otherwise - */ -static bool xe_bo_is_dmabuf_shared(struct xe_bo *bo) -{ - struct drm_gem_object *obj = &bo->ttm.base; - - /* Imported: exporter owns backing store */ - if (drm_gem_is_imported(obj)) - return true; - - /* Exported: external devices may be accessing */ - if (obj->dma_buf) - return true; - - return false; -} - -/** - * enum xe_bo_vmas_purge_state - VMA purgeable state aggregation - * - * Distinguishes whether a BO's VMAs are all DONTNEED, have at least - * one WILLNEED, or have no VMAs at all. - * - * Enum values align with XE_MADV_PURGEABLE_* states for consistency. - */ -enum xe_bo_vmas_purge_state { - /** @XE_BO_VMAS_STATE_WILLNEED: At least one VMA is WILLNEED */ - XE_BO_VMAS_STATE_WILLNEED = 0, - /** @XE_BO_VMAS_STATE_DONTNEED: All VMAs are DONTNEED */ - XE_BO_VMAS_STATE_DONTNEED = 1, - /** @XE_BO_VMAS_STATE_NO_VMAS: BO has no VMAs */ - XE_BO_VMAS_STATE_NO_VMAS = 2, -}; - -/* - * xe_bo_recompute_purgeable_state() casts between xe_bo_vmas_purge_state and - * xe_madv_purgeable_state. Enforce that WILLNEED=0 and DONTNEED=1 match across - * both enums so the single-line cast is always valid. - */ -static_assert(XE_BO_VMAS_STATE_WILLNEED == (int)XE_MADV_PURGEABLE_WILLNEED, - "VMA purge state WILLNEED must equal madv purgeable WILLNEED"); -static_assert(XE_BO_VMAS_STATE_DONTNEED == (int)XE_MADV_PURGEABLE_DONTNEED, - "VMA purge state DONTNEED must equal madv purgeable DONTNEED"); - -/** - * xe_bo_all_vmas_dontneed() - Determine BO VMA purgeable state - * @bo: Buffer object - * - * Check all VMAs across all VMs to determine aggregate purgeable state. - * Shared BOs require unanimous DONTNEED state from all mappings. - * - * Caller must hold BO dma-resv lock. - * - * Return: XE_BO_VMAS_STATE_DONTNEED if all VMAs are DONTNEED, - * XE_BO_VMAS_STATE_WILLNEED if at least one VMA is not DONTNEED, - * XE_BO_VMAS_STATE_NO_VMAS if BO has no VMAs - */ -static enum xe_bo_vmas_purge_state xe_bo_all_vmas_dontneed(struct xe_bo *bo) -{ - struct drm_gpuvm_bo *vm_bo; - struct drm_gpuva *gpuva; - struct drm_gem_object *obj = &bo->ttm.base; - bool has_vmas = false; - - xe_bo_assert_held(bo); - - /* Shared dma-bufs cannot be purgeable */ - if (xe_bo_is_dmabuf_shared(bo)) - return XE_BO_VMAS_STATE_WILLNEED; - - drm_gem_for_each_gpuvm_bo(vm_bo, obj) { - drm_gpuvm_bo_for_each_va(gpuva, vm_bo) { - struct xe_vma *vma = gpuva_to_vma(gpuva); - - has_vmas = true; - - /* Any non-DONTNEED VMA prevents purging */ - if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_DONTNEED) - return XE_BO_VMAS_STATE_WILLNEED; - } - } - - /* - * No VMAs => preserve existing BO purgeable state. - * Avoids incorrectly flipping DONTNEED -> WILLNEED when last VMA unmapped. - */ - if (!has_vmas) - return XE_BO_VMAS_STATE_NO_VMAS; - - return XE_BO_VMAS_STATE_DONTNEED; -} - -/** - * xe_bo_recompute_purgeable_state() - Recompute BO purgeable state from VMAs - * @bo: Buffer object - * - * Walk all VMAs to determine if BO should be purgeable or not. - * Shared BOs require unanimous DONTNEED state from all mappings. - * If the BO has no VMAs the existing state is preserved. - * - * Locking: Caller must hold BO dma-resv lock. When iterating GPUVM lists, - * VM lock must also be held (write) to prevent concurrent VMA modifications. - * This is satisfied at both call sites: - * - xe_vma_destroy(): holds vm->lock write - * - madvise_purgeable(): holds vm->lock write (from madvise ioctl path) - * - * Return: nothing - */ -void xe_bo_recompute_purgeable_state(struct xe_bo *bo) -{ - enum xe_bo_vmas_purge_state vma_state; - - if (!bo) - return; - - xe_bo_assert_held(bo); - - /* - * Once purged, always purged. Cannot transition back to WILLNEED. - * This matches i915 semantics where purged BOs are permanently invalid. - */ - if (bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED) - return; - - vma_state = xe_bo_all_vmas_dontneed(bo); - - if (vma_state != (enum xe_bo_vmas_purge_state)bo->madv_purgeable && - vma_state != XE_BO_VMAS_STATE_NO_VMAS) - xe_bo_set_purgeable_state(bo, (enum xe_madv_purgeable_state)vma_state); -} - -/** * madvise_purgeable - Handle purgeable buffer object advice * @xe: XE device * @vm: VM @@ -359,12 +218,6 @@ static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm, /* BO must be locked before modifying madv state */ xe_bo_assert_held(bo); - /* Skip shared dma-bufs - no PTEs to zap */ - if (xe_bo_is_dmabuf_shared(bo)) { - vmas[i]->skip_invalidation = true; - continue; - } - /* * Once purged, always purged. Cannot transition back to WILLNEED. * This matches i915 semantics where purged BOs are permanently invalid. @@ -377,13 +230,14 @@ static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm, switch (op->purge_state_val.val) { case DRM_XE_VMA_PURGEABLE_STATE_WILLNEED: - vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_WILLNEED; vmas[i]->skip_invalidation = true; - - xe_bo_recompute_purgeable_state(bo); + /* Only act on a real DONTNEED -> WILLNEED transition. */ + if (vmas[i]->attr.purgeable_state == XE_MADV_PURGEABLE_DONTNEED) { + vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_WILLNEED; + xe_bo_willneed_get_locked(bo); + } break; case DRM_XE_VMA_PURGEABLE_STATE_DONTNEED: - vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_DONTNEED; /* * Don't zap PTEs at DONTNEED time -- pages are still * alive. The zap happens in xe_bo_move_notify() right @@ -391,7 +245,11 @@ static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm, */ vmas[i]->skip_invalidation = true; - xe_bo_recompute_purgeable_state(bo); + /* Only act on a real WILLNEED -> DONTNEED transition. */ + if (vmas[i]->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) { + vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_DONTNEED; + xe_bo_willneed_put_locked(bo); + } break; default: /* Should never hit - values validated in madvise_args_are_sane() */ @@ -621,6 +479,45 @@ static int xe_madvise_purgeable_retained_to_user(const struct xe_madvise_details return 0; } +static bool check_pat_args_are_sane(struct xe_device *xe, + struct xe_vmas_in_madvise_range *madvise_range, + u16 pat_index) +{ + u16 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); + int i; + + /* + * Using coh_none with CPU cached buffers is not allowed on iGPU. + * On iGPU the GPU shares the LLC with the CPU, so with coh_none + * the GPU bypasses CPU caches and reads directly from DRAM, + * potentially seeing stale sensitive data from previously freed + * pages. On dGPU this restriction does not apply, because the + * platform does not provide a non-coherent system memory access + * path that would violate the DMA coherency contract. + */ + if (coh_mode != XE_COH_NONE || IS_DGFX(xe)) + return true; + + for (i = 0; i < madvise_range->num_vmas; i++) { + struct xe_vma *vma = madvise_range->vmas[i]; + struct xe_bo *bo = xe_vma_bo(vma); + + if (bo) { + /* BO with WB caching + COH_NONE is not allowed */ + if (XE_IOCTL_DBG(xe, bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) + return false; + /* Imported dma-buf without caching info, assume cached */ + if (XE_IOCTL_DBG(xe, !bo->cpu_caching)) + return false; + } else if (XE_IOCTL_DBG(xe, xe_vma_is_cpu_addr_mirror(vma) || + xe_vma_is_userptr(vma))) + /* System memory (userptr/SVM) is always CPU cached */ + return false; + } + + return true; +} + static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas, int num_vmas, u32 atomic_val) { @@ -750,6 +647,14 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil } } + if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) { + if (!check_pat_args_are_sane(xe, &madvise_range, + args->pat_index.val)) { + err = -EINVAL; + goto free_vmas; + } + } + if (madvise_range.has_bo_vmas) { if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) { if (!check_bo_args_are_sane(vm, madvise_range.vmas, diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h index 39acd2689ca0..a3078f634c7e 100644 --- a/drivers/gpu/drm/xe/xe_vm_madvise.h +++ b/drivers/gpu/drm/xe/xe_vm_madvise.h @@ -13,6 +13,4 @@ struct xe_bo; int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -void xe_bo_recompute_purgeable_state(struct xe_bo *bo); - #endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 546296f0220b..33df43d0bede 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -651,7 +651,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { }, { XE_RTP_NAME("18033852989"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) + XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) }, { XE_RTP_NAME("15016589081"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), @@ -743,14 +743,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) }, - { XE_RTP_NAME("14019988906"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) - }, - { XE_RTP_NAME("14019877138"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) - }, { XE_RTP_NAME("14021490052"), XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(FF_MODE, @@ -762,7 +754,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { }, { XE_RTP_NAME("22021007897"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) + XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) }, /* Xe3_LPG */ @@ -778,7 +770,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { }, { XE_RTP_NAME("22021007897"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) + XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) }, { XE_RTP_NAME("14024681466"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), |
