summaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c57
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c388
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c322
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c66
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_userqueue.c95
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v1_0.c64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c52
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c33
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c15
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h10
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c15
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c24
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c44
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser.c73
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_bios_types.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c94
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_factory.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c8
-rw-r--r--drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h4
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c13
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c1
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c1
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c2
-rw-r--r--drivers/gpu/drm/bridge/chipone-icn6211.c4
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c40
-rw-r--r--drivers/gpu/drm/bridge/ite-it66121.c5
-rw-r--r--drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c16
-rw-r--r--drivers/gpu/drm/bridge/tda998x_drv.c4
-rw-r--r--drivers/gpu/drm/drm_color_mgmt.c2
-rw-r--r--drivers/gpu/drm/drm_drv.c2
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c2
-rw-r--r--drivers/gpu/drm/drm_gem.c64
-rw-r--r--drivers/gpu/drm/drm_gem_framebuffer_helper.c4
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_sched.c16
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_mic.c8
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_hdmi.c1
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_lvds.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_types.h1
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c11
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpcd.h15
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.c44
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c3
-rw-r--r--drivers/gpu/drm/i915/i915_driver.c5
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_trace.c2
-rw-r--r--drivers/gpu/drm/loongson/lsdc_drv.c2
-rw-r--r--drivers/gpu/drm/mediatek/mtk_cec.c2
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c2
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c2
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_v2.c2
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c7
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_hfi.c2
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_device.c2
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.c8
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_13_0_kaanapali.h2
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c12
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c3
-rw-r--r--drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c28
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_host.c1
-rw-r--r--drivers/gpu/drm/msm/msm_drv.c11
-rw-r--r--drivers/gpu/drm/msm/msm_drv.h7
-rw-r--r--drivers/gpu/drm/msm/msm_gem.c33
-rw-r--r--drivers/gpu/drm/msm/msm_gem_shrinker.c44
-rw-r--r--drivers/gpu/drm/msm/msm_gem_submit.c6
-rw-r--r--drivers/gpu/drm/msm/msm_gem_vma.c12
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c4
-rw-r--r--drivers/gpu/drm/msm/msm_iommu.c5
-rw-r--r--drivers/gpu/drm/msm/msm_ringbuffer.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/base.c11
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c18
-rw-r--r--drivers/gpu/drm/panel/Kconfig1
-rw-r--r--drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c2
-rw-r--r--drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c4
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx83102.c2
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx83121a.c4
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_drv.c2
-rw-r--r--drivers/gpu/drm/qxl/qxl_drv.c6
-rw-r--r--drivers/gpu/drm/radeon/ci_dpm.c9
-rw-r--r--drivers/gpu/drm/radeon/evergreen_cs.c6
-rw-r--r--drivers/gpu/drm/sti/sti_hda.c8
-rw-r--r--drivers/gpu/drm/sysfb/ofdrm.c2
-rw-r--r--drivers/gpu/drm/tiny/appletbdrm.c4
-rw-r--r--drivers/gpu/drm/tiny/bochs.c10
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c18
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_util.c11
-rw-r--r--drivers/gpu/drm/ttm/ttm_pool.c18
-rw-r--r--drivers/gpu/drm/ttm/ttm_resource.c18
-rw-r--r--drivers/gpu/drm/udl/udl_main.c3
-rw-r--r--drivers/gpu/drm/udl/udl_modeset.c5
-rw-r--r--drivers/gpu/drm/v3d/v3d_sched.c16
-rw-r--r--drivers/gpu/drm/v3d/v3d_submit.c22
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_drv.h1
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_gem.c17
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_plane.c10
-rw-r--r--drivers/gpu/drm/xe/Makefile1
-rw-r--r--drivers/gpu/drm/xe/display/xe_hdcp_gsc.c12
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h6
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c14
-rw-r--r--drivers/gpu/drm/xe/xe_bo.h88
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h31
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.c65
-rw-r--r--drivers/gpu/drm/xe/xe_eu_stall.c4
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c9
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.c7
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c8
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c6
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.c24
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.h6
-rw-r--r--drivers/gpu/drm/xe/xe_gt_types.h7
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.c5
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c49
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c13
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.h2
-rw-r--r--drivers/gpu/drm/xe/xe_mem_pool.c403
-rw-r--r--drivers/gpu/drm/xe/xe_mem_pool.h35
-rw-r--r--drivers/gpu/drm/xe/xe_mem_pool_types.h21
-rw-r--r--drivers/gpu/drm/xe/xe_memirq.c26
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c74
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c6
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c10
-rw-r--r--drivers/gpu/drm/xe/xe_reg_whitelist.c2
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_migration.c7
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs.c54
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h5
-rw-r--r--drivers/gpu/drm/xe/xe_tile_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_tuning.c4
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c56
-rw-r--r--drivers/gpu/drm/xe/xe_vm_madvise.c209
-rw-r--r--drivers/gpu/drm/xe/xe_vm_madvise.h2
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c14
192 files changed, 2414 insertions, 1347 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8bc591deb546..fd50da4c7b18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1190,7 +1190,6 @@ struct amdgpu_device {
bool apu_prefer_gtt;
bool userq_halt_for_enforce_isolation;
- struct work_struct userq_reset_work;
struct amdgpu_uid *uid_info;
struct amdgpu_uma_carveout_info uma_info;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index d9e283f3b57d..9783a3cefb04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -36,6 +36,9 @@
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu_reset.h"
+#if IS_ENABLED(CONFIG_HSA_AMD)
+#include "kfd_priv.h"
+#endif
/* Total memory size in system memory and all GPU VRAM. Used to
* estimate worst case amount of memory to reserve for page tables
@@ -320,6 +323,28 @@ void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
(void)amdgpu_reset_domain_schedule(adev->reset_domain, &adev->kfd.reset_work);
}
+void amdgpu_amdkfd_clear_kfd_mapping(struct amdgpu_device *adev)
+{
+#if IS_ENABLED(CONFIG_HSA_AMD)
+ struct kfd_dev *kfd = adev->kfd.dev;
+ unsigned int i;
+
+ if (!kfd)
+ return;
+
+ for (i = 0; i < kfd->num_nodes; i++) {
+ struct kfd_node *node = kfd->nodes[i];
+
+ kfd_dev_unmap_mapping_range(KFD_MMAP_TYPE_DOORBELL |
+ KFD_MMAP_GPU_ID(node->id),
+ kfd_doorbell_process_slice(kfd));
+ kfd_dev_unmap_mapping_range(KFD_MMAP_TYPE_MMIO |
+ KFD_MMAP_GPU_ID(node->id),
+ PAGE_SIZE);
+ }
+#endif
+}
+
int amdgpu_amdkfd_alloc_kernel_mem(struct amdgpu_device *adev, size_t size,
u32 domain, void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool cp_mqd_gfx9)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index cdbab7f8cee8..2b4108f83f48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -358,6 +358,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag, int8_t xcp_id);
void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag, int8_t xcp_id);
+void amdgpu_amdkfd_clear_kfd_mapping(struct amdgpu_device *adev);
u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 737ef1ef96a5..feab90e3efd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2839,8 +2839,12 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
* that checks whether the PSP is running. A solution for those issues
* in the APU is to trigger a GPU reset, but this should be done during
* the unload phase to avoid adding boot latency and screen flicker.
+ * GFX V11 has GC block as default off IP. Every time AMDGPU driver sends
+ * a request to PMFW to unload MP1, PMFW will put GC in reset and power down
+ * the voltage. Hence, skipping reset for APUs with GFX V11 or later.
*/
- if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu) {
+ if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 0, 0)) {
r = amdgpu_asic_reset(adev);
if (r)
dev_err(adev->dev, "asic reset on %s failed\n", __func__);
@@ -3783,7 +3787,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
- INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work);
amdgpu_coredump_init(adev);
@@ -5474,7 +5477,7 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev))
cancel_work(&adev->reset_work);
#endif
- cancel_work(&adev->userq_reset_work);
+ amdgpu_userq_mgr_cancel_reset_work(adev);
if (adev->kfd.dev)
cancel_work(&adev->kfd.reset_work);
@@ -5832,6 +5835,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
/* We need to lock reset domain only once both for XGMI and single device */
amdgpu_device_recovery_get_reset_lock(adev, &device_list);
+ /* unmap all the mappings of doorbell and framebuffer to prevent user space from
+ * accessing them
+ */
+ unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
+ amdgpu_amdkfd_clear_kfd_mapping(adev);
+
amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
hive, need_emergency_restart);
if (need_emergency_restart)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index fcad7daaa41b..80efeca0ab73 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -304,7 +304,7 @@ static int amdgpu_discovery_get_tmr_info(struct amdgpu_device *adev,
adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset;
adev->discovery.size =
adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb << 10;
- if (!adev->discovery.offset || !adev->discovery.size)
+ if (!adev->discovery.size)
return -EINVAL;
} else {
goto out;
@@ -3090,10 +3090,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 5, 3):
- adev->family = AMDGPU_FAMILY_GC_11_5_0;
- break;
case IP_VERSION(11, 5, 4):
- adev->family = AMDGPU_FAMILY_GC_11_5_4;
+ adev->family = AMDGPU_FAMILY_GC_11_5_0;
break;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index e47921e2a9af..60debd543e44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -3149,17 +3149,15 @@ static int __init amdgpu_init(void)
r = amdgpu_sync_init();
if (r)
- goto error_sync;
-
- r = amdgpu_userq_fence_slab_init();
- if (r)
- goto error_fence;
+ return r;
amdgpu_register_atpx_handler();
amdgpu_acpi_detect();
- /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
- amdgpu_amdkfd_init();
+ /* Ignore KFD init failures when CONFIG_HSA_AMD is not set. */
+ r = amdgpu_amdkfd_init();
+ if (r && r != -ENOENT)
+ goto error_fini_sync;
if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) {
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
@@ -3170,10 +3168,8 @@ static int __init amdgpu_init(void)
/* let modprobe override vga console setting */
return pci_register_driver(&amdgpu_kms_pci_driver);
-error_fence:
+error_fini_sync:
amdgpu_sync_fini();
-
-error_sync:
return r;
}
@@ -3184,7 +3180,6 @@ static void __exit amdgpu_exit(void)
amdgpu_unregister_atpx_handler();
amdgpu_acpi_release();
amdgpu_sync_fini();
- amdgpu_userq_fence_slab_fini();
mmu_notifier_synchronize();
amdgpu_xcp_drv_release();
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index bc772ca3dab7..b6f849d51c2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -262,12 +262,19 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
*/
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
{
+ int r;
+
if (adev->gart.bo != NULL)
return 0;
- return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo,
- NULL, (void *)&adev->gart.ptr);
+ r = amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo,
+ NULL, (void *)&adev->gart.ptr);
+ if (r)
+ return r;
+
+ memset_io(adev->gart.ptr, adev->gart.gart_pte_flags, adev->gart.table_size);
+ return 0;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 5376035d32fe..123d4a09114d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -31,6 +31,7 @@
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/dma-fence-unwrap.h>
+#include <linux/uaccess.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_drv.h>
@@ -508,6 +509,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
if (offset_in_page(args->addr | args->size))
return -EINVAL;
+ if (!access_ok((void __user *)(uintptr_t)args->addr, args->size))
+ return -EFAULT;
+
/* reject unknown flag values */
if (args->flags & ~(AMDGPU_GEM_USERPTR_READONLY |
AMDGPU_GEM_USERPTR_ANONONLY | AMDGPU_GEM_USERPTR_VALIDATE |
@@ -821,7 +825,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_syncobj *timeline_syncobj = NULL;
struct dma_fence_chain *timeline_chain = NULL;
struct drm_exec exec;
- uint64_t vm_size;
+ uint64_t vm_size, tmp;
int r = 0;
/* Validate virtual address range against reserved regions. */
@@ -845,7 +849,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
vm_size -= AMDGPU_VA_RESERVED_TOP;
- if (args->va_address + args->map_size > vm_size) {
+ if (check_add_overflow(args->va_address, args->map_size, &tmp) || tmp > vm_size) {
dev_dbg(dev->dev,
"va_address 0x%llx is in top reserved area 0x%llx\n",
args->va_address + args->map_size, vm_size);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 285e217fba04..3d9497d121ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -314,7 +314,10 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
mc->gart_start = max_mc_address - mc->gart_size + 1;
break;
case AMDGPU_GART_PLACEMENT_LOW:
- mc->gart_start = 0;
+ if (size_bf >= mc->gart_size)
+ mc->gart_start = 0;
+ else
+ mc->gart_start = ALIGN(mc->fb_end, four_gb);
break;
case AMDGPU_GART_PLACEMENT_BEST_FIT:
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 620fddde4c4d..a5d26b943f6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -199,11 +199,18 @@ int amdgpu_gtt_mgr_alloc_entries(struct amdgpu_gtt_mgr *mgr,
enum drm_mm_insert_mode mode)
{
struct amdgpu_device *adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
+ u32 alignment = 0;
int r;
+ /* Align to TLB L2 cache entry size to work around "V bit HW bug" */
+ if (adev->asic_type == CHIP_TAHITI) {
+ alignment = 32 * 1024 / AMDGPU_GPU_PAGE_SIZE;
+ num_pages = ALIGN(num_pages, alignment);
+ }
+
spin_lock(&mgr->lock);
r = drm_mm_insert_node_in_range(&mgr->mm, mm_node, num_pages,
- 0, GART_ENTRY_WITHOUT_BO_COLOR, 0,
+ alignment, GART_ENTRY_WITHOUT_BO_COLOR, 0,
adev->gmc.gart_size >> PAGE_SHIFT,
mode);
spin_unlock(&mgr->lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 06efce38f323..71272f40feef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -873,68 +873,59 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
? -EFAULT : 0;
}
case AMDGPU_INFO_READ_MMR_REG: {
- int ret = 0;
- unsigned int n, alloc_size;
- uint32_t *regs;
unsigned int se_num = (info->read_mmr_reg.instance >>
AMDGPU_INFO_MMR_SE_INDEX_SHIFT) &
AMDGPU_INFO_MMR_SE_INDEX_MASK;
unsigned int sh_num = (info->read_mmr_reg.instance >>
AMDGPU_INFO_MMR_SH_INDEX_SHIFT) &
AMDGPU_INFO_MMR_SH_INDEX_MASK;
-
- if (!down_read_trylock(&adev->reset_domain->sem))
- return -ENOENT;
+ unsigned int alloc_size;
+ uint32_t *regs;
+ int ret;
/* set full masks if the userspace set all bits
* in the bitfields
*/
- if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) {
+ if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK)
se_num = 0xffffffff;
- } else if (se_num >= AMDGPU_GFX_MAX_SE) {
- ret = -EINVAL;
- goto out;
- }
+ else if (se_num >= AMDGPU_GFX_MAX_SE)
+ return -EINVAL;
- if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) {
+ if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK)
sh_num = 0xffffffff;
- } else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) {
- ret = -EINVAL;
- goto out;
- }
+ else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE)
+ return -EINVAL;
- if (info->read_mmr_reg.count > 128) {
- ret = -EINVAL;
- goto out;
- }
+ if (info->read_mmr_reg.count > 128)
+ return -EINVAL;
- regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL);
- if (!regs) {
- ret = -ENOMEM;
- goto out;
- }
+ regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs),
+ GFP_KERNEL);
+ if (!regs)
+ return -ENOMEM;
+ down_read(&adev->reset_domain->sem);
alloc_size = info->read_mmr_reg.count * sizeof(*regs);
-
amdgpu_gfx_off_ctrl(adev, false);
+ ret = 0;
for (i = 0; i < info->read_mmr_reg.count; i++) {
if (amdgpu_asic_read_register(adev, se_num, sh_num,
info->read_mmr_reg.dword_offset + i,
&regs[i])) {
DRM_DEBUG_KMS("unallowed offset %#x\n",
info->read_mmr_reg.dword_offset + i);
- kfree(regs);
- amdgpu_gfx_off_ctrl(adev, true);
ret = -EFAULT;
- goto out;
+ break;
}
}
amdgpu_gfx_off_ctrl(adev, true);
- n = copy_to_user(out, regs, min(size, alloc_size));
- kfree(regs);
- ret = (n ? -EFAULT : 0);
-out:
up_read(&adev->reset_domain->sem);
+
+ if (!ret) {
+ ret = copy_to_user(out, regs, min(size, alloc_size))
+ ? -EFAULT : 0;
+ }
+ kfree(regs);
return ret;
}
case AMDGPU_INFO_DEV_INFO: {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 912c9afaf9e1..4d68732d6223 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -96,7 +96,8 @@ struct amdgpu_bo_va {
* if non-zero, cannot unmap from GPU because user queues may still access it
*/
unsigned int queue_refcount;
- atomic_t userq_va_mapped;
+ /* Indicates if this buffer is mapped for any user queue. Once set, never reset. */
+ bool userq_va_mapped;
};
struct amdgpu_bo {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index cdf4909592d2..0c57fe259894 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -1950,7 +1950,7 @@ void amdgpu_ras_check_bad_page_status(struct amdgpu_device *adev)
if (!control || amdgpu_bad_page_threshold == 0)
return;
- if (control->ras_num_bad_pages >= ras->bad_page_cnt_threshold) {
+ if (control->ras_num_bad_pages > ras->bad_page_cnt_threshold) {
if (amdgpu_dpm_send_rma_reason(adev))
dev_warn(adev->dev, "Unable to send out-of-band RMA CPER");
else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 66e8a2f7afcf..d6bee5c30073 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -552,8 +552,9 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
struct amdgpu_ring *ring = file_inode(f)->i_private;
- uint32_t value, result, early[3];
+ u32 value, result, early[3] = { 0 };
uint64_t p;
+ u32 avail_dw, start_dw, read_dw;
loff_t i;
int r;
@@ -565,10 +566,10 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
result = 0;
- if (*pos < 12) {
- if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
- mutex_lock(&ring->adev->cper.ring_lock);
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ mutex_lock(&ring->adev->cper.ring_lock);
+ if (*pos < 12) {
early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
early[2] = ring->wptr & ring->buf_mask;
@@ -600,13 +601,24 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
*pos += 4;
}
} else {
+ early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
+ early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
+
p = early[0];
if (early[0] <= early[1])
- size = (early[1] - early[0]);
+ avail_dw = early[1] - early[0];
else
- size = ring->ring_size - (early[0] - early[1]);
+ avail_dw = ring->buf_mask + 1 - (early[0] - early[1]);
- while (size) {
+ start_dw = (*pos > 12) ? ((*pos - 12) >> 2) : 0;
+ if (start_dw >= avail_dw)
+ goto out;
+
+ p = (p + start_dw) & ring->ptr_mask;
+ avail_dw -= start_dw;
+ read_dw = min_t(u32, avail_dw, size >> 2);
+
+ while (read_dw) {
if (p == early[1])
goto out;
@@ -619,9 +631,10 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
buf += 4;
result += 4;
- size--;
+ read_dw--;
p++;
p &= ring->ptr_mask;
+ *pos += 4;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
index a0b479d5fff1..f4be19223588 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -175,11 +175,14 @@ int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va,
{
unsigned long bit_pos;
- bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem);
- if (bit_pos >= adev->seq64.num_sem)
- return -ENOSPC;
+ for (;;) {
+ bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem);
+ if (bit_pos >= adev->seq64.num_sem)
+ return -ENOSPC;
- __set_bit(bit_pos, adev->seq64.used);
+ if (!test_and_set_bit(bit_pos, adev->seq64.used))
+ break;
+ }
*va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev);
@@ -205,7 +208,7 @@ void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va)
bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64);
if (bit_pos < adev->seq64.num_sem)
- __clear_bit(bit_pos, adev->seq64.used);
+ clear_bit(bit_pos, adev->seq64.used);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0dc68fb9d88e..3d2e00efc741 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -75,6 +75,9 @@ static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
unsigned int type,
uint64_t size_in_page)
{
+ if (!size_in_page)
+ return 0;
+
return ttm_range_man_init(&adev->mman.bdev, type,
false, size_in_page);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 0238c2798de4..b8ed931f8a40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -130,6 +130,7 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
adev->umc.ras->ras_block.hw_ops->query_ras_error_address &&
adev->umc.max_ras_err_cnt_per_query) {
+ kfree(err_data->err_addr);
err_data->err_addr =
kzalloc_objs(struct eeprom_table_record,
adev->umc.max_ras_err_cnt_per_query);
@@ -160,6 +161,7 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
if (adev->umc.ras &&
adev->umc.ras->ecc_info_query_ras_error_address &&
adev->umc.max_ras_err_cnt_per_query) {
+ kfree(err_data->err_addr);
err_data->err_addr =
kzalloc_objs(struct eeprom_table_record,
adev->umc.max_ras_err_cnt_per_query);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index d5abf785ca17..f070ea37d918 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -82,19 +82,11 @@ static bool amdgpu_userq_is_reset_type_supported(struct amdgpu_device *adev,
return false;
}
-static void amdgpu_userq_gpu_reset(struct amdgpu_device *adev)
-{
- if (amdgpu_device_should_recover_gpu(adev)) {
- amdgpu_reset_domain_schedule(adev->reset_domain,
- &adev->userq_reset_work);
- /* Wait for the reset job to complete */
- flush_work(&adev->userq_reset_work);
- }
-}
-
-static int
-amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
+static void amdgpu_userq_mgr_reset_work(struct work_struct *work)
{
+ struct amdgpu_userq_mgr *uq_mgr =
+ container_of(work, struct amdgpu_userq_mgr,
+ reset_work);
struct amdgpu_device *adev = uq_mgr->adev;
const int queue_types[] = {
AMDGPU_RING_TYPE_COMPUTE,
@@ -103,15 +95,11 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
};
const int num_queue_types = ARRAY_SIZE(queue_types);
bool gpu_reset = false;
- int r = 0;
- int i;
-
- /* Warning if current process mutex is not held */
- WARN_ON(!mutex_is_locked(&uq_mgr->userq_mutex));
+ int i, r;
if (unlikely(adev->debug_disable_gpu_ring_reset)) {
dev_err(adev->dev, "userq reset disabled by debug mask\n");
- return 0;
+ return;
}
/*
@@ -119,7 +107,7 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
* skip all reset detection logic
*/
if (!amdgpu_gpu_recovery)
- return 0;
+ return;
/*
* Iterate through all queue types to detect and reset problematic queues
@@ -127,9 +115,11 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
*/
for (i = 0; i < num_queue_types; i++) {
int ring_type = queue_types[i];
- const struct amdgpu_userq_funcs *funcs = adev->userq_funcs[ring_type];
+ const struct amdgpu_userq_funcs *funcs =
+ adev->userq_funcs[ring_type];
- if (!amdgpu_userq_is_reset_type_supported(adev, ring_type, AMDGPU_RESET_TYPE_PER_QUEUE))
+ if (!amdgpu_userq_is_reset_type_supported(adev, ring_type,
+ AMDGPU_RESET_TYPE_PER_QUEUE))
continue;
if (atomic_read(&uq_mgr->userq_count[ring_type]) > 0 &&
@@ -142,46 +132,43 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
}
}
- if (gpu_reset)
- amdgpu_userq_gpu_reset(adev);
+ if (gpu_reset) {
+ struct amdgpu_reset_context reset_context;
- return r;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_USERQ;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
}
static void amdgpu_userq_hang_detect_work(struct work_struct *work)
{
- struct amdgpu_usermode_queue *queue = container_of(work,
- struct amdgpu_usermode_queue,
- hang_detect_work.work);
- struct dma_fence *fence;
- struct amdgpu_userq_mgr *uq_mgr;
-
- if (!queue->userq_mgr)
- return;
-
- uq_mgr = queue->userq_mgr;
- fence = READ_ONCE(queue->hang_detect_fence);
- /* Fence already signaled – no action needed */
- if (!fence || dma_fence_is_signaled(fence))
- return;
+ struct amdgpu_usermode_queue *queue =
+ container_of(work, struct amdgpu_usermode_queue,
+ hang_detect_work.work);
- mutex_lock(&uq_mgr->userq_mutex);
- amdgpu_userq_detect_and_reset_queues(uq_mgr);
- mutex_unlock(&uq_mgr->userq_mutex);
+ /*
+ * Don't schedule the work here! Scheduling or queue work from one reset
+ * handler to another is illegal if you don't take extra precautions!
+ */
+ amdgpu_userq_mgr_reset_work(&queue->userq_mgr->reset_work);
}
/*
* Start hang detection for a user queue fence. A delayed work will be scheduled
- * to check if the fence is still pending after the timeout period.
-*/
+ * to reset the queues when the fence doesn't signal in time.
+ */
void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue)
{
struct amdgpu_device *adev;
unsigned long timeout_ms;
- if (!queue || !queue->userq_mgr || !queue->userq_mgr->adev)
- return;
-
adev = queue->userq_mgr->adev;
/* Determine timeout based on queue type */
switch (queue->queue_type) {
@@ -199,16 +186,33 @@ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue)
break;
}
- /* Store the fence to monitor and schedule hang detection */
- WRITE_ONCE(queue->hang_detect_fence, queue->last_fence);
- schedule_delayed_work(&queue->hang_detect_work,
- msecs_to_jiffies(timeout_ms));
+ queue_delayed_work(adev->reset_domain->wq, &queue->hang_detect_work,
+ msecs_to_jiffies(timeout_ms));
}
-static void amdgpu_userq_init_hang_detect_work(struct amdgpu_usermode_queue *queue)
+void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell)
{
- INIT_DELAYED_WORK(&queue->hang_detect_work, amdgpu_userq_hang_detect_work);
- queue->hang_detect_fence = NULL;
+ struct xarray *xa = &adev->userq_doorbell_xa;
+ struct amdgpu_usermode_queue *queue;
+ unsigned long flags;
+ int r;
+
+ xa_lock_irqsave(xa, flags);
+ queue = xa_load(xa, doorbell);
+ if (queue) {
+ r = amdgpu_userq_fence_driver_process(queue->fence_drv);
+ /*
+ * We are in interrupt context here, this *can't* wait for
+ * reset work to finish.
+ */
+ if (r >= 0)
+ cancel_delayed_work(&queue->hang_detect_work);
+
+ /* Restart the timer when there are still fences pending */
+ if (r == 1)
+ amdgpu_userq_start_hang_detect_work(queue);
+ }
+ xa_unlock_irqrestore(xa, flags);
}
static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue,
@@ -223,7 +227,7 @@ static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue,
INIT_LIST_HEAD(&va_cursor->list);
va_cursor->gpu_addr = addr;
- atomic_set(&va_map->bo_va->userq_va_mapped, 1);
+ va_map->bo_va->userq_va_mapped = true;
list_add(&va_cursor->list, &queue->userq_va_list);
return 0;
@@ -270,7 +274,7 @@ static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr)
dma_resv_assert_held(vm->root.bo->tbo.base.resv);
mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
- if (!IS_ERR_OR_NULL(mapping) && atomic_read(&mapping->bo_va->userq_va_mapped))
+ if (!IS_ERR_OR_NULL(mapping) && mapping->bo_va->userq_va_mapped)
r = true;
else
r = false;
@@ -296,16 +300,8 @@ static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue)
return false;
}
-static void amdgpu_userq_buffer_va_list_del(struct amdgpu_bo_va_mapping *mapping,
- struct amdgpu_userq_va_cursor *va_cursor)
-{
- atomic_set(&mapping->bo_va->userq_va_mapped, 0);
- list_del(&va_cursor->list);
- kfree(va_cursor);
-}
-
-static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev,
- struct amdgpu_usermode_queue *queue)
+static void amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *queue)
{
struct amdgpu_userq_va_cursor *va_cursor, *tmp;
struct amdgpu_bo_va_mapping *mapping;
@@ -315,15 +311,12 @@ static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev,
list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) {
mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, va_cursor->gpu_addr);
- if (!mapping) {
- return -EINVAL;
- }
- dev_dbg(adev->dev, "delete the userq:%p va:%llx\n",
- queue, va_cursor->gpu_addr);
- amdgpu_userq_buffer_va_list_del(mapping, va_cursor);
+ if (mapping)
+ dev_dbg(adev->dev, "delete the userq:%p va:%llx\n",
+ queue, va_cursor->gpu_addr);
+ list_del(&va_cursor->list);
+ kfree(va_cursor);
}
-
- return 0;
}
static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue)
@@ -332,23 +325,18 @@ static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue)
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *userq_funcs =
adev->userq_funcs[queue->queue_type];
- bool found_hung_queue = false;
- int r = 0;
+ int r;
if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
r = userq_funcs->preempt(queue);
if (r) {
queue->state = AMDGPU_USERQ_STATE_HUNG;
- found_hung_queue = true;
+ return r;
} else {
queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
}
}
-
- if (found_hung_queue)
- amdgpu_userq_detect_and_reset_queues(uq_mgr);
-
- return r;
+ return 0;
}
static int amdgpu_userq_restore_helper(struct amdgpu_usermode_queue *queue)
@@ -377,24 +365,21 @@ static int amdgpu_userq_unmap_helper(struct amdgpu_usermode_queue *queue)
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *userq_funcs =
adev->userq_funcs[queue->queue_type];
- bool found_hung_queue = false;
- int r = 0;
+ int r;
if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
- (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
+ (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
+
r = userq_funcs->unmap(queue);
if (r) {
queue->state = AMDGPU_USERQ_STATE_HUNG;
- found_hung_queue = true;
+ return r;
} else {
queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
}
}
- if (found_hung_queue)
- amdgpu_userq_detect_and_reset_queues(uq_mgr);
-
- return r;
+ return 0;
}
static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue)
@@ -403,19 +388,19 @@ static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue)
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *userq_funcs =
adev->userq_funcs[queue->queue_type];
- int r = 0;
+ int r;
if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
r = userq_funcs->map(queue);
if (r) {
queue->state = AMDGPU_USERQ_STATE_HUNG;
- amdgpu_userq_detect_and_reset_queues(uq_mgr);
+ return r;
} else {
queue->state = AMDGPU_USERQ_STATE_MAPPED;
}
}
- return r;
+ return 0;
}
static void amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue)
@@ -512,16 +497,20 @@ int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
goto free_obj;
}
+ r = amdgpu_bo_pin(userq_obj->obj, AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto unresv;
+
r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo);
if (r) {
drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r);
- goto unresv;
+ goto unpin_bo;
}
r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr);
if (r) {
drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r);
- goto unresv;
+ goto unpin_bo;
}
userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj);
@@ -529,11 +518,13 @@ int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
memset(userq_obj->cpu_ptr, 0, size);
return 0;
+unpin_bo:
+ amdgpu_bo_unpin(userq_obj->obj);
unresv:
amdgpu_bo_unreserve(userq_obj->obj);
-
free_obj:
amdgpu_bo_unref(&userq_obj->obj);
+
return r;
}
@@ -541,6 +532,7 @@ void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_userq_obj *userq_obj)
{
amdgpu_bo_kunmap(userq_obj->obj);
+ amdgpu_bo_unpin(userq_obj->obj);
amdgpu_bo_unref(&userq_obj->obj);
}
@@ -635,20 +627,12 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que
amdgpu_bo_unreserve(vm->root.bo);
mutex_lock(&uq_mgr->userq_mutex);
- queue->hang_detect_fence = NULL;
amdgpu_userq_wait_for_last_fence(queue);
#if defined(CONFIG_DEBUG_FS)
debugfs_remove_recursive(queue->debugfs_queue);
#endif
- amdgpu_userq_detect_and_reset_queues(uq_mgr);
r = amdgpu_userq_unmap_helper(queue);
- /*TODO: It requires a reset for userq hw unmap error*/
- if (r) {
- drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n");
- queue->state = AMDGPU_USERQ_STATE_HUNG;
- }
-
atomic_dec(&uq_mgr->userq_count[queue->queue_type]);
amdgpu_userq_cleanup(queue);
mutex_unlock(&uq_mgr->userq_mutex);
@@ -724,14 +708,14 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
const struct amdgpu_userq_funcs *uq_funcs;
struct amdgpu_usermode_queue *queue;
struct amdgpu_db_info db_info;
- bool skip_map_queue;
- u32 qid;
uint64_t index;
- int r = 0;
- int priority =
- (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
- AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
+ int priority;
+ u32 qid;
+ int r;
+ priority =
+ (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK)
+ >> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
r = amdgpu_userq_priority_permit(filp, priority);
if (r)
return r;
@@ -744,40 +728,43 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
uq_funcs = adev->userq_funcs[args->in.ip_type];
if (!uq_funcs) {
- drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n",
- args->in.ip_type);
r = -EINVAL;
goto err_pm_runtime;
}
queue = kzalloc_obj(struct amdgpu_usermode_queue);
if (!queue) {
- drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n");
r = -ENOMEM;
goto err_pm_runtime;
}
+ kref_init(&queue->refcount);
INIT_LIST_HEAD(&queue->userq_va_list);
queue->doorbell_handle = args->in.doorbell_handle;
queue->queue_type = args->in.ip_type;
queue->vm = &fpriv->vm;
queue->priority = priority;
-
- db_info.queue_type = queue->queue_type;
- db_info.doorbell_handle = queue->doorbell_handle;
- db_info.db_obj = &queue->db_obj;
- db_info.doorbell_offset = args->in.doorbell_offset;
-
queue->userq_mgr = uq_mgr;
+ INIT_DELAYED_WORK(&queue->hang_detect_work,
+ amdgpu_userq_hang_detect_work);
- /* Validate the userq virtual address.*/
- r = amdgpu_bo_reserve(fpriv->vm.root.bo, false);
+ mutex_init(&queue->fence_drv_lock);
+ xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
+ r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv);
if (r)
goto free_queue;
- if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va, args->in.queue_size) ||
- amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
- amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
+ /* Make sure the queue can actually run with those virtual addresses. */
+ r = amdgpu_bo_reserve(fpriv->vm.root.bo, false);
+ if (r)
+ goto free_fence_drv;
+
+ if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va,
+ args->in.queue_size) ||
+ amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va,
+ AMDGPU_GPU_PAGE_SIZE) ||
+ amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va,
+ AMDGPU_GPU_PAGE_SIZE)) {
r = -EINVAL;
amdgpu_bo_unreserve(fpriv->vm.root.bo);
goto clean_mapping;
@@ -785,6 +772,10 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
amdgpu_bo_unreserve(fpriv->vm.root.bo);
/* Convert relative doorbell offset into absolute doorbell index */
+ db_info.queue_type = queue->queue_type;
+ db_info.doorbell_handle = queue->doorbell_handle;
+ db_info.db_obj = &queue->db_obj;
+ db_info.doorbell_offset = args->in.doorbell_offset;
index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp);
if (index == (uint64_t)-EINVAL) {
drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n");
@@ -793,79 +784,64 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
}
queue->doorbell_index = index;
- xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
- r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv);
- if (r) {
- drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n");
- goto clean_mapping;
- }
-
r = uq_funcs->mqd_create(queue, &args->in);
if (r) {
drm_file_err(uq_mgr->file, "Failed to create Queue\n");
- goto clean_fence_driver;
+ goto clean_mapping;
}
+ /* Update VM owner at userq submit-time for page-fault attribution. */
+ amdgpu_vm_set_task_info(&fpriv->vm);
+
+ r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue,
+ GFP_KERNEL));
+ if (r)
+ goto clean_mqd;
+
amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
/* don't map the queue if scheduling is halted */
- if (adev->userq_halt_for_enforce_isolation &&
- ((queue->queue_type == AMDGPU_HW_IP_GFX) ||
- (queue->queue_type == AMDGPU_HW_IP_COMPUTE)))
- skip_map_queue = true;
- else
- skip_map_queue = false;
- if (!skip_map_queue) {
+ if (!adev->userq_halt_for_enforce_isolation ||
+ ((queue->queue_type != AMDGPU_HW_IP_GFX) &&
+ (queue->queue_type != AMDGPU_HW_IP_COMPUTE))) {
r = amdgpu_userq_map_helper(queue);
if (r) {
drm_file_err(uq_mgr->file, "Failed to map Queue\n");
- goto clean_mqd;
+ mutex_unlock(&uq_mgr->userq_mutex);
+ goto clean_doorbell;
}
}
- /* drop this refcount during queue destroy */
- kref_init(&queue->refcount);
-
- /* Wait for mode-1 reset to complete */
- down_read(&adev->reset_domain->sem);
+ atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
+ mutex_unlock(&uq_mgr->userq_mutex);
r = xa_alloc(&uq_mgr->userq_xa, &qid, queue,
- XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL);
- if (r) {
- if (!skip_map_queue)
- amdgpu_userq_unmap_helper(queue);
- r = -ENOMEM;
- goto clean_reset_domain;
- }
-
- r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
+ XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT),
+ GFP_KERNEL);
if (r) {
- xa_erase(&uq_mgr->userq_xa, qid);
- if (!skip_map_queue)
- amdgpu_userq_unmap_helper(queue);
- goto clean_reset_domain;
+ /*
+ * This drops the last reference which should take care of
+ * all cleanup.
+ */
+ amdgpu_userq_put(queue);
+ return r;
}
- up_read(&adev->reset_domain->sem);
amdgpu_debugfs_userq_init(filp, queue, qid);
- amdgpu_userq_init_hang_detect_work(queue);
-
args->out.queue_id = qid;
- atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
- mutex_unlock(&uq_mgr->userq_mutex);
return 0;
-clean_reset_domain:
- up_read(&adev->reset_domain->sem);
+clean_doorbell:
+ xa_erase_irq(&adev->userq_doorbell_xa, index);
clean_mqd:
- mutex_unlock(&uq_mgr->userq_mutex);
uq_funcs->mqd_destroy(queue);
-clean_fence_driver:
- amdgpu_userq_fence_driver_free(queue);
clean_mapping:
amdgpu_bo_reserve(fpriv->vm.root.bo, true);
amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
amdgpu_bo_unreserve(fpriv->vm.root.bo);
+ mutex_destroy(&queue->fence_drv_lock);
+free_fence_drv:
+ amdgpu_userq_fence_driver_free(queue);
free_queue:
kfree(queue);
err_pm_runtime:
@@ -1187,7 +1163,7 @@ retry_lock:
bo = range->bo;
ret = amdgpu_ttm_tt_get_user_pages(bo, range);
if (ret)
- goto unlock_all;
+ goto free_ranges;
}
invalidated = true;
@@ -1214,6 +1190,7 @@ retry_lock:
unlock_all:
drm_exec_fini(&exec);
+free_ranges:
xa_for_each(&xa, tmp_key, range) {
if (!range)
continue;
@@ -1254,7 +1231,6 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
unsigned long queue_id;
int ret = 0, r;
- amdgpu_userq_detect_and_reset_queues(uq_mgr);
/* Try to unmap all the queues in this process ctx */
xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
r = amdgpu_userq_preempt_helper(queue);
@@ -1262,29 +1238,16 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
ret = r;
}
- if (ret)
+ if (ret) {
drm_file_err(uq_mgr->file,
"Couldn't unmap all the queues, eviction failed ret=%d\n", ret);
+ amdgpu_reset_domain_schedule(uq_mgr->adev->reset_domain,
+ &uq_mgr->reset_work);
+ flush_work(&uq_mgr->reset_work);
+ }
return ret;
}
-void amdgpu_userq_reset_work(struct work_struct *work)
-{
- struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
- userq_reset_work);
- struct amdgpu_reset_context reset_context;
-
- memset(&reset_context, 0, sizeof(reset_context));
-
- reset_context.method = AMD_RESET_METHOD_NONE;
- reset_context.reset_req_dev = adev;
- reset_context.src = AMDGPU_RESET_SRC_USERQ;
- set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
- /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/
-
- amdgpu_device_gpu_recover(adev, NULL, &reset_context);
-}
-
static void
amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
{
@@ -1318,9 +1281,24 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f
userq_mgr->file = file_priv;
INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
+ INIT_WORK(&userq_mgr->reset_work, amdgpu_userq_mgr_reset_work);
return 0;
}
+void amdgpu_userq_mgr_cancel_reset_work(struct amdgpu_device *adev)
+{
+ struct xarray *xa = &adev->userq_doorbell_xa;
+ struct amdgpu_usermode_queue *queue;
+ unsigned long flags, queue_id;
+
+ xa_lock_irqsave(xa, flags);
+ xa_for_each(xa, queue_id, queue) {
+ cancel_delayed_work(&queue->hang_detect_work);
+ cancel_work(&queue->userq_mgr->reset_work);
+ }
+ xa_unlock_irqrestore(xa, flags);
+}
+
void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr)
{
cancel_delayed_work_sync(&userq_mgr->resume_work);
@@ -1346,6 +1324,14 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
}
xa_destroy(&userq_mgr->userq_xa);
+
+ /*
+ * Drain any in-flight reset_work. By this point all queues are freed
+ * and userq_count is 0, so if reset_work starts now it exits early.
+ * We still need to wait in case it was already executing gpu_recover.
+ */
+ cancel_work_sync(&userq_mgr->reset_work);
+
mutex_destroy(&userq_mgr->userq_mutex);
}
@@ -1364,7 +1350,6 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev)
uqm = queue->userq_mgr;
cancel_delayed_work_sync(&uqm->resume_work);
guard(mutex)(&uqm->userq_mutex);
- amdgpu_userq_detect_and_reset_queues(uqm);
if (adev->in_s0ix)
r = amdgpu_userq_preempt_helper(queue);
else
@@ -1423,7 +1408,6 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
(queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
(queue->xcp_id == idx)) {
- amdgpu_userq_detect_and_reset_queues(uqm);
r = amdgpu_userq_preempt_helper(queue);
if (r)
ret = r;
@@ -1496,23 +1480,21 @@ void amdgpu_userq_pre_reset(struct amdgpu_device *adev)
{
const struct amdgpu_userq_funcs *userq_funcs;
struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm;
unsigned long queue_id;
+ /* TODO: We probably need a new lock for the queue state */
xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
- uqm = queue->userq_mgr;
- cancel_delayed_work_sync(&uqm->resume_work);
- if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
- amdgpu_userq_wait_for_last_fence(queue);
- userq_funcs = adev->userq_funcs[queue->queue_type];
- userq_funcs->unmap(queue);
- /* just mark all queues as hung at this point.
- * if unmap succeeds, we could map again
- * in amdgpu_userq_post_reset() if vram is not lost
- */
- queue->state = AMDGPU_USERQ_STATE_HUNG;
- amdgpu_userq_fence_driver_force_completion(queue);
- }
+ if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
+ continue;
+
+ userq_funcs = adev->userq_funcs[queue->queue_type];
+ userq_funcs->unmap(queue);
+ /* just mark all queues as hung at this point.
+ * if unmap succeeds, we could map again
+ * in amdgpu_userq_post_reset() if vram is not lost
+ */
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ amdgpu_userq_fence_driver_force_completion(queue);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 675fe6395ac8..49b33e2d6932 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -66,14 +66,31 @@ struct amdgpu_usermode_queue {
struct amdgpu_userq_obj db_obj;
struct amdgpu_userq_obj fw_obj;
struct amdgpu_userq_obj wptr_obj;
+
+ /**
+ * @fence_drv_lock: Protecting @fence_drv_xa.
+ */
+ struct mutex fence_drv_lock;
+
+ /**
+ * @fence_drv_xa:
+ *
+ * References to the external fence drivers returned by wait_ioctl.
+ * Dropped on the next signaled dma_fence or queue destruction.
+ */
struct xarray fence_drv_xa;
struct amdgpu_userq_fence_driver *fence_drv;
struct dma_fence *last_fence;
u32 xcp_id;
int priority;
struct dentry *debugfs_queue;
- struct delayed_work hang_detect_work;
- struct dma_fence *hang_detect_fence;
+
+ /**
+ * @hang_detect_work:
+ *
+ * Delayed work which runs when userq_fences time out.
+ */
+ struct delayed_work hang_detect_work;
struct kref refcount;
struct list_head userq_va_list;
@@ -105,6 +122,13 @@ struct amdgpu_userq_mgr {
struct amdgpu_device *adev;
struct delayed_work resume_work;
struct drm_file *file;
+
+ /**
+ * @reset_work:
+ *
+ * Reset work which is used when eviction fails.
+ */
+ struct work_struct reset_work;
atomic_t userq_count[AMDGPU_RING_TYPE_MAX];
};
@@ -123,6 +147,7 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp
int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
struct amdgpu_device *adev);
+void amdgpu_userq_mgr_cancel_reset_work(struct amdgpu_device *adev);
void amdgpu_userq_mgr_cancel_resume(struct amdgpu_userq_mgr *userq_mgr);
void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr);
@@ -156,6 +181,7 @@ void amdgpu_userq_reset_work(struct work_struct *work);
void amdgpu_userq_pre_reset(struct amdgpu_device *adev);
int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost);
void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue);
+void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell);
int amdgpu_userq_input_va_validate(struct amdgpu_device *adev,
struct amdgpu_usermode_queue *queue,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index da39ac862f37..a41fb72dba94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -32,29 +32,9 @@
#include "amdgpu.h"
#include "amdgpu_userq_fence.h"
-static const struct dma_fence_ops amdgpu_userq_fence_ops;
-static struct kmem_cache *amdgpu_userq_fence_slab;
-
#define AMDGPU_USERQ_MAX_HANDLES (1U << 16)
-int amdgpu_userq_fence_slab_init(void)
-{
- amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
- sizeof(struct amdgpu_userq_fence),
- 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
- if (!amdgpu_userq_fence_slab)
- return -ENOMEM;
-
- return 0;
-}
-
-void amdgpu_userq_fence_slab_fini(void)
-{
- rcu_barrier();
- kmem_cache_destroy(amdgpu_userq_fence_slab);
-}
+static const struct dma_fence_ops amdgpu_userq_fence_ops;
static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
{
@@ -141,6 +121,7 @@ amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
userq->last_fence = NULL;
amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
xa_destroy(&userq->fence_drv_xa);
+ mutex_destroy(&userq->fence_drv_lock);
/* Drop the queue's ownership reference to fence_drv explicitly */
amdgpu_userq_fence_driver_put(userq->fence_drv);
}
@@ -154,7 +135,14 @@ amdgpu_userq_fence_put_fence_drv_array(struct amdgpu_userq_fence *userq_fence)
userq_fence->fence_drv_array_count = 0;
}
-void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
+/*
+ * Returns:
+ * -ENOENT when no fences were processes
+ * 1 when more fences are pending
+ * 0 when no fences are pending any more
+ */
+int
+amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
{
struct amdgpu_userq_fence *userq_fence, *tmp;
LIST_HEAD(to_be_signaled);
@@ -162,9 +150,6 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
unsigned long flags;
u64 rptr;
- if (!fence_drv)
- return;
-
spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
rptr = amdgpu_userq_fence_read(fence_drv);
@@ -177,6 +162,9 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
&userq_fence->link);
spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+ if (list_empty(&to_be_signaled))
+ return -ENOENT;
+
list_for_each_entry_safe(userq_fence, tmp, &to_be_signaled, link) {
fence = &userq_fence->base;
list_del_init(&userq_fence->link);
@@ -188,6 +176,8 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
dma_fence_put(fence);
}
+ /* That doesn't need to be accurate so no locking */
+ return list_empty(&fence_drv->fences) ? 0 : 1;
}
void amdgpu_userq_fence_driver_destroy(struct kref *ref)
@@ -229,80 +219,84 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
}
-static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
+static int amdgpu_userq_fence_alloc(struct amdgpu_usermode_queue *userq,
+ struct amdgpu_userq_fence **pfence)
{
- *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
- return *userq_fence ? 0 : -ENOMEM;
+ struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv;
+ struct amdgpu_userq_fence *userq_fence;
+ void *entry;
+
+ userq_fence = kmalloc(sizeof(*userq_fence), GFP_KERNEL);
+ if (!userq_fence)
+ return -ENOMEM;
+
+ /*
+ * Get the next unused entry, since we fill from the start this can be
+ * used as size to allocate the array.
+ */
+ mutex_lock(&userq->fence_drv_lock);
+ XA_STATE(xas, &userq->fence_drv_xa, 0);
+
+ rcu_read_lock();
+ do {
+ entry = xas_find_marked(&xas, ULONG_MAX, XA_FREE_MARK);
+ } while (xas_retry(&xas, entry));
+ rcu_read_unlock();
+
+ userq_fence->fence_drv_array = kvmalloc_array(xas.xa_index,
+ sizeof(fence_drv),
+ GFP_KERNEL);
+ if (!userq_fence->fence_drv_array) {
+ mutex_unlock(&userq->fence_drv_lock);
+ kfree(userq_fence);
+ return -ENOMEM;
+ }
+
+ userq_fence->fence_drv_array_count = xas.xa_index;
+ xa_extract(&userq->fence_drv_xa, (void **)userq_fence->fence_drv_array,
+ 0, ULONG_MAX, xas.xa_index, XA_PRESENT);
+ xa_destroy(&userq->fence_drv_xa);
+
+ mutex_unlock(&userq->fence_drv_lock);
+
+ amdgpu_userq_fence_driver_get(fence_drv);
+ userq_fence->fence_drv = fence_drv;
+
+ *pfence = userq_fence;
+ return 0;
}
-static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
- struct amdgpu_userq_fence *userq_fence,
- u64 seq, struct dma_fence **f)
+static void amdgpu_userq_fence_init(struct amdgpu_usermode_queue *userq,
+ struct amdgpu_userq_fence *fence,
+ u64 seq)
{
- struct amdgpu_userq_fence_driver *fence_drv;
- struct dma_fence *fence;
+ struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv;
unsigned long flags;
bool signaled = false;
- fence_drv = userq->fence_drv;
- if (!fence_drv)
- return -EINVAL;
-
- spin_lock_init(&userq_fence->lock);
- INIT_LIST_HEAD(&userq_fence->link);
- fence = &userq_fence->base;
- userq_fence->fence_drv = fence_drv;
-
- dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
+ spin_lock_init(&fence->lock);
+ dma_fence_init64(&fence->base, &amdgpu_userq_fence_ops, &fence->lock,
fence_drv->context, seq);
- amdgpu_userq_fence_driver_get(fence_drv);
- dma_fence_get(fence);
-
- if (!xa_empty(&userq->fence_drv_xa)) {
- struct amdgpu_userq_fence_driver *stored_fence_drv;
- unsigned long index, count = 0;
- int i = 0;
-
- xa_lock(&userq->fence_drv_xa);
- xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
- count++;
-
- userq_fence->fence_drv_array =
- kvmalloc_objs(struct amdgpu_userq_fence_driver *, count,
- GFP_ATOMIC);
-
- if (userq_fence->fence_drv_array) {
- xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
- userq_fence->fence_drv_array[i] = stored_fence_drv;
- __xa_erase(&userq->fence_drv_xa, index);
- i++;
- }
- }
-
- userq_fence->fence_drv_array_count = i;
- xa_unlock(&userq->fence_drv_xa);
- } else {
- userq_fence->fence_drv_array = NULL;
- userq_fence->fence_drv_array_count = 0;
- }
+ /* Make sure the fence is visible to the hang detect worker */
+ dma_fence_put(userq->last_fence);
+ userq->last_fence = dma_fence_get(&fence->base);
- /* Check if hardware has already processed the job */
+ /* Check if hardware has already processed the fence */
spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
- if (!dma_fence_is_signaled(fence)) {
- list_add_tail(&userq_fence->link, &fence_drv->fences);
+ if (!dma_fence_is_signaled(&fence->base)) {
+ dma_fence_get(&fence->base);
+ list_add_tail(&fence->link, &fence_drv->fences);
} else {
+ INIT_LIST_HEAD(&fence->link);
signaled = true;
- dma_fence_put(fence);
}
spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
if (signaled)
- amdgpu_userq_fence_put_fence_drv_array(userq_fence);
-
- *f = fence;
-
- return 0;
+ amdgpu_userq_fence_put_fence_drv_array(fence);
+ else
+ amdgpu_userq_start_hang_detect_work(userq);
}
static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
@@ -342,7 +336,7 @@ static void amdgpu_userq_fence_free(struct rcu_head *rcu)
amdgpu_userq_fence_driver_put(fence_drv);
kvfree(userq_fence->fence_drv_array);
- kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
+ kfree(userq_fence);
}
static void amdgpu_userq_fence_release(struct dma_fence *f)
@@ -376,56 +370,48 @@ static int amdgpu_userq_fence_read_wptr(struct amdgpu_device *adev,
{
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_bo *bo;
+ struct drm_exec exec;
u64 addr, *ptr;
- int r;
-
- r = amdgpu_bo_reserve(queue->vm->root.bo, false);
- if (r)
- return r;
+ int ret;
addr = queue->userq_prop->wptr_gpu_addr;
addr &= AMDGPU_GMC_HOLE_MASK;
- mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
- if (!mapping) {
- amdgpu_bo_unreserve(queue->vm->root.bo);
- DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n");
- return -EINVAL;
- }
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 2);
+ drm_exec_until_all_locked(&exec) {
+ ret = amdgpu_vm_lock_pd(queue->vm, &exec, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto lock_error;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
+ if (!mapping) {
+ ret = -EINVAL;
+ goto lock_error;
+ }
- bo = amdgpu_bo_ref(mapping->bo_va->base.bo);
- amdgpu_bo_unreserve(queue->vm->root.bo);
- r = amdgpu_bo_reserve(bo, true);
- if (r) {
- amdgpu_bo_unref(&bo);
- DRM_ERROR("Failed to reserve userqueue wptr bo");
- return r;
+ ret = drm_exec_lock_obj(&exec, &mapping->bo_va->base.bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto lock_error;
}
- r = amdgpu_bo_kmap(bo, (void **)&ptr);
- if (r) {
+ bo = mapping->bo_va->base.bo;
+ ret = amdgpu_bo_kmap(bo, (void **)&ptr);
+ if (ret) {
DRM_ERROR("Failed mapping the userqueue wptr bo");
- goto map_error;
+ goto lock_error;
}
*wptr = le64_to_cpu(*ptr);
amdgpu_bo_kunmap(bo);
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&bo);
-
+ drm_exec_fini(&exec);
return 0;
-map_error:
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&bo);
-
- return r;
-}
-
-static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
-{
- dma_fence_put(fence);
+lock_error:
+ drm_exec_fini(&exec);
+ return ret;
}
static void
@@ -471,13 +457,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
const unsigned int num_read_bo_handles = args->num_bo_read_handles;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
+
struct drm_gem_object **gobj_write, **gobj_read;
u32 *syncobj_handles, num_syncobj_handles;
- struct amdgpu_userq_fence *userq_fence;
- struct amdgpu_usermode_queue *queue = NULL;
- struct drm_syncobj **syncobj = NULL;
- struct dma_fence *fence;
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_fence *fence;
+ struct drm_syncobj **syncobj;
struct drm_exec exec;
+ void __user *ptr;
int r, i, entry;
u64 wptr;
@@ -489,13 +476,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
num_syncobj_handles = args->num_syncobj_handles;
- syncobj_handles = memdup_array_user(u64_to_user_ptr(args->syncobj_handles),
- num_syncobj_handles, sizeof(u32));
+ ptr = u64_to_user_ptr(args->syncobj_handles);
+ syncobj_handles = memdup_array_user(ptr, num_syncobj_handles,
+ sizeof(u32));
if (IS_ERR(syncobj_handles))
return PTR_ERR(syncobj_handles);
- /* Array of pointers to the looked up syncobjs */
- syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
+ syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj),
+ GFP_KERNEL);
if (!syncobj) {
r = -ENOMEM;
goto free_syncobj_handles;
@@ -509,21 +497,17 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
}
}
- r = drm_gem_objects_lookup(filp,
- u64_to_user_ptr(args->bo_read_handles),
- num_read_bo_handles,
- &gobj_read);
+ ptr = u64_to_user_ptr(args->bo_read_handles);
+ r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read);
if (r)
goto free_syncobj;
- r = drm_gem_objects_lookup(filp,
- u64_to_user_ptr(args->bo_write_handles),
- num_write_bo_handles,
+ ptr = u64_to_user_ptr(args->bo_write_handles);
+ r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles,
&gobj_write);
if (r)
goto put_gobj_read;
- /* Retrieve the user queue */
queue = amdgpu_userq_get(userq_mgr, args->queue_id);
if (!queue) {
r = -ENOENT;
@@ -532,73 +516,61 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
r = amdgpu_userq_fence_read_wptr(adev, queue, &wptr);
if (r)
- goto put_gobj_write;
+ goto put_queue;
- r = amdgpu_userq_fence_alloc(&userq_fence);
+ r = amdgpu_userq_fence_alloc(queue, &fence);
if (r)
- goto put_gobj_write;
+ goto put_queue;
/* We are here means UQ is active, make sure the eviction fence is valid */
amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
- /* Create a new fence */
- r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
- if (r) {
- mutex_unlock(&userq_mgr->userq_mutex);
- kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
- goto put_gobj_write;
- }
+ /* Create the new fence */
+ amdgpu_userq_fence_init(queue, fence, wptr);
- dma_fence_put(queue->last_fence);
- queue->last_fence = dma_fence_get(fence);
- amdgpu_userq_start_hang_detect_work(queue);
mutex_unlock(&userq_mgr->userq_mutex);
+ /*
+ * This needs to come after the fence is created since
+ * amdgpu_userq_ensure_ev_fence() can't be called while holding the resv
+ * locks.
+ */
drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
(num_read_bo_handles + num_write_bo_handles));
- /* Lock all BOs with retry handling */
drm_exec_until_all_locked(&exec) {
- r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
+ r = drm_exec_prepare_array(&exec, gobj_read,
+ num_read_bo_handles, 1);
drm_exec_retry_on_contention(&exec);
- if (r) {
- amdgpu_userq_fence_cleanup(fence);
+ if (r)
goto exec_fini;
- }
- r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
+ r = drm_exec_prepare_array(&exec, gobj_write,
+ num_write_bo_handles, 1);
drm_exec_retry_on_contention(&exec);
- if (r) {
- amdgpu_userq_fence_cleanup(fence);
+ if (r)
goto exec_fini;
- }
}
- for (i = 0; i < num_read_bo_handles; i++) {
- if (!gobj_read || !gobj_read[i]->resv)
- continue;
-
- dma_resv_add_fence(gobj_read[i]->resv, fence,
+ /* And publish the new fence in the BOs and syncobj */
+ for (i = 0; i < num_read_bo_handles; i++)
+ dma_resv_add_fence(gobj_read[i]->resv, &fence->base,
DMA_RESV_USAGE_READ);
- }
- for (i = 0; i < num_write_bo_handles; i++) {
- if (!gobj_write || !gobj_write[i]->resv)
- continue;
-
- dma_resv_add_fence(gobj_write[i]->resv, fence,
+ for (i = 0; i < num_write_bo_handles; i++)
+ dma_resv_add_fence(gobj_write[i]->resv, &fence->base,
DMA_RESV_USAGE_WRITE);
- }
- /* Add the created fence to syncobj/BO's */
for (i = 0; i < num_syncobj_handles; i++)
- drm_syncobj_replace_fence(syncobj[i], fence);
+ drm_syncobj_replace_fence(syncobj[i], &fence->base);
+exec_fini:
/* drop the reference acquired in fence creation function */
- dma_fence_put(fence);
+ dma_fence_put(&fence->base);
-exec_fini:
drm_exec_fini(&exec);
+put_queue:
+ amdgpu_userq_put(queue);
put_gobj_write:
for (i = 0; i < num_write_bo_handles; i++)
drm_gem_object_put(gobj_write[i]);
@@ -609,15 +581,11 @@ put_gobj_read:
kvfree(gobj_read);
free_syncobj:
while (entry-- > 0)
- if (syncobj[entry])
- drm_syncobj_put(syncobj[entry]);
+ drm_syncobj_put(syncobj[entry]);
kfree(syncobj);
free_syncobj_handles:
kfree(syncobj_handles);
- if (queue)
- amdgpu_userq_put(queue);
-
return r;
}
@@ -892,8 +860,10 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp,
* Otherwise, we would gather those references until we don't
* have any more space left and crash.
*/
+ mutex_lock(&waitq->fence_drv_lock);
r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
xa_limit_32b, GFP_KERNEL);
+ mutex_unlock(&waitq->fence_drv_lock);
if (r)
goto put_waitq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
index d56246ad8c26..0bd51616cef1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
@@ -58,15 +58,12 @@ struct amdgpu_userq_fence_driver {
char timeline_name[TASK_COMM_LEN];
};
-int amdgpu_userq_fence_slab_init(void);
-void amdgpu_userq_fence_slab_fini(void);
-
void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv);
void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv);
int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
struct amdgpu_userq_fence_driver **fence_drv_req);
void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq);
-void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv);
+int amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv);
void amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq);
void amdgpu_userq_fence_driver_destroy(struct kref *ref);
int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 115a7b269af3..fccd758b6699 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2002,7 +2002,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
* during user requests GEM unmap IOCTL except for forcing the unmap
* from user space.
*/
- if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0))
+ if (unlikely(bo_va->userq_va_mapped))
amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr);
list_del(&mapping->list);
@@ -3023,11 +3023,22 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
is_compute_context = vm->is_compute_context;
- if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
- node_id, addr >> PAGE_SHIFT, ts, write_fault)) {
+ if (is_compute_context) {
+ /* Unreserve root since svm_range_restore_pages might try to reserve it. */
+ /* TODO: rework svm_range_restore_pages so that this isn't necessary. */
amdgpu_bo_unreserve(root);
+
+ if (!svm_range_restore_pages(adev, pasid, vmid,
+ node_id, addr >> PAGE_SHIFT, ts, write_fault)) {
+ amdgpu_bo_unref(&root);
+ return true;
+ }
amdgpu_bo_unref(&root);
- return true;
+
+ /* Re-acquire the VM lock, could be that the VM was freed in between. */
+ vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
+ if (!vm)
+ return false;
}
addr /= AMDGPU_GPU_PAGE_SIZE;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index fd881388d612..f27f917e3cdb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -562,6 +562,11 @@ static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr,
amdgpu_ring_write(ring, 0);
}
+ /* WA: Force sync after TRAP to avoid VPE1 fail to power off */
+ if (ring->adev->vpe.collaborate_mode) {
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COLLAB_SYNC, 0));
+ amdgpu_ring_write(ring, 0xabcd);
+ }
}
static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@@ -968,7 +973,7 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = {
.emit_frame_size =
5 + /* vpe_ring_init_cond_exec */
6 + /* vpe_ring_emit_pipeline_sync */
- 10 + 10 + 10 + /* vpe_ring_emit_fence */
+ 12 + 12 + 12 + /* vpe_ring_emit_fence */
/* vpe_ring_emit_vm_flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 8c82e90f871b..d40ab1e95480 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -6523,15 +6523,7 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
DRM_DEBUG("IH: CP EOP\n");
if (adev->enable_mes && doorbell_offset) {
- struct amdgpu_usermode_queue *queue;
- struct xarray *xa = &adev->userq_doorbell_xa;
- unsigned long flags;
-
- xa_lock_irqsave(xa, flags);
- queue = xa_load(xa, doorbell_offset);
- if (queue)
- amdgpu_userq_fence_driver_process(queue->fence_drv);
- xa_unlock_irqrestore(xa, flags);
+ amdgpu_userq_process_fence_irq(adev, doorbell_offset);
} else {
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 65c33823a688..c35372e21261 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -602,6 +602,13 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
"amdgpu/%s_pfp.bin", ucode_prefix);
if (err)
goto out;
+
+ adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
+ (union amdgpu_firmware_header *)
+ adev->gfx.pfp_fw->data, 2, 0);
+ if (adev->gfx.rs64_enable)
+ dev_dbg(adev->dev, "CP RS64 enable\n");
+
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
@@ -4854,15 +4861,7 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev,
DRM_DEBUG("IH: CP EOP\n");
if (adev->enable_mes && doorbell_offset) {
- struct xarray *xa = &adev->userq_doorbell_xa;
- struct amdgpu_usermode_queue *queue;
- unsigned long flags;
-
- xa_lock_irqsave(xa, flags);
- queue = xa_load(xa, doorbell_offset);
- if (queue)
- amdgpu_userq_fence_driver_process(queue->fence_drv);
- xa_unlock_irqrestore(xa, flags);
+ amdgpu_userq_process_fence_irq(adev, doorbell_offset);
} else {
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
index 68fd3c04134d..68db1bc73bc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_1.c
@@ -3643,16 +3643,7 @@ static int gfx_v12_1_eop_irq(struct amdgpu_device *adev,
DRM_DEBUG("IH: CP EOP\n");
if (adev->enable_mes && doorbell_offset) {
- struct xarray *xa = &adev->userq_doorbell_xa;
- struct amdgpu_usermode_queue *queue;
- unsigned long flags;
-
- xa_lock_irqsave(xa, flags);
- queue = xa_load(xa, doorbell_offset);
- if (queue)
- amdgpu_userq_fence_driver_process(queue->fence_drv);
-
- xa_unlock_irqrestore(xa, flags);
+ amdgpu_userq_process_fence_irq(adev, doorbell_offset);
} else {
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 73223d97a87f..ac90d8e9d86a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1571,6 +1571,71 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev)
mutex_unlock(&adev->grbm_idx_mutex);
}
+/**
+ * gfx_v6_0_setup_tcc() - setup which TCCs are used
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Verify whether the current GPU has any TCCs disabled,
+ * which can happen when the GPU is harvested and some
+ * memory channels are disabled, reducing the memory bus width.
+ * For example, on the Radeon HD 7870 XT (Tahiti LE).
+ *
+ * If some TCCs are disabled, we need to make sure that
+ * the disabled TCCs are not used, and the remaining TCCs
+ * are used optimally.
+ *
+ * TCP_CHAN_STEER_LO/HI control which TCC is used by TCP channels.
+ * TCP_ADDR_CONFIG.NUM_TCC_BANKS controls how many channels are used.
+ *
+ * For optimal performance:
+ * - Rely on the CHAN_STEER from the golden registers table,
+ * only skip disabled TCCs but keep the mapping order.
+ * - Limit NUM_TCC_BANKS to number of active TCCs to avoid thrashing,
+ * which performs better than using the same TCC twice.
+ */
+static void gfx_v6_0_setup_tcc(struct amdgpu_device *adev)
+{
+ u32 i, tcc, tcp_addr_config, num_active_tcc = 0;
+ u64 chan_steer, patched_chan_steer = 0;
+ const u32 num_max_tcc = adev->gfx.config.max_texture_channel_caches;
+ const u32 dis_tcc_mask =
+ amdgpu_gfx_create_bitmask(num_max_tcc) &
+ (REG_GET_FIELD(RREG32(mmCGTS_TCC_DISABLE),
+ CGTS_TCC_DISABLE, TCC_DISABLE) |
+ REG_GET_FIELD(RREG32(mmCGTS_USER_TCC_DISABLE),
+ CGTS_USER_TCC_DISABLE, TCC_DISABLE));
+
+ /* When no TCC is disabled, the golden registers table already has optimal TCC setup */
+ if (!dis_tcc_mask)
+ return;
+
+ /* Each 4-bit nibble contains the index of a TCC used by all TCPs */
+ chan_steer = RREG32(mmTCP_CHAN_STEER_LO) | ((u64)RREG32(mmTCP_CHAN_STEER_HI) << 32ull);
+
+ /* Patch the TCP to TCC mapping to skip disabled TCCs */
+ for (i = 0; i < num_max_tcc; ++i) {
+ tcc = (chan_steer >> (u64)(4 * i)) & 0xf;
+
+ if (!((1 << tcc) & dis_tcc_mask)) {
+ /* Copy enabled TCC indices to the patched register value. */
+ patched_chan_steer |= (u64)tcc << (u64)(4 * num_active_tcc);
+ ++num_active_tcc;
+ }
+ }
+
+ WARN_ON(num_active_tcc != num_max_tcc - hweight32(dis_tcc_mask));
+
+ /* Patch number of TCCs used by TCPs */
+ tcp_addr_config = REG_SET_FIELD(RREG32(mmTCP_ADDR_CONFIG),
+ TCP_ADDR_CONFIG, NUM_TCC_BANKS,
+ num_active_tcc - 1);
+
+ WREG32(mmTCP_ADDR_CONFIG, tcp_addr_config);
+ WREG32(mmTCP_CHAN_STEER_HI, upper_32_bits(patched_chan_steer));
+ WREG32(mmTCP_CHAN_STEER_LO, lower_32_bits(patched_chan_steer));
+}
+
static void gfx_v6_0_config_init(struct amdgpu_device *adev)
{
adev->gfx.config.double_offchip_lds_buf = 0;
@@ -1729,6 +1794,7 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
gfx_v6_0_tiling_mode_table_init(adev);
gfx_v6_0_setup_rb(adev);
+ gfx_v6_0_setup_tcc(adev);
gfx_v6_0_setup_spi(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 95be105671ec..86c7c2a429b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5660,9 +5660,6 @@ static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
{
struct amdgpu_device *adev = ring->adev;
- /* we only allocate 32bit for each seq wb address */
- BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
-
/* write fence seq to the "addr" */
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index 9fe8d10ab270..cffb1e6bab35 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -802,6 +802,7 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
+ .no_user_fence = true,
.get_rptr = jpeg_v2_0_dec_ring_get_rptr,
.get_wptr = jpeg_v2_0_dec_ring_get_wptr,
.set_wptr = jpeg_v2_0_dec_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index 20983f126b49..13a6e24c624a 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -693,6 +693,7 @@ static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = {
static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
+ .no_user_fence = true,
.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
@@ -724,6 +725,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
+ .no_user_fence = true,
.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index 98f5e0622bc5..d0445df39d2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -594,6 +594,7 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = {
static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
+ .no_user_fence = true,
.get_rptr = jpeg_v3_0_dec_ring_get_rptr,
.get_wptr = jpeg_v3_0_dec_ring_get_wptr,
.set_wptr = jpeg_v3_0_dec_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index 0bd83820dd20..6fd4238a8471 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -759,6 +759,7 @@ static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = {
static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
+ .no_user_fence = true,
.get_rptr = jpeg_v4_0_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_dec_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 82abe181c730..0c746580de11 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -1219,6 +1219,7 @@ static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
+ .no_user_fence = true,
.get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
index 54fd9c800c40..a43582b9c876 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
@@ -804,6 +804,7 @@ static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = {
static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
+ .no_user_fence = true,
.get_rptr = jpeg_v4_0_5_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_5_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_5_dec_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
index 46bf15dce2bd..72a4b2d0676f 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
@@ -680,6 +680,7 @@ static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = {
static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
+ .no_user_fence = true,
.get_rptr = jpeg_v5_0_0_dec_ring_get_rptr,
.get_wptr = jpeg_v5_0_0_dec_ring_get_wptr,
.set_wptr = jpeg_v5_0_0_dec_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
index edecbfe66c79..250316704dfa 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
@@ -884,6 +884,7 @@ static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = {
static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
+ .no_user_fence = true,
.get_rptr = jpeg_v5_0_1_dec_ring_get_rptr,
.get_wptr = jpeg_v5_0_1_dec_ring_get_wptr,
.set_wptr = jpeg_v5_0_1_dec_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c
index 285c459379c4..7a4ecea6b39a 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_2.c
@@ -703,6 +703,7 @@ static const struct amd_ip_funcs jpeg_v5_0_2_ip_funcs = {
static const struct amdgpu_ring_funcs jpeg_v5_0_2_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
+ .no_user_fence = true,
.get_rptr = jpeg_v5_0_2_dec_ring_get_rptr,
.get_wptr = jpeg_v5_0_2_dec_ring_get_wptr,
.set_wptr = jpeg_v5_0_2_dec_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c
index 1821dced936f..e7546816baba 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_3_0.c
@@ -661,6 +661,7 @@ static const struct amd_ip_funcs jpeg_v5_3_0_ip_funcs = {
static const struct amdgpu_ring_funcs jpeg_v5_3_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_JPEG,
.align_mask = 0xf,
+ .no_user_fence = true,
.get_rptr = jpeg_v5_3_0_dec_ring_get_rptr,
.get_wptr = jpeg_v5_3_0_dec_ring_get_wptr,
.set_wptr = jpeg_v5_3_0_dec_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 2fc39a6938f6..5b4121ddc78c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -31,89 +31,68 @@
#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
static int
-mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
-{
- int ret;
-
- ret = amdgpu_bo_reserve(bo, true);
- if (ret) {
- DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
- goto err_reserve_bo_failed;
- }
-
- ret = amdgpu_ttm_alloc_gart(&bo->tbo);
- if (ret) {
- DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
- goto err_map_bo_gart_failed;
- }
-
- amdgpu_bo_unreserve(bo);
- bo = amdgpu_bo_ref(bo);
-
- return 0;
-
-err_map_bo_gart_failed:
- amdgpu_bo_unreserve(bo);
-err_reserve_bo_failed:
- return ret;
-}
-
-static int
mes_userq_create_wptr_mapping(struct amdgpu_device *adev,
struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_queue *queue,
uint64_t wptr)
{
struct amdgpu_bo_va_mapping *wptr_mapping;
- struct amdgpu_vm *wptr_vm;
struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj;
+ struct amdgpu_bo *obj;
+ struct amdgpu_vm *vm = queue->vm;
+ struct drm_exec exec;
int ret;
- wptr_vm = queue->vm;
- ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
- if (ret)
- return ret;
-
wptr &= AMDGPU_GMC_HOLE_MASK;
- wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT);
- amdgpu_bo_unreserve(wptr_vm->root.bo);
- if (!wptr_mapping) {
- DRM_ERROR("Failed to lookup wptr bo\n");
- return -EINVAL;
- }
- wptr_obj->obj = wptr_mapping->bo_va->base.bo;
- if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
- DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n");
- return -EINVAL;
- }
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 2);
+ drm_exec_until_all_locked(&exec) {
+ ret = amdgpu_vm_lock_pd(vm, &exec, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto fail_lock;
+
+ wptr_mapping = amdgpu_vm_bo_lookup_mapping(vm, wptr >> PAGE_SHIFT);
+ if (!wptr_mapping) {
+ ret = -EINVAL;
+ goto fail_lock;
+ }
- ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj);
- if (ret) {
- DRM_ERROR("Failed to map wptr bo to GART\n");
- return ret;
+ obj = wptr_mapping->bo_va->base.bo;
+ ret = drm_exec_lock_obj(&exec, &obj->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto fail_lock;
}
- ret = amdgpu_bo_reserve(wptr_obj->obj, true);
- if (ret) {
- DRM_ERROR("Failed to reserve wptr bo\n");
- return ret;
+ wptr_obj->obj = amdgpu_bo_ref(wptr_mapping->bo_va->base.bo);
+ if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
+ ret = -EINVAL;
+ goto fail_map;
}
/* TODO use eviction fence instead of pinning. */
ret = amdgpu_bo_pin(wptr_obj->obj, AMDGPU_GEM_DOMAIN_GTT);
if (ret) {
- drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin wptr bo\n");
- goto unresv_bo;
+ DRM_ERROR("Failed to pin wptr bo. ret %d\n", ret);
+ goto fail_map;
+ }
+
+ ret = amdgpu_ttm_alloc_gart(&wptr_obj->obj->tbo);
+ if (ret) {
+ DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
+ goto fail_map;
}
queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj);
- amdgpu_bo_unreserve(wptr_obj->obj);
+ drm_exec_fini(&exec);
return 0;
-unresv_bo:
- amdgpu_bo_unreserve(wptr_obj->obj);
+fail_map:
+ amdgpu_bo_unref(&wptr_obj->obj);
+fail_lock:
+ drm_exec_fini(&exec);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 44f0f23e1148..e64f2f6df9a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -889,7 +889,7 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
/* write the fence */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
/* zero in first two bits */
- BUG_ON(addr & 0x3);
+ WARN_ON(addr & 0x3);
amdgpu_ring_write(ring, lower_32_bits(addr));
amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, lower_32_bits(seq));
@@ -899,7 +899,7 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
addr += 4;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
/* zero in first two bits */
- BUG_ON(addr & 0x3);
+ WARN_ON(addr & 0x3);
amdgpu_ring_write(ring, lower_32_bits(addr));
amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, upper_32_bits(seq));
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 0f530bb8a9a3..8ca46e1e474e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1662,17 +1662,8 @@ static int sdma_v6_0_process_fence_irq(struct amdgpu_device *adev,
u32 doorbell_offset = entry->src_data[0];
if (adev->enable_mes && doorbell_offset) {
- struct amdgpu_usermode_queue *queue;
- struct xarray *xa = &adev->userq_doorbell_xa;
- unsigned long flags;
-
doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
-
- xa_lock_irqsave(xa, flags);
- queue = xa_load(xa, doorbell_offset);
- if (queue)
- amdgpu_userq_fence_driver_process(queue->fence_drv);
- xa_unlock_irqrestore(xa, flags);
+ amdgpu_userq_process_fence_irq(adev, doorbell_offset);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 9ed817b69a3b..37191e2918d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -1594,17 +1594,8 @@ static int sdma_v7_0_process_fence_irq(struct amdgpu_device *adev,
u32 doorbell_offset = entry->src_data[0];
if (adev->enable_mes && doorbell_offset) {
- struct xarray *xa = &adev->userq_doorbell_xa;
- struct amdgpu_usermode_queue *queue;
- unsigned long flags;
-
doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
-
- xa_lock_irqsave(xa, flags);
- queue = xa_load(xa, doorbell_offset);
- if (queue)
- amdgpu_userq_fence_driver_process(queue->fence_drv);
- xa_unlock_irqrestore(xa, flags);
+ amdgpu_userq_process_fence_irq(adev, doorbell_offset);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
index fea576a7f397..efb3fde919ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
@@ -242,6 +242,10 @@ static void uvd_v3_1_mc_resume(struct amdgpu_device *adev)
uint64_t addr;
uint32_t size;
+ /* When the keyselect is already set, don't perturb it. */
+ if (RREG32(mmUVD_FW_START))
+ return;
+
/* program the VCPU memory controller bits 0-27 */
addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3;
@@ -284,6 +288,12 @@ static int uvd_v3_1_fw_validate(struct amdgpu_device *adev)
int i;
uint32_t keysel = adev->uvd.keyselect;
+ if (RREG32(mmUVD_FW_START) & UVD_FW_STATUS__PASS_MASK) {
+ dev_dbg(adev->dev, "UVD keyselect already set: 0x%x (on CPU: 0x%x)\n",
+ RREG32(mmUVD_FW_START), adev->uvd.keyselect);
+ return 0;
+ }
+
WREG32(mmUVD_FW_START, keysel);
for (i = 0; i < 10; ++i) {
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
index 5b7b46d242c6..93253db5e2de 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c
@@ -42,9 +42,10 @@
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
+#define VCE_V1_0_ALIGNMENT (32 * 1024)
#define VCE_V1_0_FW_SIZE (256 * 1024)
#define VCE_V1_0_STACK_SIZE (64 * 1024)
-#define VCE_V1_0_DATA_SIZE (7808 * (AMDGPU_MAX_VCE_HANDLES + 1))
+#define VCE_V1_0_DATA_SIZE (ALIGN(7808 * (AMDGPU_MAX_VCE_HANDLES + 1), VCE_V1_0_ALIGNMENT))
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -177,7 +178,7 @@ static void vce_v1_0_init_cg(struct amdgpu_device *adev)
}
/**
- * vce_v1_0_load_fw_signature - load firmware signature into VCPU BO
+ * vce_v1_0_load_fw() - load firmware signature into VCPU BO
*
* @adev: amdgpu_device pointer
*
@@ -185,21 +186,26 @@ static void vce_v1_0_init_cg(struct amdgpu_device *adev)
* This function finds the signature appropriate for the current
* ASIC and writes that into the VCPU BO.
*/
-static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev)
+static int vce_v1_0_load_fw(struct amdgpu_device *adev)
{
const struct common_firmware_header *hdr;
struct vce_v1_0_fw_signature *sign;
- unsigned int ucode_offset;
+ u32 ucode_offset;
+ u32 ucode_size;
uint32_t chip_id;
u32 *cpu_addr;
int i;
hdr = (const struct common_firmware_header *)adev->vce.fw->data;
ucode_offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+ ucode_size = hdr->ucode_size_bytes - sizeof(struct vce_v1_0_fw_signature *);
cpu_addr = adev->vce.cpu_addr;
sign = (void *)adev->vce.fw->data + ucode_offset;
+ if (ucode_size > VCE_V1_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET)
+ return -EINVAL;
+
switch (adev->asic_type) {
case CHIP_TAHITI:
chip_id = 0x01000014;
@@ -226,12 +232,14 @@ static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev)
return -EINVAL;
}
+ memset_io(&cpu_addr[0], 0, amdgpu_bo_size(adev->vce.vcpu_bo));
+
cpu_addr += (256 - 64) / 4;
memcpy_toio(&cpu_addr[0], &sign->val[i].nonce[0], 16);
cpu_addr[4] = cpu_to_le32(le32_to_cpu(sign->length) + 64);
memset_io(&cpu_addr[5], 0, 44);
- memcpy_toio(&cpu_addr[16], &sign[1], hdr->ucode_size_bytes - sizeof(*sign));
+ memcpy_toio(&cpu_addr[16], &sign[1], ucode_size);
cpu_addr += (le32_to_cpu(sign->length) + 64) / 4;
memcpy_toio(&cpu_addr[0], &sign->val[i].sigval[0], 16);
@@ -312,18 +320,23 @@ static int vce_v1_0_mc_resume(struct amdgpu_device *adev)
WREG32(mmVCE_VCPU_SCRATCH7, AMDGPU_MAX_VCE_HANDLES);
offset = adev->vce.gpu_addr + AMDGPU_VCE_FIRMWARE_OFFSET;
- size = VCE_V1_0_FW_SIZE;
- WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
+ size = VCE_V1_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET;
+ WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset);
WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
offset += size;
size = VCE_V1_0_STACK_SIZE;
- WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
+ WARN_ON(!IS_ALIGNED(offset, VCE_V1_0_ALIGNMENT));
+ WARN_ON(!IS_ALIGNED(size, VCE_V1_0_ALIGNMENT));
+ WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset);
WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
offset += size;
size = VCE_V1_0_DATA_SIZE;
- WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
+ WARN_ON(!IS_ALIGNED(offset, VCE_V1_0_ALIGNMENT));
+ WARN_ON(!IS_ALIGNED(size, VCE_V1_0_ALIGNMENT));
+ WARN_ON((offset + size - adev->vce.gpu_addr) > amdgpu_bo_size(adev->vce.vcpu_bo));
+ WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset);
WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
@@ -527,22 +540,31 @@ static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block)
* To accomodate that, we put GART to the LOW address range
* and reserve some GART pages where we map the VCPU BO,
* so that it gets a 32-bit address.
+ *
+ * The BAR address is zero and we can't change it
+ * due to the firmware validation mechanism.
+ * It seems that it fails to initialize if the address is >= 128 MiB.
*/
static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev)
{
u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo);
- u64 max_vcpu_bo_addr = 0xffffffff - bo_size;
+ u64 max_vcpu_bo_addr = 0x07ffffff - bo_size;
u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE;
u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo);
u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID;
u64 vce_gart_start_offs;
int r;
- r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr,
- &adev->vce.gart_node, num_pages,
- DRM_MM_INSERT_LOW);
- if (r)
- return r;
+ if (adev->gmc.vram_start < adev->gmc.gart_start)
+ return amdgpu_bo_gpu_offset(adev->vce.vcpu_bo) <= max_vcpu_bo_addr ? 0 : -EINVAL;
+
+ if (!drm_mm_node_allocated(&adev->vce.gart_node)) {
+ r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr,
+ &adev->vce.gart_node, num_pages,
+ DRM_MM_INSERT_LOW);
+ if (r)
+ return r;
+ }
vce_gart_start_offs = amdgpu_gtt_node_to_byte_offset(&adev->vce.gart_node);
@@ -553,8 +575,6 @@ static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev)
amdgpu_gart_map_vram_range(adev, pa, adev->vce.gart_node.start,
num_pages, flags, adev->gart.ptr);
adev->vce.gpu_addr = adev->gmc.gart_start + vce_gart_start_offs;
- if (adev->vce.gpu_addr > max_vcpu_bo_addr)
- return -EINVAL;
return 0;
}
@@ -574,10 +594,7 @@ static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
- r = amdgpu_vce_resume(adev);
- if (r)
- return r;
- r = vce_v1_0_load_fw_signature(adev);
+ r = vce_v1_0_load_fw(adev);
if (r)
return r;
r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
@@ -696,10 +713,7 @@ static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
int r;
- r = amdgpu_vce_resume(adev);
- if (r)
- return r;
- r = vce_v1_0_load_fw_signature(adev);
+ r = vce_v1_0_load_fw(adev);
if (r)
return r;
r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index db149eda6204..3a6fc8604108 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -37,9 +37,14 @@
#include "oss/oss_2_0_d.h"
#include "oss/oss_2_0_sh_mask.h"
+
+/* Use 24K to be safe. The FW supposedly only requires 23744 bytes. */
+#define VCE_V2_0_DATA_ENTRY_SIZE (24 * 1024)
+
#define VCE_V2_0_FW_SIZE (256 * 1024)
#define VCE_V2_0_STACK_SIZE (64 * 1024)
-#define VCE_V2_0_DATA_SIZE (23552 * AMDGPU_MAX_VCE_HANDLES)
+#define VCE_V2_0_DATA_SIZE (VCE_V2_0_DATA_ENTRY_SIZE * (AMDGPU_MAX_VCE_HANDLES + 1))
+
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -183,7 +188,7 @@ static void vce_v2_0_mc_resume(struct amdgpu_device *adev)
WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
offset = AMDGPU_VCE_FIRMWARE_OFFSET;
- size = VCE_V2_0_FW_SIZE;
+ size = VCE_V2_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET;
WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 03d79e464f04..c69f7d82060f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -574,7 +574,7 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
} else
WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
offset = AMDGPU_VCE_FIRMWARE_OFFSET;
- size = VCE_V3_0_FW_SIZE;
+ size = VCE_V3_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET;
WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index e35fae9cdaf6..0442bfcfd384 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -2113,6 +2113,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = {
static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0xf,
+ .no_user_fence = true,
.secure_submission_supported = true,
.get_rptr = vcn_v2_0_dec_ring_get_rptr,
.get_wptr = vcn_v2_0_dec_ring_get_wptr,
@@ -2145,6 +2146,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
+ .no_user_fence = true,
.get_rptr = vcn_v2_0_enc_ring_get_rptr,
.get_wptr = vcn_v2_0_enc_ring_get_wptr,
.set_wptr = vcn_v2_0_enc_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 006a15451197..8b8184fe6764 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -1778,6 +1778,7 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0xf,
+ .no_user_fence = true,
.secure_submission_supported = true,
.get_rptr = vcn_v2_5_dec_ring_get_rptr,
.get_wptr = vcn_v2_5_dec_ring_get_wptr,
@@ -1879,6 +1880,7 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
+ .no_user_fence = true,
.get_rptr = vcn_v2_5_enc_ring_get_rptr,
.get_wptr = vcn_v2_5_enc_ring_get_wptr,
.set_wptr = vcn_v2_5_enc_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 6fb4fcdbba4f..81bba3ec2a93 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -1856,6 +1856,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0x3f,
.nop = VCN_DEC_SW_CMD_NO_OP,
+ .no_user_fence = true,
.secure_submission_supported = true,
.get_rptr = vcn_v3_0_dec_ring_get_rptr,
.get_wptr = vcn_v3_0_dec_ring_get_wptr,
@@ -1972,6 +1973,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
uint32_t offset, size, *create;
+ uint64_t buf_end;
if (msg[0] != RDECODE_MESSAGE_CREATE)
continue;
@@ -1979,7 +1981,8 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
offset = msg[1];
size = msg[2];
- if (size < 4 || offset + size > end - addr) {
+ if (size < 4 || check_add_overflow(offset, size, &buf_end) ||
+ buf_end > end - addr) {
DRM_ERROR("VCN message buffer exceeds BO bounds!\n");
r = -EINVAL;
goto out;
@@ -2036,6 +2039,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0xf,
+ .no_user_fence = true,
.secure_submission_supported = true,
.get_rptr = vcn_v3_0_dec_ring_get_rptr,
.get_wptr = vcn_v3_0_dec_ring_get_wptr,
@@ -2138,6 +2142,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
+ .no_user_fence = true,
.get_rptr = vcn_v3_0_enc_ring_get_rptr,
.get_wptr = vcn_v3_0_enc_ring_get_wptr,
.set_wptr = vcn_v3_0_enc_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 5dec92691f73..ff7269bafae8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -1889,6 +1889,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
uint32_t offset, size, *create;
+ uint64_t buf_end;
if (msg[0] != RDECODE_MESSAGE_CREATE)
continue;
@@ -1896,7 +1897,8 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
offset = msg[1];
size = msg[2];
- if (size < 4 || offset + size > end - addr) {
+ if (size < 4 || check_add_overflow(offset, size, &buf_end) ||
+ buf_end > end - addr) {
DRM_ERROR("VCN message buffer exceeds BO bounds!\n");
r = -EINVAL;
goto out;
@@ -1994,6 +1996,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
+ .no_user_fence = true,
.extra_bytes = sizeof(struct amdgpu_vcn_rb_metadata),
.get_rptr = vcn_v4_0_unified_ring_get_rptr,
.get_wptr = vcn_v4_0_unified_ring_get_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index ff3013b97abd..10e8fc2821f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -1775,6 +1775,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
+ .no_user_fence = true,
.get_rptr = vcn_v4_0_3_unified_ring_get_rptr,
.get_wptr = vcn_v4_0_3_unified_ring_get_wptr,
.set_wptr = vcn_v4_0_3_unified_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index 1f6a22983c0d..1571cc5a148c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -1483,6 +1483,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
+ .no_user_fence = true,
.get_rptr = vcn_v4_0_5_unified_ring_get_rptr,
.get_wptr = vcn_v4_0_5_unified_ring_get_wptr,
.set_wptr = vcn_v4_0_5_unified_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
index 6109124f852e..d5f49fa33bee 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
@@ -1207,6 +1207,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
+ .no_user_fence = true,
.get_rptr = vcn_v5_0_0_unified_ring_get_rptr,
.get_wptr = vcn_v5_0_0_unified_ring_get_wptr,
.set_wptr = vcn_v5_0_0_unified_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
index c28c6aff17aa..54fbf8d73ca6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
@@ -1419,6 +1419,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
+ .no_user_fence = true,
.get_rptr = vcn_v5_0_1_unified_ring_get_rptr,
.get_wptr = vcn_v5_0_1_unified_ring_get_wptr,
.set_wptr = vcn_v5_0_1_unified_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c
index c3d3cc023058..bbc172db91a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_2.c
@@ -994,6 +994,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_2_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
+ .no_user_fence = true,
.get_rptr = vcn_v5_0_2_unified_ring_get_rptr,
.get_wptr = vcn_v5_0_2_unified_ring_get_wptr,
.set_wptr = vcn_v5_0_2_unified_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 55ea5145a28a..03b266b26738 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -25,6 +25,7 @@
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
+#include <linux/overflow.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
@@ -66,6 +67,21 @@ static const struct class kfd_class = {
.name = kfd_dev_name,
};
+/*
+ * Cache the address space of the chardev on first open so that the reset
+ * path can drop all userspace mappings of doorbell and MMIO ranges via
+ * unmap_mapping_range().
+ */
+static struct address_space *kfd_dev_mapping;
+
+void kfd_dev_unmap_mapping_range(loff_t const holebegin, loff_t const holelen)
+{
+ struct address_space *mapping = READ_ONCE(kfd_dev_mapping);
+
+ if (mapping)
+ unmap_mapping_range(mapping, holebegin, holelen, 1);
+}
+
static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
{
struct kfd_process_device *pdd;
@@ -132,6 +148,13 @@ static int kfd_open(struct inode *inode, struct file *filep)
if (iminor(inode) != 0)
return -ENODEV;
+ /*
+ * /dev/kfd is a single chardev so all opens share one inode. Cache
+ * its address_space on the first open for use by the reset path.
+ */
+ if (!READ_ONCE(kfd_dev_mapping))
+ cmpxchg(&kfd_dev_mapping, NULL, inode->i_mapping);
+
is_32bit_user_mode = in_compat_syscall();
if (is_32bit_user_mode) {
@@ -1359,7 +1382,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
if (WARN_ON_ONCE(!peer_pdd))
continue;
- kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
+ kfd_flush_tlb(peer_pdd);
}
kfree(devices_arr);
@@ -1454,7 +1477,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
if (WARN_ON_ONCE(!peer_pdd))
continue;
if (flush_tlb)
- kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
+ kfd_flush_tlb(peer_pdd);
/* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */
err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
@@ -1695,6 +1718,16 @@ static int kfd_ioctl_smi_events(struct file *filep,
return kfd_smi_event_open(pdd->dev, &args->anon_fd);
}
+static int kfd_ioctl_svm_validate(void *kdata, unsigned int usize)
+{
+ struct kfd_ioctl_svm_args *args = kdata;
+ size_t expected = struct_size(args, attrs, args->nattr);
+
+ if (expected == SIZE_MAX || usize < expected)
+ return -EINVAL;
+ return 0;
+}
+
#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
static int kfd_ioctl_set_xnack_mode(struct file *filep,
@@ -3209,7 +3242,11 @@ static int kfd_ioctl_create_process(struct file *filep, struct kfd_process *p, v
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
- .cmd_drv = 0, .name = #ioctl}
+ .validate = NULL, .cmd_drv = 0, .name = #ioctl}
+
+#define AMDKFD_IOCTL_DEF_V(ioctl, _func, _validate, _flags) \
+ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
+ .validate = _validate, .cmd_drv = 0, .name = #ioctl}
/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
@@ -3306,7 +3343,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
kfd_ioctl_smi_events, 0),
- AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
+ AMDKFD_IOCTL_DEF_V(AMDKFD_IOC_SVM, kfd_ioctl_svm,
+ kfd_ioctl_svm_validate, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
kfd_ioctl_set_xnack_mode, 0),
@@ -3431,6 +3469,12 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
memset(kdata, 0, usize);
}
+ if (ioctl->validate) {
+ retcode = ioctl->validate(kdata, usize);
+ if (retcode)
+ goto err_i1;
+ }
+
retcode = func(filep, process, kdata);
if (cmd & IOC_OUT)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 8ff97bf7d95a..b7f8f7ff8198 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -1737,37 +1737,6 @@ bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entr
return false;
}
-/* check if there is kfd process still uses adev */
-static bool kgd2kfd_check_device_idle(struct amdgpu_device *adev)
-{
- struct kfd_process *p;
- struct hlist_node *p_temp;
- unsigned int temp;
- struct kfd_node *dev;
-
- mutex_lock(&kfd_processes_mutex);
-
- if (hash_empty(kfd_processes_table)) {
- mutex_unlock(&kfd_processes_mutex);
- return true;
- }
-
- /* check if there is device still use adev */
- hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) {
- for (int i = 0; i < p->n_pdds; i++) {
- dev = p->pdds[i]->dev;
- if (dev->adev == adev) {
- mutex_unlock(&kfd_processes_mutex);
- return false;
- }
- }
- }
-
- mutex_unlock(&kfd_processes_mutex);
-
- return true;
-}
-
/** kgd2kfd_teardown_processes - gracefully tear down existing
* kfd processes that use adev
*
@@ -1800,7 +1769,7 @@ void kgd2kfd_teardown_processes(struct amdgpu_device *adev)
mutex_unlock(&kfd_processes_mutex);
/* wait all kfd processes use adev terminate */
- while (!kgd2kfd_check_device_idle(adev))
+ while (!!atomic_read(&adev->kfd.dev->kfd_processes_count))
cond_resched();
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index ab3b2e7be9bd..e0a31e11f0ff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -475,6 +475,9 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
} else {
/* For CP queues on SOC15 */
if (restore_id) {
+ if (*restore_id >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
+ return -EINVAL;
+
/* make sure that ID is free */
if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
return -EINVAL;
@@ -572,7 +575,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
qpd->vmid,
qpd->page_table_base);
/* invalidate the VM context after pasid and vmid mapping is set up */
- kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
+ kfd_flush_tlb(qpd_to_pdd(qpd));
if (dqm->dev->kfd2kgd->set_scratch_backing_va)
dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
@@ -610,7 +613,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
if (flush_texture_cache_nocpsch(q->device, qpd))
dev_err(dev, "Failed to flush TC\n");
- kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
+ kfd_flush_tlb(qpd_to_pdd(qpd));
/* Release the vmid mapping */
set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
@@ -1284,7 +1287,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
dqm->dev->adev,
qpd->vmid,
qpd->page_table_base);
- kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
+ kfd_flush_tlb(pdd);
}
/* Take a safe reference to the mm_struct, which may otherwise
@@ -1587,6 +1590,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
}
if (restore_sdma_id) {
+ if (*restore_sdma_id >= get_num_sdma_queues(dqm))
+ return -EINVAL;
+
/* Re-use existing sdma_id */
if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) {
dev_err(dev, "SDMA queue already in use\n");
@@ -1613,6 +1619,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
return -ENOMEM;
}
if (restore_sdma_id) {
+ if (*restore_sdma_id >= get_num_xgmi_sdma_queues(dqm))
+ return -EINVAL;
+
/* Re-use existing sdma_id */
if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) {
dev_err(dev, "SDMA queue already in use\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index e8f97de9d6e4..f6d9d81003dc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -364,11 +364,15 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
{
struct v9_mqd *m;
struct kfd_context_save_area_header header;
+ u32 cntl_stack_size;
+ u32 cntl_stack_offset;
/* Control stack is located one page after MQD. */
void *mqd_ctl_stack = (void *)((uintptr_t)mqd + AMDGPU_GPU_PAGE_SIZE);
m = get_mqd(mqd);
+ cntl_stack_size = min_t(u32, m->cp_hqd_cntl_stack_size, q->ctl_stack_size);
+ cntl_stack_offset = min_t(u32, m->cp_hqd_cntl_stack_offset, cntl_stack_size);
*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
m->cp_hqd_cntl_stack_offset;
@@ -384,9 +388,10 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state)))
return -EFAULT;
- if (copy_to_user(ctl_stack + m->cp_hqd_cntl_stack_offset,
- mqd_ctl_stack + m->cp_hqd_cntl_stack_offset,
- *ctl_stack_used_size))
+ *ctl_stack_used_size = cntl_stack_size - cntl_stack_offset;
+
+ if (copy_to_user(ctl_stack + cntl_stack_offset, mqd_ctl_stack + cntl_stack_offset,
+ *ctl_stack_used_size))
return -EFAULT;
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 6e333bfa17d6..d5b07789eda4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -395,6 +395,7 @@ enum kfd_mempool {
/* Character device interface */
int kfd_chardev_init(void);
void kfd_chardev_exit(void);
+void kfd_dev_unmap_mapping_range(loff_t const holebegin, loff_t const holelen);
/**
* enum kfd_unmap_queues_filter - Enum for queue filters.
@@ -1047,10 +1048,13 @@ extern struct srcu_struct kfd_processes_srcu;
typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p,
void *data);
+typedef int amdkfd_ioctl_validate_t(void *kdata, unsigned int usize);
+
struct amdkfd_ioctl_desc {
unsigned int cmd;
int flags;
amdkfd_ioctl_t *func;
+ amdkfd_ioctl_validate_t *validate;
unsigned int cmd_drv;
const char *name;
};
@@ -1551,13 +1555,13 @@ void kfd_signal_reset_event(struct kfd_node *dev);
void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid);
void kfd_signal_process_terminate_event(struct kfd_process *p);
-static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
- enum TLB_FLUSH_TYPE type)
+static inline void kfd_flush_tlb(struct kfd_process_device *pdd)
{
struct amdgpu_device *adev = pdd->dev->adev;
struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
- amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask);
+ amdgpu_vm_flush_compute_tlb(adev, vm, TLB_FLUSH_HEAVYWEIGHT,
+ pdd->dev->xcc_mask);
}
static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index b120fdb0ef77..35ec67d9739b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1366,6 +1366,12 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
pr_debug("CPU[0x%llx 0x%llx] -> GPU[0x%llx 0x%llx]\n", start, last,
gpu_start, gpu_end);
+
+ if (!amdgpu_vm_ready(vm)) {
+ pr_debug("VM not ready, canceling unmap\n");
+ return -EINVAL;
+ }
+
return amdgpu_vm_update_range(adev, vm, false, true, true, false, NULL, gpu_start,
gpu_end, init_pte_value, 0, 0, NULL, NULL,
fence);
@@ -1418,7 +1424,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
if (r)
break;
}
- kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);
+ kfd_flush_tlb(pdd);
}
return r;
@@ -1443,6 +1449,11 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
last_start, last_start + npages - 1, readonly);
+ if (!amdgpu_vm_ready(vm)) {
+ pr_debug("VM not ready, canceling map\n");
+ return -EINVAL;
+ }
+
for (i = offset; i < offset + npages; i++) {
uint64_t gpu_start;
uint64_t gpu_end;
@@ -1560,7 +1571,7 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
}
}
- kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
+ kfd_flush_tlb(pdd);
}
return r;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e96a12ff2d31..5fc5d5608506 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1903,7 +1903,11 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
goto error;
}
- init_data.asic_id.chip_family = adev->family;
+ /* special handling for early revisions of GC 11.5.4 */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 4))
+ init_data.asic_id.chip_family = AMDGPU_FAMILY_GC_11_5_4;
+ else
+ init_data.asic_id.chip_family = adev->family;
init_data.asic_id.pci_revision_id = adev->pdev->revision;
init_data.asic_id.hw_internal_rev = adev->external_rev_id;
@@ -9404,9 +9408,21 @@ static void manage_dm_interrupts(struct amdgpu_device *adev,
if (acrtc_state) {
timing = &acrtc_state->stream->timing;
- if (amdgpu_ip_version(adev, DCE_HWIP, 0) <
- IP_VERSION(3, 5, 0) ||
- !(adev->flags & AMD_IS_APU)) {
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >=
+ IP_VERSION(3, 2, 0) &&
+ !(adev->flags & AMD_IS_APU)) {
+ /*
+ * DGPUs NV3x and newer that support idle optimizations
+ * experience intermittent flip-done timeouts on cursor
+ * updates. Restore 5s offdelay behavior for now.
+ *
+ * Discussion on the issue:
+ * https://lore.kernel.org/amd-gfx/20260217191632.1243826-1-sysdadmin@m1k.cloud/
+ */
+ config.offdelay_ms = 5000;
+ config.disable_immediate = false;
+ } else if (amdgpu_ip_version(adev, DCE_HWIP, 0) <
+ IP_VERSION(3, 5, 0)) {
/*
* Older HW and DGPU have issues with instant off;
* use a 2 frame offdelay.
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index 3b8ae7798a93..a3cb05490dc9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -1032,6 +1032,45 @@ dm_helpers_read_acpi_edid(struct amdgpu_dm_connector *aconnector)
return drm_edid_read_custom(connector, dm_helpers_probe_acpi_edid, connector);
}
+static const struct drm_edid *
+dm_helpers_read_vbios_hardcoded_edid(struct dc_link *link, struct amdgpu_dm_connector *aconnector)
+{
+ struct dc_bios *bios = link->ctx->dc_bios;
+ struct embedded_panel_info info;
+ const struct drm_edid *edid;
+ enum bp_result r;
+
+ if (!dc_is_embedded_signal(link->connector_signal) ||
+ !bios->funcs->get_embedded_panel_info)
+ return NULL;
+
+ memset(&info, 0, sizeof(info));
+ r = bios->funcs->get_embedded_panel_info(bios, &info);
+
+ if (r != BP_RESULT_OK) {
+ dm_error("Error when reading embedded panel info: %u\n", r);
+ return NULL;
+ }
+
+ if (!info.fake_edid || !info.fake_edid_size) {
+ dm_error("Embedded panel info doesn't contain an EDID\n");
+ return NULL;
+ }
+
+ edid = drm_edid_alloc(info.fake_edid, info.fake_edid_size);
+
+ if (!drm_edid_valid(edid)) {
+ dm_error("EDID from embedded panel info is invalid\n");
+ drm_edid_free(edid);
+ return NULL;
+ }
+
+ aconnector->base.display_info.width_mm = info.panel_width_mm;
+ aconnector->base.display_info.height_mm = info.panel_height_mm;
+
+ return edid;
+}
+
void populate_hdmi_info_from_connector(struct drm_hdmi_info *hdmi, struct dc_edid_caps *edid_caps)
{
edid_caps->scdc_present = hdmi->scdc.supported;
@@ -1052,6 +1091,9 @@ enum dc_edid_status dm_helpers_read_local_edid(
if (link->aux_mode)
ddc = &aconnector->dm_dp_aux.aux.ddc;
+ else if (link->ddc_hw_inst == GPIO_DDC_LINE_UNKNOWN &&
+ dc_is_embedded_signal(link->connector_signal))
+ ddc = NULL;
else
ddc = &aconnector->i2c->base;
@@ -1065,6 +1107,8 @@ enum dc_edid_status dm_helpers_read_local_edid(
drm_edid = dm_helpers_read_acpi_edid(aconnector);
if (drm_edid)
drm_info(connector->dev, "Using ACPI provided EDID for %s\n", connector->name);
+ else if (!ddc)
+ drm_edid = dm_helpers_read_vbios_hardcoded_edid(link, aconnector);
else
drm_edid = drm_edid_read_ddc(connector, ddc);
drm_edid_connector_update(connector, drm_edid);
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
index dd362071a6c9..c307f42fe0b9 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
@@ -794,11 +794,13 @@ static enum bp_result bios_parser_external_encoder_control(
static enum bp_result bios_parser_dac_load_detection(
struct dc_bios *dcb,
- enum engine_id engine_id)
+ enum engine_id engine_id,
+ struct graphics_object_id ext_enc_id)
{
struct bios_parser *bp = BP_FROM_DCB(dcb);
struct dc_context *ctx = dcb->ctx;
struct bp_load_detection_parameters bp_params = {0};
+ struct bp_external_encoder_control ext_cntl = {0};
enum bp_result bp_result = BP_RESULT_UNSUPPORTED;
uint32_t bios_0_scratch;
uint32_t device_id_mask = 0;
@@ -824,6 +826,13 @@ static enum bp_result bios_parser_dac_load_detection(
bp_params.engine_id = engine_id;
bp_result = bp->cmd_tbl.dac_load_detection(bp, &bp_params);
+ } else if (ext_enc_id.id) {
+ if (!bp->cmd_tbl.external_encoder_control)
+ return BP_RESULT_UNSUPPORTED;
+
+ ext_cntl.action = EXTERNAL_ENCODER_CONTROL_DAC_LOAD_DETECT;
+ ext_cntl.encoder_id = ext_enc_id;
+ bp_result = bp->cmd_tbl.external_encoder_control(bp, &ext_cntl);
}
if (bp_result != BP_RESULT_OK)
@@ -1304,6 +1313,60 @@ static enum bp_result bios_parser_get_embedded_panel_info(
return BP_RESULT_FAILURE;
}
+static enum bp_result get_embedded_panel_extra_info(
+ struct bios_parser *bp,
+ struct embedded_panel_info *info,
+ const uint32_t table_offset)
+{
+ uint8_t *record = bios_get_image(&bp->base, table_offset, 1);
+ ATOM_PANEL_RESOLUTION_PATCH_RECORD *panel_res_record;
+ ATOM_FAKE_EDID_PATCH_RECORD *fake_edid_record;
+
+ while (*record != ATOM_RECORD_END_TYPE) {
+ switch (*record) {
+ case LCD_MODE_PATCH_RECORD_MODE_TYPE:
+ record += sizeof(ATOM_PATCH_RECORD_MODE);
+ break;
+ case LCD_RTS_RECORD_TYPE:
+ record += sizeof(ATOM_LCD_RTS_RECORD);
+ break;
+ case LCD_CAP_RECORD_TYPE:
+ record += sizeof(ATOM_LCD_MODE_CONTROL_CAP);
+ break;
+ case LCD_FAKE_EDID_PATCH_RECORD_TYPE:
+ fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record;
+ if (fake_edid_record->ucFakeEDIDLength) {
+ if (fake_edid_record->ucFakeEDIDLength == 128)
+ info->fake_edid_size =
+ fake_edid_record->ucFakeEDIDLength;
+ else
+ info->fake_edid_size =
+ fake_edid_record->ucFakeEDIDLength * 128;
+
+ info->fake_edid = fake_edid_record->ucFakeEDIDString;
+
+ record += struct_size(fake_edid_record,
+ ucFakeEDIDString,
+ info->fake_edid_size);
+ } else {
+ /* empty fake edid record must be 3 bytes long */
+ record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1;
+ }
+ break;
+ case LCD_PANEL_RESOLUTION_RECORD_TYPE:
+ panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record;
+ info->panel_width_mm = panel_res_record->usHSize;
+ info->panel_height_mm = panel_res_record->usVSize;
+ record += sizeof(ATOM_PANEL_RESOLUTION_PATCH_RECORD);
+ break;
+ default:
+ return BP_RESULT_BADBIOSTABLE;
+ }
+ }
+
+ return BP_RESULT_OK;
+}
+
static enum bp_result get_embedded_panel_info_v1_2(
struct bios_parser *bp,
struct embedded_panel_info *info)
@@ -1420,6 +1483,10 @@ static enum bp_result get_embedded_panel_info_v1_2(
if (ATOM_PANEL_MISC_API_ENABLED & lvds->ucLVDS_Misc)
info->lcd_timing.misc_info.API_ENABLED = true;
+ if (lvds->usExtInfoTableOffset)
+ return get_embedded_panel_extra_info(bp, info,
+ le16_to_cpu(lvds->usExtInfoTableOffset) + DATA_TABLES(LCD_Info));
+
return BP_RESULT_OK;
}
@@ -1545,6 +1612,10 @@ static enum bp_result get_embedded_panel_info_v1_3(
(uint32_t) (ATOM_PANEL_MISC_V13_GREY_LEVEL &
lvds->ucLCD_Misc) >> ATOM_PANEL_MISC_V13_GREY_LEVEL_SHIFT;
+ if (lvds->usExtInfoTableOffset)
+ return get_embedded_panel_extra_info(bp, info,
+ le16_to_cpu(lvds->usExtInfoTableOffset) + DATA_TABLES(LCD_Info));
+
return BP_RESULT_OK;
}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index a1c08e1cc411..c51c4b2c6fae 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -493,6 +493,10 @@ static enum bp_result get_gpio_i2c_info(
- sizeof(struct atom_common_table_header))
/ sizeof(struct atom_gpio_pin_assignment);
+ if (!bios_get_image(&bp->base, DATA_TABLES(gpio_pin_lut),
+ le16_to_cpu(header->table_header.structuresize)))
+ return BP_RESULT_BADBIOSTABLE;
+
pin = (struct atom_gpio_pin_assignment *) header->gpio_pin;
for (table_index = 0; table_index < count; table_index++) {
@@ -681,6 +685,11 @@ static enum bp_result bios_parser_get_gpio_pin_info(
count = (le16_to_cpu(header->table_header.structuresize)
- sizeof(struct atom_common_table_header))
/ sizeof(struct atom_gpio_pin_assignment);
+
+ if (!bios_get_image(&bp->base, DATA_TABLES(gpio_pin_lut),
+ le16_to_cpu(header->table_header.structuresize)))
+ return BP_RESULT_BADBIOSTABLE;
+
for (i = 0; i < count; ++i) {
if (header->gpio_pin[i].gpio_id != gpio_id)
continue;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c
index 8d2cf95ae739..e00dc05c2d9d 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c
@@ -37,10 +37,13 @@ uint8_t *bios_get_image(struct dc_bios *bp,
uint32_t offset,
uint32_t size)
{
- if (bp->bios && offset + size < bp->bios_size)
- return bp->bios + offset;
- else
+ if (!bp->bios)
return NULL;
+
+ if (offset > bp->bios_size || size > bp->bios_size - offset)
+ return NULL;
+
+ return bp->bios + offset;
}
#include "reg_helper.h"
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 419f894c87b0..b3530fbf32f7 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -6071,7 +6071,11 @@ bool dc_process_dmub_aux_transfer_async(struct dc *dc,
uint8_t action;
union dmub_rb_cmd cmd = {0};
- ASSERT(payload->length <= 16);
+ if (link_index >= dc->link_count || !dc->links[link_index])
+ return false;
+
+ if (payload->length > sizeof(cmd.dp_aux_access.aux_control.dpaux.data))
+ return false;
cmd.dp_aux_access.header.type = DMUB_CMD__DP_AUX_ACCESS;
cmd.dp_aux_access.header.payload_bytes = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 7f55ba09b191..37714d4371fb 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -1682,7 +1682,7 @@ struct dc_scratch_space {
struct dc_link_training_overrides preferred_training_settings;
struct dp_audio_test_data audio_test_data;
- uint8_t ddc_hw_inst;
+ enum gpio_ddc_line ddc_hw_inst;
uint8_t hpd_src;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
index 6f96c5cf39fe..526f71616f94 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h
@@ -102,7 +102,8 @@ struct dc_vbios_funcs {
struct bp_external_encoder_control *cntl);
enum bp_result (*dac_load_detection)(
struct dc_bios *bios,
- enum engine_id engine_id);
+ enum engine_id engine_id,
+ struct graphics_object_id ext_enc_id);
enum bp_result (*transmitter_control)(
struct dc_bios *bios,
struct bp_transmitter_control *cntl);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
index 5f40ae9e3120..e15fd1454d3b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
@@ -1102,7 +1102,9 @@ void dce110_link_encoder_hw_init(
ASSERT(result == BP_RESULT_OK);
}
- aux_initialize(enc110);
+
+ if (enc110->aux_regs)
+ aux_initialize(enc110);
/* reinitialize HPD.
* hpd_initialize() will pass DIG_FE id to HW context.
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c
index 6f2a0d5d963b..62fe5c3b18dc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.c
@@ -40,8 +40,8 @@
#define FN(reg_name, field_name) \
mcif_wb30->mcif_wb_shift->field_name, mcif_wb30->mcif_wb_mask->field_name
-#define MCIF_ADDR(addr) (((unsigned long long)addr & 0xffffffffff) + 0xFE) >> 8
-#define MCIF_ADDR_HIGH(addr) (unsigned long long)addr >> 40
+#define MCIF_ADDR(addr) ((uint32_t)((((unsigned long long)(addr) & 0xffffffffffULL) + 0xFEULL) >> 8))
+#define MCIF_ADDR_HIGH(addr) ((uint32_t)(((unsigned long long)(addr)) >> 40))
/* wbif programming guide:
* 1. set up wbif parameter:
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c
index a2c46350e44e..95f8b7c7d657 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c
@@ -646,6 +646,9 @@ failure:
enum gpio_ddc_line dal_ddc_get_line(
const struct ddc *ddc)
{
+ if (!ddc)
+ return GPIO_DDC_LINE_UNKNOWN;
+
return (enum gpio_ddc_line)dal_gpio_get_enum(ddc->pin_data);
}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index 5273ca09fe12..f0abbb7c2cb2 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -665,16 +665,45 @@ void dce110_update_info_frame(struct pipe_ctx *pipe_ctx)
}
static void
-dce110_dac_encoder_control(struct pipe_ctx *pipe_ctx, bool enable)
+dce110_external_encoder_control(enum bp_external_encoder_control_action action,
+ struct dc_link *link,
+ struct dc_crtc_timing *timing)
{
- struct dc_link *link = pipe_ctx->stream->link;
+ struct dc *dc = link->ctx->dc;
struct dc_bios *bios = link->ctx->dc_bios;
- struct bp_encoder_control encoder_control = {0};
+ const struct dc_link_settings *link_settings = &link->cur_link_settings;
+ enum bp_result bp_result = BP_RESULT_OK;
+ struct bp_external_encoder_control ext_cntl = {
+ .action = action,
+ .connector_obj_id = link->link_enc->connector,
+ .encoder_id = link->ext_enc_id,
+ .lanes_number = link_settings->lane_count,
+ .link_rate = link_settings->link_rate,
+
+ /* Use signal type of the real link encoder, ie. DP */
+ .signal = link->connector_signal,
+
+ /* We don't know the timing yet when executing the SETUP action,
+ * so use a reasonably high default value. It seems that ENABLE
+ * can change the actual pixel clock but doesn't work with higher
+ * pixel clocks than what SETUP was called with.
+ */
+ .pixel_clock = timing ? timing->pix_clk_100hz / 10 : 300000,
+ .color_depth = timing ? timing->display_color_depth : COLOR_DEPTH_888,
+ };
+ DC_LOGGER_INIT(dc->ctx);
- encoder_control.action = enable ? ENCODER_CONTROL_ENABLE : ENCODER_CONTROL_DISABLE;
- encoder_control.engine_id = link->link_enc->analog_engine;
- encoder_control.pixel_clock = pipe_ctx->stream->timing.pix_clk_100hz / 10;
- bios->funcs->encoder_control(bios, &encoder_control);
+ bp_result = bios->funcs->external_encoder_control(bios, &ext_cntl);
+
+ if (bp_result != BP_RESULT_OK)
+ DC_LOG_ERROR("Failed to execute external encoder action: 0x%x\n", action);
+}
+
+static void
+dce110_prepare_ddc(struct dc_link *link)
+{
+ if (link->ext_enc_id.id)
+ dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_DDC_SETUP, link, NULL);
}
static bool
@@ -684,7 +713,8 @@ dce110_dac_load_detect(struct dc_link *link)
struct link_encoder *link_enc = link->link_enc;
enum bp_result bp_result;
- bp_result = bios->funcs->dac_load_detection(bios, link_enc->analog_engine);
+ bp_result = bios->funcs->dac_load_detection(
+ bios, link_enc->analog_engine, link->ext_enc_id);
return bp_result == BP_RESULT_OK;
}
@@ -700,7 +730,6 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx)
uint32_t early_control = 0;
struct timing_generator *tg = pipe_ctx->stream_res.tg;
- link_hwss->setup_stream_attribute(pipe_ctx);
link_hwss->setup_stream_encoder(pipe_ctx);
dc->hwss.update_info_frame(pipe_ctx);
@@ -719,8 +748,8 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx)
tg->funcs->set_early_control(tg, early_control);
- if (dc_is_rgb_signal(pipe_ctx->stream->signal))
- dce110_dac_encoder_control(pipe_ctx, true);
+ if (link->ext_enc_id.id)
+ dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_ENABLE, link, timing);
}
static enum bp_result link_transmitter_control(
@@ -1219,8 +1248,8 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx)
link_enc->transmitter - TRANSMITTER_UNIPHY_A);
}
- if (dc_is_rgb_signal(pipe_ctx->stream->signal))
- dce110_dac_encoder_control(pipe_ctx, false);
+ if (link->ext_enc_id.id)
+ dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_DISABLE, link, NULL);
}
void dce110_unblank_stream(struct pipe_ctx *pipe_ctx,
@@ -1603,22 +1632,6 @@ static enum dc_status dce110_enable_stream_timing(
return DC_OK;
}
-static void
-dce110_select_crtc_source(struct pipe_ctx *pipe_ctx)
-{
- struct dc_link *link = pipe_ctx->stream->link;
- struct dc_bios *bios = link->ctx->dc_bios;
- struct bp_crtc_source_select crtc_source_select = {0};
- enum engine_id engine_id = link->link_enc->preferred_engine;
-
- if (dc_is_rgb_signal(pipe_ctx->stream->signal))
- engine_id = link->link_enc->analog_engine;
- crtc_source_select.controller_id = CONTROLLER_ID_D0 + pipe_ctx->stream_res.tg->inst;
- crtc_source_select.color_depth = pipe_ctx->stream->timing.display_color_depth;
- crtc_source_select.engine_id = engine_id;
- crtc_source_select.sink_signal = pipe_ctx->stream->signal;
- bios->funcs->select_crtc_source(bios, &crtc_source_select);
-}
enum dc_status dce110_apply_single_controller_ctx_to_hw(
struct pipe_ctx *pipe_ctx,
@@ -1639,10 +1652,6 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw(
hws->funcs.disable_stream_gating(dc, pipe_ctx);
}
- if (pipe_ctx->stream->signal == SIGNAL_TYPE_RGB) {
- dce110_select_crtc_source(pipe_ctx);
- }
-
if (pipe_ctx->stream_res.audio != NULL) {
struct audio_output audio_output = {0};
@@ -1722,8 +1731,7 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw(
pipe_ctx->stream_res.tg->funcs->set_static_screen_control(
pipe_ctx->stream_res.tg, event_triggers, 2);
- if (!dc_is_virtual_signal(pipe_ctx->stream->signal) &&
- !dc_is_rgb_signal(pipe_ctx->stream->signal))
+ if (!dc_is_virtual_signal(pipe_ctx->stream->signal))
pipe_ctx->stream_res.stream_enc->funcs->dig_connect_to_otg(
pipe_ctx->stream_res.stream_enc,
pipe_ctx->stream_res.tg->inst);
@@ -3376,6 +3384,15 @@ void dce110_enable_tmds_link_output(struct dc_link *link,
link->phy_state.symclk_state = SYMCLK_ON_TX_ON;
}
+static void dce110_enable_analog_link_output(
+ struct dc_link *link,
+ uint32_t pix_clk_100hz)
+{
+ link->link_enc->funcs->enable_analog_output(
+ link->link_enc,
+ pix_clk_100hz);
+}
+
void dce110_enable_dp_link_output(
struct dc_link *link,
const struct link_resource *link_res,
@@ -3423,6 +3440,11 @@ void dce110_enable_dp_link_output(
}
}
+ if (link->ext_enc_id.id) {
+ dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_INIT, link, NULL);
+ dce110_external_encoder_control(EXTERNAL_ENCODER_CONTROL_SETUP, link, NULL);
+ }
+
if (dc->link_srv->dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) {
if (dc->clk_mgr->funcs->notify_link_rate_change)
dc->clk_mgr->funcs->notify_link_rate_change(dc->clk_mgr, link);
@@ -3513,8 +3535,10 @@ static const struct hw_sequencer_funcs dce110_funcs = {
.enable_lvds_link_output = dce110_enable_lvds_link_output,
.enable_tmds_link_output = dce110_enable_tmds_link_output,
.enable_dp_link_output = dce110_enable_dp_link_output,
+ .enable_analog_link_output = dce110_enable_analog_link_output,
.disable_link_output = dce110_disable_link_output,
.dac_load_detect = dce110_dac_load_detect,
+ .prepare_ddc = dce110_prepare_ddc,
};
static const struct hwseq_private_funcs dce110_private_funcs = {
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
index 7e7682d7dfc8..ae4c4ad05baa 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
@@ -568,7 +568,9 @@ static bool construct_phy(struct dc_link *link,
goto ddc_create_fail;
}
- if (!link->ddc->ddc_pin) {
+ /* Embedded display connectors such as LVDS may not have DDC. */
+ if (!link->ddc->ddc_pin &&
+ !dc_is_embedded_signal(link->connector_signal)) {
DC_ERROR("Failed to get I2C info for connector!\n");
goto ddc_create_fail;
}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
index 6a25dcfcdf17..d2d56a1c4b8b 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c
@@ -753,7 +753,8 @@ static struct link_encoder *dce60_link_encoder_create(
enc_init_data,
&link_enc_feature,
&link_enc_regs[link_regs_id],
- &link_enc_aux_regs[enc_init_data->channel - 1],
+ enc_init_data->channel == CHANNEL_ID_UNKNOWN ?
+ NULL : &link_enc_aux_regs[enc_init_data->channel - 1],
enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ?
NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]);
return &enc110->base;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
index 33be49b3c1b1..6c00497e9a01 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c
@@ -760,7 +760,8 @@ static struct link_encoder *dce80_link_encoder_create(
enc_init_data,
&link_enc_feature,
&link_enc_regs[link_regs_id],
- &link_enc_aux_regs[enc_init_data->channel - 1],
+ enc_init_data->channel == CHANNEL_ID_UNKNOWN ?
+ NULL : &link_enc_aux_regs[enc_init_data->channel - 1],
enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs) ?
NULL : &link_enc_hpd_regs[enc_init_data->hpd_source]);
return &enc110->base;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index 82f81b586986..3751f7a94a05 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -92,9 +92,14 @@
#include "dml/dcn32/dcn32_fpu.h"
#include "dc_state_priv.h"
+#include "dc_fpu.h"
#include "dml2_0/dml2_wrapper.h"
+#if !defined(DC_RUN_WITH_PREEMPTION_ENABLED)
+#define DC_RUN_WITH_PREEMPTION_ENABLED(code) code
+#endif
+
#define DC_LOGGER_INIT(logger)
enum dcn32_clk_src_array_id {
@@ -1684,7 +1689,8 @@ static void dcn32_enable_phantom_plane(struct dc *dc,
if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state)
phantom_plane = prev_phantom_plane;
else
- phantom_plane = dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state);
+ DC_RUN_WITH_PREEMPTION_ENABLED(phantom_plane =
+ dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state));
if (!phantom_plane)
continue;
diff --git a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
index 38a77fa9b4af..a0f03fb67605 100644
--- a/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
+++ b/drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h
@@ -153,6 +153,10 @@ struct embedded_panel_info {
uint32_t drr_enabled;
uint32_t min_drr_refresh_rate;
bool realtek_eDPToLVDS;
+ uint16_t panel_width_mm;
+ uint16_t panel_height_mm;
+ uint16_t fake_edid_size;
+ const uint8_t *fake_edid;
};
struct dc_firmware_info {
diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c
index 731355bdb9bc..3650e7beeb67 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c
@@ -1333,12 +1333,13 @@ static int ci_populate_all_memory_levels(struct pp_hwmgr *hwmgr)
dev_id = adev->pdev->device;
- if ((dpm_table->mclk_table.count >= 2)
- && ((dev_id == 0x67B0) || (dev_id == 0x67B1))) {
- smu_data->smc_state_table.MemoryLevel[1].MinVddci =
- smu_data->smc_state_table.MemoryLevel[0].MinVddci;
- smu_data->smc_state_table.MemoryLevel[1].MinMvdd =
- smu_data->smc_state_table.MemoryLevel[0].MinMvdd;
+ if ((dpm_table->mclk_table.count >= 2) &&
+ ((dev_id == 0x67B0) || (dev_id == 0x67B1)) &&
+ (adev->pdev->revision == 0)) {
+ smu_data->smc_state_table.MemoryLevel[1].MinVddc =
+ smu_data->smc_state_table.MemoryLevel[0].MinVddc;
+ smu_data->smc_state_table.MemoryLevel[1].MinVddcPhases =
+ smu_data->smc_state_table.MemoryLevel[0].MinVddcPhases;
}
smu_data->smc_state_table.MemoryLevel[0].ActivityLevel = 0x1F;
CONVERT_FROM_HOST_TO_SMC_US(smu_data->smc_state_table.MemoryLevel[0].ActivityLevel);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index 7f386ff0c872..9d8b1227388f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -425,6 +425,7 @@ static int aldebaran_set_default_dpm_table(struct smu_context *smu)
dpm_table->dpm_levels[0].enabled = true;
dpm_table->dpm_levels[1].value = pptable->GfxclkFmax;
dpm_table->dpm_levels[1].enabled = true;
+ dpm_table->flags |= SMU_DPM_TABLE_FINE_GRAINED;
} else {
dpm_table->count = 1;
dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index cd0a23f432ff..0df8c05a7fce 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -1129,6 +1129,7 @@ static int smu_v13_0_6_set_default_dpm_table(struct smu_context *smu)
/* gfxclk dpm table setup */
dpm_table = &dpm_context->dpm_tables.gfx_table;
dpm_table->clk_type = SMU_GFXCLK;
+ dpm_table->flags = SMU_DPM_TABLE_FINE_GRAINED;
if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT)) {
/* In the case of gfxclk, only fine-grained dpm is honored.
* Get min/max values from FW.
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c
index c3cb36813806..940b43105817 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu15/smu_v15_0.c
@@ -435,10 +435,12 @@ int smu_v15_0_fini_smc_tables(struct smu_context *smu)
smu_table->watermarks_table = NULL;
smu_table->metrics_time = 0;
+ kfree(smu_dpm->dpm_policies);
kfree(smu_dpm->dpm_context);
kfree(smu_dpm->golden_dpm_context);
kfree(smu_dpm->dpm_current_power_state);
kfree(smu_dpm->dpm_request_power_state);
+ smu_dpm->dpm_policies = NULL;
smu_dpm->dpm_context = NULL;
smu_dpm->golden_dpm_context = NULL;
smu_dpm->dpm_context_size = 0;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 3d49e58794d2..90c7127beabf 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -1370,7 +1370,7 @@ int smu_cmn_print_dpm_clk_levels(struct smu_context *smu,
level_index = 1;
}
- if (!is_fine_grained) {
+ if (!is_fine_grained || count == 1) {
for (i = 0; i < count; i++) {
freq_match = !is_deep_sleep &&
smu_cmn_freqs_match(
diff --git a/drivers/gpu/drm/bridge/chipone-icn6211.c b/drivers/gpu/drm/bridge/chipone-icn6211.c
index 814713c5bea9..553a1df4688d 100644
--- a/drivers/gpu/drm/bridge/chipone-icn6211.c
+++ b/drivers/gpu/drm/bridge/chipone-icn6211.c
@@ -758,7 +758,9 @@ static int chipone_i2c_probe(struct i2c_client *client)
dev_set_drvdata(dev, icn);
i2c_set_clientdata(client, icn);
- drm_bridge_add(&icn->bridge);
+ ret = devm_drm_bridge_add(dev, &icn->bridge);
+ if (ret)
+ return ret;
return chipone_dsi_host_attach(icn);
}
diff --git a/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c b/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c
index 441fd32dc91c..d64e328bf542 100644
--- a/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c
+++ b/drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c
@@ -222,52 +222,58 @@ static const struct drm_bridge_funcs imx8qxp_pxl2dpi_bridge_funcs = {
imx8qxp_pxl2dpi_bridge_atomic_get_output_bus_fmts,
};
-static struct device_node *
+static int
imx8qxp_pxl2dpi_get_available_ep_from_port(struct imx8qxp_pxl2dpi *p2d,
- u32 port_id)
+ u32 port_id,
+ struct device_node **ep)
{
- struct device_node *port, *ep;
+ struct device_node *port;
+ int ret = 0;
int ep_cnt;
+ *ep = NULL;
+
port = of_graph_get_port_by_id(p2d->dev->of_node, port_id);
if (!port) {
DRM_DEV_ERROR(p2d->dev, "failed to get port@%u\n", port_id);
- return ERR_PTR(-ENODEV);
+ return -ENODEV;
}
ep_cnt = of_get_available_child_count(port);
if (ep_cnt == 0) {
DRM_DEV_ERROR(p2d->dev, "no available endpoints of port@%u\n",
port_id);
- ep = ERR_PTR(-ENODEV);
+ ret = -ENODEV;
goto out;
} else if (ep_cnt > 1) {
DRM_DEV_ERROR(p2d->dev,
"invalid available endpoints of port@%u\n",
port_id);
- ep = ERR_PTR(-EINVAL);
+ ret = -EINVAL;
goto out;
}
- ep = of_get_next_available_child(port, NULL);
- if (!ep) {
+ *ep = of_get_next_available_child(port, NULL);
+ if (!*ep) {
DRM_DEV_ERROR(p2d->dev,
"failed to get available endpoint of port@%u\n",
port_id);
- ep = ERR_PTR(-ENODEV);
+ ret = -ENODEV;
goto out;
}
out:
of_node_put(port);
- return ep;
+ return ret;
}
static int imx8qxp_pxl2dpi_find_next_bridge(struct imx8qxp_pxl2dpi *p2d)
{
- struct device_node *ep __free(device_node) =
- imx8qxp_pxl2dpi_get_available_ep_from_port(p2d, 1);
- if (IS_ERR(ep))
- return PTR_ERR(ep);
+ struct device_node *ep __free(device_node) = NULL;
+ int ret;
+
+ ret = imx8qxp_pxl2dpi_get_available_ep_from_port(p2d, 1, &ep);
+ if (ret)
+ return ret;
struct device_node *remote __free(device_node) = of_graph_get_remote_port_parent(ep);
if (!remote || !of_device_is_available(remote)) {
@@ -291,9 +297,9 @@ static int imx8qxp_pxl2dpi_set_pixel_link_sel(struct imx8qxp_pxl2dpi *p2d)
struct of_endpoint endpoint;
int ret;
- ep = imx8qxp_pxl2dpi_get_available_ep_from_port(p2d, 0);
- if (IS_ERR(ep))
- return PTR_ERR(ep);
+ ret = imx8qxp_pxl2dpi_get_available_ep_from_port(p2d, 0, &ep);
+ if (ret)
+ return ret;
ret = of_graph_parse_endpoint(ep, &endpoint);
if (ret) {
diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c
index 9246e9c15a6e..ed21f09cd19a 100644
--- a/drivers/gpu/drm/bridge/ite-it66121.c
+++ b/drivers/gpu/drm/bridge/ite-it66121.c
@@ -1559,6 +1559,11 @@ static int it66121_probe(struct i2c_client *client)
return ret;
}
+ ctx->gpio_reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(ctx->gpio_reset))
+ return dev_err_probe(dev, PTR_ERR(ctx->gpio_reset),
+ "Failed to get reset GPIO\n");
+
it66121_hw_reset(ctx);
ctx->regmap = devm_regmap_init_i2c(client, &it66121_regmap_config);
diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c
index c9e6505cbd88..2d02cc69f237 100644
--- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c
+++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c
@@ -251,7 +251,6 @@ static void ge_b850v3_lvds_remove(void)
goto out;
drm_bridge_remove(&ge_b850v3_lvds_ptr->bridge);
-
ge_b850v3_lvds_ptr = NULL;
out:
mutex_unlock(&ge_b850v3_lvds_dev_mutex);
@@ -261,6 +260,7 @@ static int ge_b850v3_register(void)
{
struct i2c_client *stdp4028_i2c = ge_b850v3_lvds_ptr->stdp4028_i2c;
struct device *dev = &stdp4028_i2c->dev;
+ int ret;
/* drm bridge initialization */
ge_b850v3_lvds_ptr->bridge.ops = DRM_BRIDGE_OP_DETECT |
@@ -277,11 +277,15 @@ static int ge_b850v3_register(void)
if (!stdp4028_i2c->irq)
return 0;
- return devm_request_threaded_irq(&stdp4028_i2c->dev,
- stdp4028_i2c->irq, NULL,
- ge_b850v3_lvds_irq_handler,
- IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
- "ge-b850v3-lvds-dp", ge_b850v3_lvds_ptr);
+ ret = devm_request_threaded_irq(&stdp4028_i2c->dev,
+ stdp4028_i2c->irq, NULL,
+ ge_b850v3_lvds_irq_handler,
+ IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+ "ge-b850v3-lvds-dp", ge_b850v3_lvds_ptr);
+ if (ret)
+ drm_bridge_remove(&ge_b850v3_lvds_ptr->bridge);
+
+ return ret;
}
static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c)
diff --git a/drivers/gpu/drm/bridge/tda998x_drv.c b/drivers/gpu/drm/bridge/tda998x_drv.c
index d9b388165de1..6c427bc75896 100644
--- a/drivers/gpu/drm/bridge/tda998x_drv.c
+++ b/drivers/gpu/drm/bridge/tda998x_drv.c
@@ -1293,7 +1293,7 @@ static const struct drm_edid *tda998x_edid_read(struct tda998x_priv *priv,
* can't handle signals gracefully.
*/
if (tda998x_edid_delay_wait(priv))
- return 0;
+ return NULL;
if (priv->rev == TDA19988)
reg_clear(priv, REG_TX4, TX4_PD_RAM);
@@ -1762,7 +1762,7 @@ static const struct drm_bridge_funcs tda998x_bridge_funcs = {
static int tda998x_get_audio_ports(struct tda998x_priv *priv,
struct device_node *np)
{
- const u32 *port_data;
+ const __be32 *port_data;
u32 size;
int i;
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c
index c598b99673fc..e7db4e4ea700 100644
--- a/drivers/gpu/drm/drm_color_mgmt.c
+++ b/drivers/gpu/drm/drm_color_mgmt.c
@@ -831,7 +831,7 @@ static void fill_palette_332(struct drm_crtc *crtc, u16 r, u16 g, u16 b,
}
/**
- * drm_crtc_fill_palette_332 - Programs a default palette for R332-like formats
+ * drm_crtc_fill_palette_332 - Programs a default palette for RGB332-like formats
* @crtc: The displaying CRTC
* @set_palette: Callback for programming the hardware gamma LUT
*
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 985c283cf59f..675675480da4 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -697,6 +697,7 @@ static void drm_dev_init_release(struct drm_device *dev, void *res)
mutex_destroy(&dev->master_mutex);
mutex_destroy(&dev->clientlist_mutex);
mutex_destroy(&dev->filelist_mutex);
+ mutex_destroy(&dev->gem_lru_mutex);
}
static int drm_dev_init(struct drm_device *dev,
@@ -738,6 +739,7 @@ static int drm_dev_init(struct drm_device *dev,
INIT_LIST_HEAD(&dev->vblank_event_list);
spin_lock_init(&dev->event_lock);
+ mutex_init(&dev->gem_lru_mutex);
mutex_init(&dev->filelist_mutex);
mutex_init(&dev->clientlist_mutex);
mutex_init(&dev->master_mutex);
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index a80a335f4148..1541fc8a9ac2 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -490,7 +490,7 @@ static void drm_fb_helper_memory_range_to_clip(struct fb_info *info, off_t off,
* the number of horizontal pixels that need an update.
*/
off_t bit_off = (off % line_length) * 8;
- off_t bit_end = (end % line_length) * 8;
+ off_t bit_end = bit_off + len * 8;
x1 = bit_off / info->var.bits_per_pixel;
x2 = DIV_ROUND_UP(bit_end, info->var.bits_per_pixel);
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index d6424267260b..e3b8a1f353cb 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1019,7 +1019,7 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
struct drm_gem_change_handle *args = data;
- struct drm_gem_object *obj;
+ struct drm_gem_object *obj, *idrobj;
int handle, ret;
if (!drm_core_check_feature(dev, DRIVER_GEM))
@@ -1042,12 +1042,30 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data,
mutex_lock(&file_priv->prime.lock);
spin_lock(&file_priv->table_lock);
+
+ /* When create_tail allocs an obj idr, it needs to first alloc as NULL,
+ * then later replace with the correct object. This is not necessary
+ * here, because the only operations that could race are drm_prime
+ * bookkeeping, and we hold the prime lock.
+ */
ret = idr_alloc(&file_priv->object_idr, obj, handle, handle + 1,
GFP_NOWAIT);
- spin_unlock(&file_priv->table_lock);
- if (ret < 0)
- goto out_unlock;
+ if (ret < 0) {
+ spin_unlock(&file_priv->table_lock);
+ goto out_unlock;
+ }
+
+ idrobj = idr_replace(&file_priv->object_idr, NULL, handle);
+ if (idrobj != obj) {
+ idr_replace(&file_priv->object_idr, idrobj, handle);
+ idr_remove(&file_priv->object_idr, args->new_handle);
+ spin_unlock(&file_priv->table_lock);
+ ret = -ENOENT;
+ goto out_unlock;
+ }
+
+ spin_unlock(&file_priv->table_lock);
if (obj->dma_buf) {
ret = drm_prime_add_buf_handle(&file_priv->prime, obj->dma_buf,
@@ -1066,7 +1084,9 @@ int drm_gem_change_handle_ioctl(struct drm_device *dev, void *data,
spin_lock(&file_priv->table_lock);
idr_remove(&file_priv->object_idr, args->handle);
+ idrobj = idr_replace(&file_priv->object_idr, obj, handle);
spin_unlock(&file_priv->table_lock);
+ WARN_ON(idrobj != NULL);
out_unlock:
mutex_unlock(&file_priv->prime.lock);
@@ -1541,12 +1561,10 @@ EXPORT_SYMBOL(drm_gem_unlock_reservations);
* drm_gem_lru_init - initialize a LRU
*
* @lru: The LRU to initialize
- * @lock: The lock protecting the LRU
*/
void
-drm_gem_lru_init(struct drm_gem_lru *lru, struct mutex *lock)
+drm_gem_lru_init(struct drm_gem_lru *lru)
{
- lru->lock = lock;
lru->count = 0;
INIT_LIST_HEAD(&lru->list);
}
@@ -1571,14 +1589,10 @@ drm_gem_lru_remove_locked(struct drm_gem_object *obj)
void
drm_gem_lru_remove(struct drm_gem_object *obj)
{
- struct drm_gem_lru *lru = obj->lru;
-
- if (!lru)
- return;
-
- mutex_lock(lru->lock);
- drm_gem_lru_remove_locked(obj);
- mutex_unlock(lru->lock);
+ mutex_lock(&obj->dev->gem_lru_mutex);
+ if (obj->lru)
+ drm_gem_lru_remove_locked(obj);
+ mutex_unlock(&obj->dev->gem_lru_mutex);
}
EXPORT_SYMBOL(drm_gem_lru_remove);
@@ -1593,7 +1607,7 @@ EXPORT_SYMBOL(drm_gem_lru_remove);
void
drm_gem_lru_move_tail_locked(struct drm_gem_lru *lru, struct drm_gem_object *obj)
{
- lockdep_assert_held_once(lru->lock);
+ lockdep_assert_held_once(&obj->dev->gem_lru_mutex);
if (obj->lru)
drm_gem_lru_remove_locked(obj);
@@ -1617,9 +1631,9 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail_locked);
void
drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj)
{
- mutex_lock(lru->lock);
+ mutex_lock(&obj->dev->gem_lru_mutex);
drm_gem_lru_move_tail_locked(lru, obj);
- mutex_unlock(lru->lock);
+ mutex_unlock(&obj->dev->gem_lru_mutex);
}
EXPORT_SYMBOL(drm_gem_lru_move_tail);
@@ -1633,6 +1647,7 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail);
* of the shrink callback to check for this (ie. dma_resv_test_signaled())
* or if necessary block until the buffer becomes idle.
*
+ * @dev: DRM device the LRU belongs to
* @lru: The LRU to scan
* @nr_to_scan: The number of pages to try to reclaim
* @remaining: The number of pages left to reclaim, should be initialized by caller
@@ -1640,7 +1655,8 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail);
* @ticket: Optional ww_acquire_ctx context to use for locking
*/
unsigned long
-drm_gem_lru_scan(struct drm_gem_lru *lru,
+drm_gem_lru_scan(struct drm_device *dev,
+ struct drm_gem_lru *lru,
unsigned int nr_to_scan,
unsigned long *remaining,
bool (*shrink)(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket),
@@ -1650,9 +1666,9 @@ drm_gem_lru_scan(struct drm_gem_lru *lru,
struct drm_gem_object *obj;
unsigned freed = 0;
- drm_gem_lru_init(&still_in_lru, lru->lock);
+ drm_gem_lru_init(&still_in_lru);
- mutex_lock(lru->lock);
+ mutex_lock(&dev->gem_lru_mutex);
while (freed < nr_to_scan) {
obj = list_first_entry_or_null(&lru->list, typeof(*obj), lru_node);
@@ -1675,7 +1691,7 @@ drm_gem_lru_scan(struct drm_gem_lru *lru,
* rest of the loop body, to reduce contention with other
* code paths that need the LRU lock
*/
- mutex_unlock(lru->lock);
+ mutex_unlock(&dev->gem_lru_mutex);
if (ticket)
ww_acquire_init(ticket, &reservation_ww_class);
@@ -1709,7 +1725,7 @@ drm_gem_lru_scan(struct drm_gem_lru *lru,
tail:
drm_gem_object_put(obj);
- mutex_lock(lru->lock);
+ mutex_lock(&dev->gem_lru_mutex);
}
/*
@@ -1721,7 +1737,7 @@ tail:
list_splice_tail(&still_in_lru.list, &lru->list);
lru->count += still_in_lru.count;
- mutex_unlock(lru->lock);
+ mutex_unlock(&dev->gem_lru_mutex);
return freed;
}
diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
index 9166c353f131..88808e972cc1 100644
--- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c
+++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
@@ -172,8 +172,8 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev,
}
for (i = 0; i < info->num_planes; i++) {
- unsigned int width = mode_cmd->width / (i ? info->hsub : 1);
- unsigned int height = mode_cmd->height / (i ? info->vsub : 1);
+ unsigned int width = drm_format_info_plane_width(info, mode_cmd->width, i);
+ unsigned int height = drm_format_info_plane_height(info, mode_cmd->height, i);
unsigned int min_size;
objs[i] = drm_gem_object_lookup(file, mode_cmd->handles[i]);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index df4232d7e135..3cc50d697c89 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -116,16 +116,18 @@ int etnaviv_sched_push_job(struct etnaviv_gem_submit *submit)
*/
mutex_lock(&gpu->sched_lock);
+ ret = xa_alloc_cyclic(&gpu->user_fences, &submit->out_fence_id,
+ NULL, xa_limit_32b, &gpu->next_user_fence,
+ GFP_KERNEL);
+ if (ret < 0)
+ goto out_unlock;
+
drm_sched_job_arm(&submit->sched_job);
submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished);
- ret = xa_alloc_cyclic(&gpu->user_fences, &submit->out_fence_id,
- submit->out_fence, xa_limit_32b,
- &gpu->next_user_fence, GFP_KERNEL);
- if (ret < 0) {
- drm_sched_job_cleanup(&submit->sched_job);
- goto out_unlock;
- }
+
+ xa_store(&gpu->user_fences, submit->out_fence_id,
+ submit->out_fence, GFP_KERNEL);
/* the scheduler holds on to the job now */
kref_get(&submit->refcount);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c
index 29a8366513fa..e68c954ec3e6 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_mic.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c
@@ -423,7 +423,9 @@ static int exynos_mic_probe(struct platform_device *pdev)
mic->bridge.of_node = dev->of_node;
- drm_bridge_add(&mic->bridge);
+ ret = devm_drm_bridge_add(dev, &mic->bridge);
+ if (ret)
+ goto err;
pm_runtime_enable(dev);
@@ -443,12 +445,8 @@ err:
static void exynos_mic_remove(struct platform_device *pdev)
{
- struct exynos_mic *mic = platform_get_drvdata(pdev);
-
component_del(&pdev->dev, &exynos_mic_component_ops);
pm_runtime_disable(&pdev->dev);
-
- drm_bridge_remove(&mic->bridge);
}
static const struct of_device_id exynos_mic_of_match[] = {
diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
index 58d7e191fd56..403d21cbb3a2 100644
--- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c
+++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
@@ -580,6 +580,7 @@ static int oaktrail_hdmi_get_modes(struct drm_connector *connector)
} else {
edid = (struct edid *)raw_edid;
/* FIXME ? edid = drm_get_edid(connector, i2c_adap); */
+ i2c_put_adapter(i2c_adap);
}
if (edid) {
diff --git a/drivers/gpu/drm/gma500/oaktrail_lvds.c b/drivers/gpu/drm/gma500/oaktrail_lvds.c
index 884d324f0044..e194d0cce067 100644
--- a/drivers/gpu/drm/gma500/oaktrail_lvds.c
+++ b/drivers/gpu/drm/gma500/oaktrail_lvds.c
@@ -293,7 +293,7 @@ void oaktrail_lvds_init(struct drm_device *dev,
{
struct gma_encoder *gma_encoder;
struct gma_connector *gma_connector;
- struct gma_i2c_chan *ddc_bus;
+ struct gma_i2c_chan *ddc_bus = NULL;
struct drm_connector *connector;
struct drm_encoder *encoder;
struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
@@ -367,6 +367,8 @@ void oaktrail_lvds_init(struct drm_device *dev,
if (edid == NULL && dev_priv->lpc_gpio_base) {
ddc_bus = oaktrail_lvds_i2c_init(dev);
if (!IS_ERR(ddc_bus)) {
+ if (i2c_adap)
+ i2c_put_adapter(i2c_adap);
i2c_adap = &ddc_bus->base;
edid = drm_get_edid(connector, i2c_adap);
}
@@ -421,7 +423,10 @@ out:
err_unlock:
mutex_unlock(&dev->mode_config.mutex);
- gma_i2c_destroy(to_gma_i2c_chan(connector->ddc));
+ if (!IS_ERR_OR_NULL(ddc_bus))
+ gma_i2c_destroy(ddc_bus);
+ else if (i2c_adap)
+ i2c_put_adapter(i2c_adap);
drm_encoder_cleanup(encoder);
err_connector_cleanup:
drm_connector_cleanup(connector);
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index f6cd0a062090..9c7c357afb09 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -584,6 +584,7 @@ struct intel_connector {
struct {
u8 dpcd[EDP_PSR_RECEIVER_CAP_SIZE];
+ u8 intel_wa_dpcd;
bool support;
bool su_support;
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 4955bd8b11d7..6ef2a0043cda 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -3119,8 +3119,13 @@ static void intel_dp_compute_vsc_colorimetry(const struct intel_crtc_state *crtc
drm_WARN_ON(display->drm,
vsc->bpc == 6 && vsc->pixelformat != DP_PIXELFORMAT_RGB);
- /* all YCbCr are always limited range */
- vsc->dynamic_range = DP_DYNAMIC_RANGE_CTA;
+ /* All YCbCr formats are always limited range. */
+ if (vsc->pixelformat == DP_PIXELFORMAT_RGB)
+ vsc->dynamic_range = crtc_state->limited_color_range ?
+ DP_DYNAMIC_RANGE_CTA : DP_DYNAMIC_RANGE_VESA;
+ else
+ vsc->dynamic_range = DP_DYNAMIC_RANGE_CTA;
+
vsc->content_type = DP_CONTENT_TYPE_NOT_DEFINED;
}
@@ -5298,7 +5303,7 @@ int intel_dp_as_sdp_unpack(struct drm_dp_as_sdp *as_sdp,
as_sdp->length = sdp->sdp_header.HB3 & DP_ADAPTIVE_SYNC_SDP_LENGTH;
as_sdp->mode = sdp->db[0] & DP_ADAPTIVE_SYNC_SDP_OPERATION_MODE;
as_sdp->vtotal = (sdp->db[2] << 8) | sdp->db[1];
- as_sdp->target_rr = (u64)sdp->db[3] | ((u64)sdp->db[4] & 0x3);
+ as_sdp->target_rr = ((sdp->db[4] & 0x3) << 8) | sdp->db[3];
as_sdp->target_rr_divider = sdp->db[4] & 0x20 ? true : false;
return 0;
diff --git a/drivers/gpu/drm/i915/display/intel_dpcd.h b/drivers/gpu/drm/i915/display/intel_dpcd.h
new file mode 100644
index 000000000000..4aea5326f2ed
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_dpcd.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef __INTEL_DPCD_H__
+#define __INTEL_DPCD_H__
+
+#define INTEL_DPCD_INTEL_WA_REGISTER_CAPS 0x3f0
+# define INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_EARLYSCANLINE_SDP_SUPPORT_MASK REG_GENMASK(1, 0)
+# define INTEL_DPCD_INTEL_WA_REGISTER_CAPS_FALL_BACK_TO_PSR1 0
+# define INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_WITH_EARLY_SCANLINE 1
+# define INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_WITHOUT_EARLY_SCANLINE 2
+
+#endif /* __INTEL_DPCD_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c
index 5390ceb21ca4..82f445c83158 100644
--- a/drivers/gpu/drm/i915/display/intel_plane.c
+++ b/drivers/gpu/drm/i915/display/intel_plane.c
@@ -373,7 +373,7 @@ intel_plane_color_copy_uapi_to_hw_state(struct intel_plane_state *plane_state,
bool changed = false;
int i = 0;
- iter_colorop = plane_state->uapi.color_pipeline;
+ iter_colorop = from_plane_state->uapi.color_pipeline;
while (iter_colorop) {
for_each_new_colorop_in_state(state, colorop, new_colorop_state, i) {
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 53c10ae76ab5..29904a037575 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -43,6 +43,7 @@
#include "intel_display_wa.h"
#include "intel_dmc.h"
#include "intel_dp.h"
+#include "intel_dpcd.h"
#include "intel_dp_aux.h"
#include "intel_dsb.h"
#include "intel_frontbuffer.h"
@@ -716,8 +717,14 @@ static void _psr_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *co
connector->dp.psr_caps.su_support ? "" : "not ");
}
- if (connector->dp.psr_caps.su_support)
+ if (connector->dp.psr_caps.su_support) {
+ ret = drm_dp_dpcd_read_byte(&intel_dp->aux,
+ INTEL_DPCD_INTEL_WA_REGISTER_CAPS,
+ &connector->dp.psr_caps.intel_wa_dpcd);
+ if (ret < 0)
+ return;
_psr_compute_su_granularity(intel_dp, connector);
+ }
}
void intel_psr_init_dpcd(struct intel_dp *intel_dp, struct intel_connector *connector)
@@ -1358,9 +1365,35 @@ static bool psr2_granularity_check(struct intel_crtc_state *crtc_state,
return true;
}
-static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_dp,
- struct intel_crtc_state *crtc_state)
+static bool apply_scanline_indication_wa(struct intel_crtc_state *crtc_state,
+ struct intel_connector *connector)
+{
+ struct intel_dp *intel_dp = intel_attached_dp(connector);
+ u8 early_scanline_support = connector->dp.psr_caps.intel_wa_dpcd &
+ INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_EARLYSCANLINE_SDP_SUPPORT_MASK;
+
+ if (intel_dp->edp_dpcd[0] >= DP_EDP_15)
+ return true;
+
+ switch (early_scanline_support) {
+ case INTEL_DPCD_INTEL_WA_REGISTER_CAPS_FALL_BACK_TO_PSR1:
+ crtc_state->req_psr2_sdp_prior_scanline = false;
+ return false;
+ case INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_WITH_EARLY_SCANLINE:
+ return true;
+ case INTEL_DPCD_INTEL_WA_REGISTER_CAPS_PSR2_WITHOUT_EARLY_SCANLINE:
+ crtc_state->req_psr2_sdp_prior_scanline = false;
+ return true;
+ default:
+ MISSING_CASE(early_scanline_support);
+ return false;
+ }
+}
+
+static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_crtc_state *crtc_state,
+ struct intel_connector *connector)
{
+ struct intel_dp *intel_dp = intel_attached_dp(connector);
struct intel_display *display = to_intel_display(intel_dp);
const struct drm_display_mode *adjusted_mode = &crtc_state->uapi.adjusted_mode;
u32 hblank_total, hblank_ns, req_ns;
@@ -1379,7 +1412,8 @@ static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_d
return false;
crtc_state->req_psr2_sdp_prior_scanline = true;
- return true;
+
+ return apply_scanline_indication_wa(crtc_state, connector);
}
static int intel_psr_entry_setup_frames(struct intel_dp *intel_dp,
@@ -1660,7 +1694,7 @@ static bool intel_sel_update_config_valid(struct intel_crtc_state *crtc_state,
conn_state))
goto unsupported;
- if (!_compute_psr2_sdp_prior_scanline_indication(intel_dp, crtc_state)) {
+ if (!_compute_psr2_sdp_prior_scanline_indication(crtc_state, connector)) {
drm_dbg_kms(display->drm,
"Selective update not enabled, SDP indication do not fit in hblank\n");
goto unsupported;
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 984d0056c01c..adff482a6c9c 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -132,7 +132,8 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
rcu_read_lock(); /* protect the GEM context */
if (guilty) {
i915_request_set_error_once(rq, -EIO);
- __i915_request_skip(rq);
+ if (!i915_request_signaled(rq))
+ __i915_request_skip(rq);
banned = mark_guilty(rq);
} else {
i915_request_set_error_once(rq, -EAGAIN);
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 385a634c3ed0..d9be7a5a239c 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -750,9 +750,8 @@ static bool has_auxccs(struct drm_device *drm)
{
struct drm_i915_private *i915 = to_i915(drm);
- return IS_GRAPHICS_VER(i915, 9, 12) ||
- IS_ALDERLAKE_P(i915) ||
- IS_METEORLAKE(i915);
+ return IS_GRAPHICS_VER(i915, 9, 12) &&
+ !HAS_FLAT_CCS(i915);
}
static bool has_fenced_regions(struct drm_device *drm)
diff --git a/drivers/gpu/drm/imagination/pvr_fw_trace.c b/drivers/gpu/drm/imagination/pvr_fw_trace.c
index e154cb35f604..6193811ef7be 100644
--- a/drivers/gpu/drm/imagination/pvr_fw_trace.c
+++ b/drivers/gpu/drm/imagination/pvr_fw_trace.c
@@ -558,6 +558,6 @@ pvr_fw_trace_debugfs_init(struct pvr_device *pvr_dev, struct dentry *dir)
&pvr_fw_trace_fops);
}
- debugfs_create_file("trace_mask", 0600, dir, fw_trace,
+ debugfs_create_file("trace_mask", 0600, dir, pvr_dev,
&pvr_fw_trace_mask_fops);
}
diff --git a/drivers/gpu/drm/loongson/lsdc_drv.c b/drivers/gpu/drm/loongson/lsdc_drv.c
index 1ece1ea42f78..34405073c4d4 100644
--- a/drivers/gpu/drm/loongson/lsdc_drv.c
+++ b/drivers/gpu/drm/loongson/lsdc_drv.c
@@ -293,7 +293,7 @@ static int lsdc_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
vga_client_register(pdev, lsdc_vga_set_decode);
- drm_kms_helper_poll_init(ddev);
+ drmm_kms_helper_poll_init(ddev);
if (loongson_vblank) {
ret = drm_vblank_init(ddev, descp->num_of_crtc);
diff --git a/drivers/gpu/drm/mediatek/mtk_cec.c b/drivers/gpu/drm/mediatek/mtk_cec.c
index c7be530ca041..b8ccd6e55bed 100644
--- a/drivers/gpu/drm/mediatek/mtk_cec.c
+++ b/drivers/gpu/drm/mediatek/mtk_cec.c
@@ -240,7 +240,7 @@ static const struct of_device_id mtk_cec_of_ids[] = {
};
MODULE_DEVICE_TABLE(of, mtk_cec_of_ids);
-struct platform_driver mtk_cec_driver = {
+static struct platform_driver mtk_cec_driver = {
.probe = mtk_cec_probe,
.remove = mtk_cec_remove,
.driver = {
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c
index 6358e1af69b4..2acbdb025d89 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c
@@ -328,7 +328,7 @@ static const struct of_device_id mtk_hdmi_ddc_match[] = {
};
MODULE_DEVICE_TABLE(of, mtk_hdmi_ddc_match);
-struct platform_driver mtk_hdmi_ddc_driver = {
+static struct platform_driver mtk_hdmi_ddc_driver = {
.probe = mtk_hdmi_ddc_probe,
.remove = mtk_hdmi_ddc_remove,
.driver = {
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c
index d937219fdb7e..31e81a6de6d8 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c
@@ -389,7 +389,7 @@ static const struct of_device_id mtk_hdmi_ddc_v2_match[] = {
};
MODULE_DEVICE_TABLE(of, mtk_hdmi_ddc_v2_match);
-struct platform_driver mtk_hdmi_ddc_v2_driver = {
+static struct platform_driver mtk_hdmi_ddc_v2_driver = {
.probe = mtk_hdmi_ddc_v2_probe,
.driver = {
.name = "mediatek-hdmi-ddc-v2",
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_v2.c b/drivers/gpu/drm/mediatek/mtk_hdmi_v2.c
index b5c738380dc2..a8eb6fd0908b 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi_v2.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi_v2.c
@@ -50,7 +50,7 @@ enum mtk_hdmi_v2_clk_id {
MTK_HDMI_V2_CLK_COUNT,
};
-const char *const mtk_hdmi_v2_clk_names[MTK_HDMI_V2_CLK_COUNT] = {
+static const char *const mtk_hdmi_v2_clk_names[MTK_HDMI_V2_CLK_COUNT] = {
[MTK_HDMI_V2_CLK_HDMI_APB_SEL] = "bus",
[MTK_HDMI_V2_CLK_HDCP_SEL] = "hdcp",
[MTK_HDMI_V2_CLK_HDCP_24M_SEL] = "hdcp24m",
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index d5aba072f44c..7a3e3c2f5cf3 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -2621,7 +2621,6 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
struct platform_device *pdev = priv->gpu_pdev;
struct adreno_platform_config *config = pdev->dev.platform_data;
const struct adreno_info *info = config->info;
- struct device_node *node;
struct a6xx_gpu *a6xx_gpu;
struct adreno_gpu *adreno_gpu;
struct msm_gpu *gpu;
@@ -2643,7 +2642,8 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
adreno_gpu->registers = NULL;
/* Check if there is a GMU phandle and set it up */
- node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
+ struct device_node *node __free(device_node) =
+ of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
/* FIXME: How do we gracefully handle this? */
BUG_ON(!node);
@@ -2690,7 +2690,6 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
ret = a6xx_gmu_wrapper_init(a6xx_gpu, node);
else
ret = a6xx_gmu_init(a6xx_gpu, node);
- of_node_put(node);
if (ret) {
a6xx_destroy(&(a6xx_gpu->base.base));
return ERR_PTR(ret);
@@ -2740,6 +2739,7 @@ const struct adreno_gpu_funcs a6xx_gpu_funcs = {
.create_private_vm = a6xx_create_private_vm,
.get_rptr = a6xx_get_rptr,
.progress = a6xx_progress,
+ .sysprof_setup = a6xx_gmu_sysprof_setup,
},
.init = a6xx_gpu_init,
.get_timestamp = a6xx_gmu_get_timestamp,
@@ -2808,6 +2808,7 @@ const struct adreno_gpu_funcs a7xx_gpu_funcs = {
.create_private_vm = a6xx_create_private_vm,
.get_rptr = a6xx_get_rptr,
.progress = a6xx_progress,
+ .sysprof_setup = a6xx_gmu_sysprof_setup,
},
.init = a6xx_gpu_init,
.get_timestamp = a6xx_gmu_get_timestamp,
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
index 487c2736f2b3..186a73c0b99c 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
@@ -289,6 +289,8 @@ static int a8xx_hfi_send_perf_table(struct a6xx_gmu *gmu)
(gmu->nr_gpu_freqs * num_gx_votes * sizeof(gmu->gx_arc_votes[0])) +
(gmu->nr_gmu_freqs * num_cx_votes * sizeof(gmu->cx_arc_votes[0]));
tbl = kzalloc(size, GFP_KERNEL);
+ if (!tbl)
+ return -ENOMEM;
tbl->type = HFI_TABLE_GPU_PERF;
/* First fill GX votes */
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 4edfe80c5be7..fc38331ce640 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -17,7 +17,7 @@ MODULE_PARM_DESC(snapshot_debugbus, "Include debugbus sections in GPU devcoredum
module_param_named(snapshot_debugbus, snapshot_debugbus, bool, 0600);
int enable_preemption = -1;
-MODULE_PARM_DESC(enable_preemption, "Enable preemption (A7xx only) (1=on , 0=disable, -1=auto (default))");
+MODULE_PARM_DESC(enable_preemption, "Enable preemption (A7xx+ only) (1=on , 0=disable, -1=auto (default))");
module_param(enable_preemption, int, 0600);
bool disable_acd;
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 66f80f2d12f9..03f96a1154e1 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -376,7 +376,7 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx,
*value = adreno_gpu->info->gmem;
return 0;
case MSM_PARAM_GMEM_BASE:
- if (adreno_gpu->info->family >= ADRENO_6XX_GEN4)
+ if (adreno_gpu->info->family >= ADRENO_6XX_GEN3)
*value = 0;
else
*value = 0x100000;
@@ -424,15 +424,21 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx,
*value = vm->mm_range;
return 0;
case MSM_PARAM_HIGHEST_BANK_BIT:
+ if (!adreno_gpu->ubwc_config)
+ return UERR(ENOENT, drm, "no UBWC on this platform");
*value = adreno_gpu->ubwc_config->highest_bank_bit;
return 0;
case MSM_PARAM_RAYTRACING:
*value = adreno_gpu->has_ray_tracing;
return 0;
case MSM_PARAM_UBWC_SWIZZLE:
+ if (!adreno_gpu->ubwc_config)
+ return UERR(ENOENT, drm, "no UBWC on this platform");
*value = adreno_gpu->ubwc_config->ubwc_swizzle;
return 0;
case MSM_PARAM_MACROTILE_MODE:
+ if (!adreno_gpu->ubwc_config)
+ return UERR(ENOENT, drm, "no UBWC on this platform");
*value = adreno_gpu->ubwc_config->macrotile_mode;
return 0;
case MSM_PARAM_UCHE_TRAP_BASE:
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_13_0_kaanapali.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_13_0_kaanapali.h
index b7b06e45b529..06da1583fb1e 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_13_0_kaanapali.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_13_0_kaanapali.h
@@ -480,7 +480,7 @@ const struct dpu_mdss_cfg dpu_kaanapali_cfg = {
.wb_count = ARRAY_SIZE(kaanapali_wb),
.wb = kaanapali_wb,
.cwb_count = ARRAY_SIZE(kaanapali_cwb),
- .cwb = sm8650_cwb,
+ .cwb = kaanapali_cwb,
.intf_count = ARRAY_SIZE(kaanapali_intf),
.intf = kaanapali_intf,
.vbif = &sm8650_vbif,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
index 6e8883dbfad4..590922c4f69b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
@@ -61,7 +61,7 @@ static int _dpu_format_populate_plane_sizes_ubwc(
bool meta = MSM_FORMAT_IS_UBWC(fmt);
if (MSM_FORMAT_IS_YUV(fmt)) {
- unsigned int stride, sclines;
+ unsigned int stride, y_sclines, uv_sclines;
unsigned int y_tile_width, y_tile_height;
unsigned int y_meta_stride, y_meta_scanlines;
unsigned int uv_meta_stride, uv_meta_scanlines;
@@ -77,23 +77,25 @@ static int _dpu_format_populate_plane_sizes_ubwc(
y_tile_width = 32;
}
- sclines = round_up(fb->height, 16);
+ y_sclines = round_up(fb->height, 16);
+ uv_sclines = round_up((fb->height+1)>>1, 16);
y_tile_height = 4;
} else {
stride = round_up(fb->width, 128);
y_tile_width = 32;
- sclines = round_up(fb->height, 32);
+ y_sclines = round_up(fb->height, 32);
+ uv_sclines = round_up((fb->height+1)>>1, 32);
y_tile_height = 8;
}
layout->plane_pitch[0] = stride;
layout->plane_size[0] = round_up(layout->plane_pitch[0] *
- sclines, DPU_UBWC_PLANE_SIZE_ALIGNMENT);
+ y_sclines, DPU_UBWC_PLANE_SIZE_ALIGNMENT);
layout->plane_pitch[1] = stride;
layout->plane_size[1] = round_up(layout->plane_pitch[1] *
- sclines, DPU_UBWC_PLANE_SIZE_ALIGNMENT);
+ uv_sclines, DPU_UBWC_PLANE_SIZE_ALIGNMENT);
if (!meta)
return 0;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c
index 7545c0293efb..6f2370c9dd98 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c
@@ -5,6 +5,7 @@
#include <drm/drm_edid.h>
#include <drm/drm_framebuffer.h>
+#include <drm/drm_managed.h>
#include "dpu_writeback.h"
@@ -125,7 +126,7 @@ int dpu_writeback_init(struct drm_device *dev, struct drm_encoder *enc,
struct dpu_wb_connector *dpu_wb_conn;
int rc = 0;
- dpu_wb_conn = devm_kzalloc(dev->dev, sizeof(*dpu_wb_conn), GFP_KERNEL);
+ dpu_wb_conn = drmm_kzalloc(dev, sizeof(*dpu_wb_conn), GFP_KERNEL);
if (!dpu_wb_conn)
return -ENOMEM;
diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
index 427d3ee2b833..e603ab3817cd 100644
--- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
+++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
@@ -5,11 +5,11 @@
#define pr_fmt(fmt) "[drm:%s:%d] " fmt, __func__, __LINE__
-#include <generated/utsrelease.h>
+#include <linux/utsname.h>
#include "msm_disp_snapshot.h"
-static void msm_disp_state_dump_regs(u32 **reg, u32 aligned_len, void __iomem *base_addr)
+static void msm_disp_state_dump_regs(u32 **reg, u32 len, void __iomem *base_addr)
{
u32 len_padded;
u32 num_rows;
@@ -19,11 +19,11 @@ static void msm_disp_state_dump_regs(u32 **reg, u32 aligned_len, void __iomem *b
void __iomem *end_addr;
int i;
- len_padded = aligned_len * REG_DUMP_ALIGN;
- num_rows = aligned_len / REG_DUMP_ALIGN;
+ len_padded = round_up(len, REG_DUMP_ALIGN);
+ num_rows = DIV_ROUND_UP(len, REG_DUMP_ALIGN);
addr = base_addr;
- end_addr = base_addr + aligned_len;
+ end_addr = base_addr + len;
*reg = kvzalloc(len_padded, GFP_KERNEL);
if (!*reg)
@@ -48,8 +48,8 @@ static void msm_disp_state_dump_regs(u32 **reg, u32 aligned_len, void __iomem *b
static void msm_disp_state_print_regs(const u32 *dump_addr, u32 len,
void __iomem *base_addr, struct drm_printer *p)
{
+ void __iomem *addr, *end_addr;
int i;
- void __iomem *addr;
u32 num_rows;
if (!dump_addr) {
@@ -58,6 +58,7 @@ static void msm_disp_state_print_regs(const u32 *dump_addr, u32 len,
}
addr = base_addr;
+ end_addr = base_addr + len;
num_rows = len / REG_DUMP_ALIGN;
for (i = 0; i < num_rows; i++) {
@@ -67,6 +68,17 @@ static void msm_disp_state_print_regs(const u32 *dump_addr, u32 len,
dump_addr[i * 4 + 2], dump_addr[i * 4 + 3]);
addr += REG_DUMP_ALIGN;
}
+
+ if (addr != end_addr) {
+ drm_printf(p, "0x%lx : %08x",
+ (unsigned long)(addr - base_addr),
+ dump_addr[i * 4]);
+ if (addr + 0x4 < end_addr)
+ drm_printf(p, " %08x", dump_addr[i * 4 + 1]);
+ if (addr + 0x8 < end_addr)
+ drm_printf(p, " %08x", dump_addr[i * 4 + 2]);
+ drm_printf(p, "\n");
+ }
}
void msm_disp_state_print(struct msm_disp_state *state, struct drm_printer *p)
@@ -79,7 +91,7 @@ void msm_disp_state_print(struct msm_disp_state *state, struct drm_printer *p)
}
drm_printf(p, "---\n");
- drm_printf(p, "kernel: " UTS_RELEASE "\n");
+ drm_printf(p, "kernel: %s\n", init_utsname()->release);
drm_printf(p, "module: " KBUILD_MODNAME "\n");
drm_printf(p, "dpu devcoredump\n");
drm_printf(p, "time: %ptSp\n", &state->time);
@@ -185,7 +197,7 @@ void msm_disp_snapshot_add_block(struct msm_disp_state *disp_state, u32 len,
va_end(va);
INIT_LIST_HEAD(&new_blk->node);
- new_blk->size = ALIGN(len, REG_DUMP_ALIGN);
+ new_blk->size = len;
new_blk->base_addr = base_addr;
msm_disp_state_dump_regs(&new_blk->state, new_blk->size, base_addr);
diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index 565d425f88b8..982abaaac00d 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -2033,6 +2033,7 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi)
/* fixup base address by io offset */
msm_host->ctrl_base += cfg->io_offset;
+ msm_host->ctrl_size -= cfg->io_offset;
ret = devm_regulator_bulk_get_const(&pdev->dev, cfg->num_regulators,
cfg->regulator_data,
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 195f40e331e5..cc2bcd14b1c2 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -128,11 +128,10 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv,
/*
* Initialize the LRUs:
*/
- mutex_init(&priv->lru.lock);
- drm_gem_lru_init(&priv->lru.unbacked, &priv->lru.lock);
- drm_gem_lru_init(&priv->lru.pinned, &priv->lru.lock);
- drm_gem_lru_init(&priv->lru.willneed, &priv->lru.lock);
- drm_gem_lru_init(&priv->lru.dontneed, &priv->lru.lock);
+ drm_gem_lru_init(&priv->lru.unbacked);
+ drm_gem_lru_init(&priv->lru.pinned);
+ drm_gem_lru_init(&priv->lru.willneed);
+ drm_gem_lru_init(&priv->lru.dontneed);
/* Initialize stall-on-fault */
spin_lock_init(&priv->fault_stall_lock);
@@ -140,7 +139,7 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv,
/* Teach lockdep about lock ordering wrt. shrinker: */
fs_reclaim_acquire(GFP_KERNEL);
- might_lock(&priv->lru.lock);
+ might_lock(&ddev->gem_lru_mutex);
fs_reclaim_release(GFP_KERNEL);
if (priv->kms_init) {
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 6d847d593f1a..617b3c4b42c0 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -150,13 +150,6 @@ struct msm_drm_private {
* DONTNEED state (ie. can be purged)
*/
struct drm_gem_lru dontneed;
-
- /**
- * lock:
- *
- * Protects manipulation of all of the LRUs.
- */
- struct mutex lock;
} lru;
struct notifier_block vmap_notifier;
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 2cb3ab04f125..efd3d3c9a449 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -177,11 +177,11 @@ static void update_lru_locked(struct drm_gem_object *obj)
static void update_lru(struct drm_gem_object *obj)
{
- struct msm_drm_private *priv = obj->dev->dev_private;
+ struct drm_device *dev = obj->dev;
- mutex_lock(&priv->lru.lock);
+ mutex_lock(&dev->gem_lru_mutex);
update_lru_locked(obj);
- mutex_unlock(&priv->lru.lock);
+ mutex_unlock(&dev->gem_lru_mutex);
}
static struct page **get_pages(struct drm_gem_object *obj)
@@ -292,11 +292,11 @@ void msm_gem_pin_obj_locked(struct drm_gem_object *obj)
static void pin_obj_locked(struct drm_gem_object *obj)
{
- struct msm_drm_private *priv = obj->dev->dev_private;
+ struct drm_device *dev = obj->dev;
- mutex_lock(&priv->lru.lock);
+ mutex_lock(&dev->gem_lru_mutex);
msm_gem_pin_obj_locked(obj);
- mutex_unlock(&priv->lru.lock);
+ mutex_unlock(&dev->gem_lru_mutex);
}
struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj)
@@ -487,16 +487,16 @@ int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct drm_gpuva *vma)
void msm_gem_unpin_locked(struct drm_gem_object *obj)
{
- struct msm_drm_private *priv = obj->dev->dev_private;
+ struct drm_device *dev = obj->dev;
struct msm_gem_object *msm_obj = to_msm_bo(obj);
msm_gem_assert_locked(obj);
- mutex_lock(&priv->lru.lock);
+ mutex_lock(&dev->gem_lru_mutex);
msm_obj->pin_count--;
GEM_WARN_ON(msm_obj->pin_count < 0);
update_lru_locked(obj);
- mutex_unlock(&priv->lru.lock);
+ mutex_unlock(&dev->gem_lru_mutex);
}
/* Special unpin path for use in fence-signaling path, avoiding the need
@@ -507,10 +507,10 @@ void msm_gem_unpin_locked(struct drm_gem_object *obj)
*/
void msm_gem_unpin_active(struct drm_gem_object *obj)
{
- struct msm_drm_private *priv = obj->dev->dev_private;
+ struct drm_device *dev = obj->dev;
struct msm_gem_object *msm_obj = to_msm_bo(obj);
- GEM_WARN_ON(!mutex_is_locked(&priv->lru.lock));
+ GEM_WARN_ON(!mutex_is_locked(&dev->gem_lru_mutex));
msm_obj->pin_count--;
GEM_WARN_ON(msm_obj->pin_count < 0);
@@ -797,12 +797,12 @@ void msm_gem_put_vaddr(struct drm_gem_object *obj)
*/
int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv)
{
- struct msm_drm_private *priv = obj->dev->dev_private;
+ struct drm_device *dev = obj->dev;
struct msm_gem_object *msm_obj = to_msm_bo(obj);
msm_gem_lock(obj);
- mutex_lock(&priv->lru.lock);
+ mutex_lock(&dev->gem_lru_mutex);
if (msm_obj->madv != __MSM_MADV_PURGED)
msm_obj->madv = madv;
@@ -814,7 +814,7 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv)
*/
update_lru_locked(obj);
- mutex_unlock(&priv->lru.lock);
+ mutex_unlock(&dev->gem_lru_mutex);
msm_gem_unlock(obj);
@@ -824,7 +824,6 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv)
void msm_gem_purge(struct drm_gem_object *obj)
{
struct drm_device *dev = obj->dev;
- struct msm_drm_private *priv = obj->dev->dev_private;
struct msm_gem_object *msm_obj = to_msm_bo(obj);
msm_gem_assert_locked(obj);
@@ -839,10 +838,10 @@ void msm_gem_purge(struct drm_gem_object *obj)
put_pages(obj);
- mutex_lock(&priv->lru.lock);
+ mutex_lock(&dev->gem_lru_mutex);
/* A one-way transition: */
msm_obj->madv = __MSM_MADV_PURGED;
- mutex_unlock(&priv->lru.lock);
+ mutex_unlock(&dev->gem_lru_mutex);
drm_gem_free_mmap_offset(obj);
diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c
index 31fa51a44f86..9d2788f79ace 100644
--- a/drivers/gpu/drm/msm/msm_gem_shrinker.c
+++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c
@@ -43,8 +43,7 @@ msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
}
static bool
-with_vm_locks(struct ww_acquire_ctx *ticket,
- void (*fn)(struct drm_gem_object *obj),
+with_vm_locks(void (*fn)(struct drm_gem_object *obj),
struct drm_gem_object *obj)
{
/*
@@ -52,7 +51,7 @@ with_vm_locks(struct ww_acquire_ctx *ticket,
* success paths
*/
struct drm_gpuvm_bo *vm_bo, *last_locked = NULL;
- int ret = 0;
+ bool locked = true;
drm_gem_for_each_gpuvm_bo (vm_bo, obj) {
struct dma_resv *resv = drm_gpuvm_resv(vm_bo->vm);
@@ -60,23 +59,14 @@ with_vm_locks(struct ww_acquire_ctx *ticket,
if (resv == obj->resv)
continue;
- ret = dma_resv_lock(resv, ticket);
-
- /*
- * Since we already skip the case when the VM and obj
- * share a resv (ie. _NO_SHARE objs), we don't expect
- * to hit a double-locking scenario... which the lock
- * unwinding cannot really cope with.
- */
- WARN_ON(ret == -EALREADY);
-
/*
- * Don't bother with slow-lock / backoff / retry sequence,
- * if we can't get the lock just give up and move on to
- * the next object.
+ * dma_resv_lock can't be used due to acquiring 'ticket' before the
+ * fs_reclaim lock, which is held in shrinker context
*/
- if (ret)
+ if (!dma_resv_trylock(resv)) {
+ locked = false;
goto out_unlock;
+ }
/*
* Hold a ref to prevent the vm_bo from being freed
@@ -108,11 +98,11 @@ out_unlock:
}
}
- return ret == 0;
+ return locked;
}
static bool
-purge(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket)
+purge(struct drm_gem_object *obj, struct ww_acquire_ctx *unused)
{
if (!is_purgeable(to_msm_bo(obj)))
return false;
@@ -120,11 +110,11 @@ purge(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket)
if (msm_gem_active(obj))
return false;
- return with_vm_locks(ticket, msm_gem_purge, obj);
+ return with_vm_locks(msm_gem_purge, obj);
}
static bool
-evict(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket)
+evict(struct drm_gem_object *obj, struct ww_acquire_ctx *unused)
{
if (is_unevictable(to_msm_bo(obj)))
return false;
@@ -132,7 +122,7 @@ evict(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket)
if (msm_gem_active(obj))
return false;
- return with_vm_locks(ticket, msm_gem_evict, obj);
+ return with_vm_locks(msm_gem_evict, obj);
}
static bool
@@ -164,7 +154,6 @@ static unsigned long
msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
{
struct msm_drm_private *priv = shrinker->private_data;
- struct ww_acquire_ctx ticket;
struct {
struct drm_gem_lru *lru;
bool (*shrink)(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket);
@@ -185,11 +174,14 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
for (unsigned i = 0; (nr > 0) && (i < ARRAY_SIZE(stages)); i++) {
if (!stages[i].cond)
continue;
+ /*
+ * 'ticket' not needed on trylock paths
+ */
stages[i].freed =
- drm_gem_lru_scan(stages[i].lru, nr,
+ drm_gem_lru_scan(priv->dev, stages[i].lru, nr,
&stages[i].remaining,
stages[i].shrink,
- &ticket);
+ NULL);
nr -= stages[i].freed;
freed += stages[i].freed;
remaining += stages[i].remaining;
@@ -255,7 +247,7 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
unsigned long remaining = 0;
for (idx = 0; lrus[idx] && unmapped < vmap_shrink_limit; idx++) {
- unmapped += drm_gem_lru_scan(lrus[idx],
+ unmapped += drm_gem_lru_scan(priv->dev, lrus[idx],
vmap_shrink_limit - unmapped,
&remaining,
vmap_shrink,
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 26ea8a28be47..3c6bc90c3d48 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -352,7 +352,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit)
static int submit_pin_objects(struct msm_gem_submit *submit)
{
- struct msm_drm_private *priv = submit->dev->dev_private;
+ struct drm_device *dev = submit->dev;
int i, ret = 0;
for (i = 0; i < submit->nr_bos; i++) {
@@ -381,11 +381,11 @@ static int submit_pin_objects(struct msm_gem_submit *submit)
* get_pages() which could trigger reclaim.. and if we held the LRU lock
* could trigger deadlock with the shrinker).
*/
- mutex_lock(&priv->lru.lock);
+ mutex_lock(&dev->gem_lru_mutex);
for (i = 0; i < submit->nr_bos; i++) {
msm_gem_pin_obj_locked(submit->bos[i].obj);
}
- mutex_unlock(&priv->lru.lock);
+ mutex_unlock(&dev->gem_lru_mutex);
submit->bos_pinned = true;
diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c
index 1a952b171ed7..c4cfe036066b 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -702,7 +702,7 @@ static struct dma_fence *
msm_vma_job_run(struct drm_sched_job *_job)
{
struct msm_vm_bind_job *job = to_msm_vm_bind_job(_job);
- struct msm_drm_private *priv = job->vm->drm->dev_private;
+ struct drm_device *dev = job->vm->drm;
struct msm_gem_vm *vm = to_msm_vm(job->vm);
struct drm_gem_object *obj;
int ret = vm->unusable ? -EINVAL : 0;
@@ -745,13 +745,13 @@ msm_vma_job_run(struct drm_sched_job *_job)
if (ret)
msm_gem_vm_unusable(job->vm);
- mutex_lock(&priv->lru.lock);
+ mutex_lock(&dev->gem_lru_mutex);
job_foreach_bo (obj, job) {
msm_gem_unpin_active(obj);
}
- mutex_unlock(&priv->lru.lock);
+ mutex_unlock(&dev->gem_lru_mutex);
/* VM_BIND ops are synchronous, so no fence to wait on: */
return NULL;
@@ -1305,7 +1305,7 @@ vm_bind_job_pin_objects(struct msm_vm_bind_job *job)
return PTR_ERR(pages);
}
- struct msm_drm_private *priv = job->vm->drm->dev_private;
+ struct drm_device *dev = job->vm->drm;
/*
* A second loop while holding the LRU lock (a) avoids acquiring/dropping
@@ -1314,10 +1314,10 @@ vm_bind_job_pin_objects(struct msm_vm_bind_job *job)
* get_pages() which could trigger reclaim.. and if we held the LRU lock
* could trigger deadlock with the shrinker).
*/
- mutex_lock(&priv->lru.lock);
+ mutex_lock(&dev->gem_lru_mutex);
job_foreach_bo (obj, job)
msm_gem_pin_obj_locked(obj);
- mutex_unlock(&priv->lru.lock);
+ mutex_unlock(&dev->gem_lru_mutex);
job->bos_pinned = true;
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 930e54d1b0a7..3f3925b11eea 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -13,11 +13,11 @@
#include "msm_gpu_trace.h"
//#include "adreno/adreno_gpu.h"
-#include <generated/utsrelease.h>
#include <linux/string_helpers.h>
#include <linux/devcoredump.h>
#include <linux/sched/task.h>
#include <linux/sched/mm.h>
+#include <linux/utsname.h>
/*
* Power Management:
@@ -196,7 +196,7 @@ static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset,
p = drm_coredump_printer(&iter);
drm_printf(&p, "---\n");
- drm_printf(&p, "kernel: " UTS_RELEASE "\n");
+ drm_printf(&p, "kernel: %s\n", init_utsname()->release);
drm_printf(&p, "module: " KBUILD_MODNAME "\n");
drm_printf(&p, "time: %ptSp\n", &state->time);
if (state->comm)
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 7d449e5202c5..058c71c82cf5 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -677,7 +677,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova,
int prot)
{
struct msm_iommu *iommu = to_msm_iommu(mmu);
- size_t ret;
+ ssize_t ret;
WARN_ON(off != 0);
@@ -686,7 +686,8 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova,
iova |= GENMASK_ULL(63, 49);
ret = iommu_map_sgtable(iommu->domain, iova, sgt, prot);
- WARN_ON(!ret);
+ if (ret < 0)
+ return ret;
return (ret == len) ? 0 : -EINVAL;
}
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 30ddb5351e98..2d6b930b766e 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -16,13 +16,13 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
struct msm_gem_submit *submit = to_msm_submit(job);
struct msm_fence_context *fctx = submit->ring->fctx;
struct msm_gpu *gpu = submit->gpu;
- struct msm_drm_private *priv = gpu->dev->dev_private;
+ struct drm_device *dev = gpu->dev;
unsigned nr_cmds = submit->nr_cmds;
int i;
msm_fence_init(submit->hw_fence, fctx);
- mutex_lock(&priv->lru.lock);
+ mutex_lock(&dev->gem_lru_mutex);
for (i = 0; i < submit->nr_bos; i++) {
struct drm_gem_object *obj = submit->bos[i].obj;
@@ -32,7 +32,7 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
submit->bos_pinned = false;
- mutex_unlock(&priv->lru.lock);
+ mutex_unlock(&dev->gem_lru_mutex);
/* TODO move submit path over to using a per-ring lock.. */
mutex_lock(&gpu->lock);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 72848ed80df7..b101e14f841e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2513,6 +2513,7 @@ static const struct nvkm_device_chip
nv170_chipset = {
.name = "GA100",
.bar = { 0x00000001, tu102_bar_new },
+ .bios = { 0x00000001, nvkm_bios_new },
.devinit = { 0x00000001, ga100_devinit_new },
.fault = { 0x00000001, tu102_fault_new },
.fb = { 0x00000001, ga100_fb_new },
@@ -2529,7 +2530,6 @@ nv170_chipset = {
.vfn = { 0x00000001, ga100_vfn_new },
.ce = { 0x000003ff, ga100_ce_new },
.fifo = { 0x00000001, ga100_fifo_new },
- .sec2 = { 0x00000001, tu102_sec2_new },
};
static const struct nvkm_device_chip
@@ -3341,7 +3341,6 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
case 0x166: device->chip = &nv166_chipset; break;
case 0x167: device->chip = &nv167_chipset; break;
case 0x168: device->chip = &nv168_chipset; break;
- case 0x170: device->chip = &nv170_chipset; break;
case 0x172: device->chip = &nv172_chipset; break;
case 0x173: device->chip = &nv173_chipset; break;
case 0x174: device->chip = &nv174_chipset; break;
@@ -3361,6 +3360,14 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
case 0x1b6: device->chip = &nv1b6_chipset; break;
case 0x1b7: device->chip = &nv1b7_chipset; break;
default:
+ if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) {
+ switch (device->chipset) {
+ case 0x170: device->chip = &nv170_chipset; break;
+ default:
+ break;
+ }
+ }
+
if (!device->chip) {
nvdev_error(device, "unknown chipset (%08x)\n", boot0);
ret = -ENODEV;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c
index fdd820eeef81..27a13aeccd3c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c
@@ -41,11 +41,15 @@ ga100_gsp_flcn = {
static const struct nvkm_gsp_func
ga100_gsp = {
.flcn = &ga100_gsp_flcn,
+ .fwsec = &tu102_gsp_fwsec,
.sig_section = ".fwsignature_ga100",
.booter.ctor = tu102_gsp_booter_ctor,
+ .fwsec_sb.ctor = tu102_gsp_fwsec_sb_ctor,
+ .fwsec_sb.dtor = tu102_gsp_fwsec_sb_dtor,
+
.dtor = r535_gsp_dtor,
.oneinit = tu102_gsp_oneinit,
.init = tu102_gsp_init,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c
index dd82c76b8b9a..19cb269e7a26 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c
@@ -318,13 +318,8 @@ tu102_gsp_oneinit(struct nvkm_gsp *gsp)
if (ret)
return ret;
- /*
- * Calculate FB layout. FRTS is a memory region created by the FWSEC-FRTS firmware.
- * FWSEC comes from VBIOS. So on systems with no VBIOS (e.g. GA100), the FRTS does
- * not exist. Therefore, use the existence of VBIOS to determine whether to reserve
- * an FRTS region.
- */
- gsp->fb.wpr2.frts.size = device->bios ? 0x100000 : 0;
+ /* Calculate FB layout. */
+ gsp->fb.wpr2.frts.size = 0x100000;
gsp->fb.wpr2.frts.addr = ALIGN_DOWN(gsp->fb.bios.addr, 0x20000) - gsp->fb.wpr2.frts.size;
gsp->fb.wpr2.boot.size = gsp->boot.fw.size;
@@ -348,12 +343,9 @@ tu102_gsp_oneinit(struct nvkm_gsp *gsp)
if (ret)
return ret;
- /* Only boot FWSEC-FRTS if it actually exists */
- if (gsp->fb.wpr2.frts.size) {
- ret = nvkm_gsp_fwsec_frts(gsp);
- if (WARN_ON(ret))
- return ret;
- }
+ ret = nvkm_gsp_fwsec_frts(gsp);
+ if (WARN_ON(ret))
+ return ret;
/* Reset GSP into RISC-V mode. */
ret = gsp->func->reset(gsp);
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index d6863b28ddc5..d592f4f4b939 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -208,6 +208,7 @@ config DRM_PANEL_HIMAX_HX83121A
depends on OF
depends on DRM_MIPI_DSI
depends on BACKLIGHT_CLASS_DEVICE
+ select DRM_DISPLAY_DSC_HELPER
select DRM_KMS_HELPER
help
Say Y here if you want to enable support for Himax HX83121A-based
diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
index d5fe105bdbdd..658ce64c71eb 100644
--- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
+++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
@@ -1324,6 +1324,8 @@ static int boe_panel_disable(struct drm_panel *panel)
mipi_dsi_dcs_set_display_off_multi(&ctx);
mipi_dsi_dcs_enter_sleep_mode_multi(&ctx);
+ boe->dsi->mode_flags |= MIPI_DSI_MODE_LPM;
+
mipi_dsi_msleep(&ctx, 150);
return ctx.accum_err;
diff --git a/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c
index 4f8d6d8c07e4..dbdb7e3cb7b6 100644
--- a/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c
+++ b/drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c
@@ -98,9 +98,7 @@ static int feiyang_enable(struct drm_panel *panel)
/* T12 (video & logic signal rise + backlight rise) T12 >= 200ms */
msleep(200);
- mipi_dsi_dcs_set_display_on(ctx->dsi);
-
- return 0;
+ return mipi_dsi_dcs_set_display_on(ctx->dsi);
}
static int feiyang_disable(struct drm_panel *panel)
diff --git a/drivers/gpu/drm/panel/panel-himax-hx83102.c b/drivers/gpu/drm/panel/panel-himax-hx83102.c
index 8b2a68ee851e..a5e5c9ea7a73 100644
--- a/drivers/gpu/drm/panel/panel-himax-hx83102.c
+++ b/drivers/gpu/drm/panel/panel-himax-hx83102.c
@@ -937,6 +937,8 @@ static int hx83102_disable(struct drm_panel *panel)
mipi_dsi_dcs_set_display_off_multi(&dsi_ctx);
mipi_dsi_dcs_enter_sleep_mode_multi(&dsi_ctx);
+ dsi->mode_flags |= MIPI_DSI_MODE_LPM;
+
mipi_dsi_msleep(&dsi_ctx, 150);
return dsi_ctx.accum_err;
diff --git a/drivers/gpu/drm/panel/panel-himax-hx83121a.c b/drivers/gpu/drm/panel/panel-himax-hx83121a.c
index ebe643ba4184..bed79aa06f46 100644
--- a/drivers/gpu/drm/panel/panel-himax-hx83121a.c
+++ b/drivers/gpu/drm/panel/panel-himax-hx83121a.c
@@ -596,8 +596,8 @@ static int himax_probe(struct mipi_dsi_device *dsi)
ctx = devm_drm_panel_alloc(dev, struct himax, panel, &himax_panel_funcs,
DRM_MODE_CONNECTOR_DSI);
- if (!ctx)
- return -ENOMEM;
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
ret = devm_regulator_bulk_get_const(&dsi->dev,
ARRAY_SIZE(himax_supplies),
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 711f5101aa04..074c0995ddc2 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -390,6 +390,8 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data,
true, timeout);
if (!ret)
ret = timeout ? -ETIMEDOUT : -EBUSY;
+ else if (ret > 0)
+ ret = 0;
drm_gem_object_put(gem_obj);
diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
index 2bbb1168a3ff..1e6a2392d7c6 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.c
+++ b/drivers/gpu/drm/qxl/qxl_drv.c
@@ -118,12 +118,13 @@ qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Complete initialization. */
ret = drm_dev_register(&qdev->ddev, ent->driver_data);
if (ret)
- goto modeset_cleanup;
+ goto poll_fini;
drm_client_setup(&qdev->ddev, NULL);
return 0;
-modeset_cleanup:
+poll_fini:
+ drm_kms_helper_poll_fini(&qdev->ddev);
qxl_modeset_fini(qdev);
unload:
qxl_device_fini(qdev);
@@ -154,6 +155,7 @@ qxl_pci_remove(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
+ drm_kms_helper_poll_fini(dev);
drm_dev_unregister(dev);
drm_atomic_helper_shutdown(dev);
if (pci_is_vga(pdev) && pdev->revision < 5)
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index 22321eb95b7d..703848fac189 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -2461,7 +2461,8 @@ static void ci_register_patching_mc_arb(struct radeon_device *rdev,
if (patch &&
((rdev->pdev->device == 0x67B0) ||
- (rdev->pdev->device == 0x67B1))) {
+ (rdev->pdev->device == 0x67B1)) &&
+ (rdev->pdev->revision == 0)) {
if ((memory_clock > 100000) && (memory_clock <= 125000)) {
tmp2 = (((0x31 * engine_clock) / 125000) - 1) & 0xff;
*dram_timimg2 &= ~0x00ff0000;
@@ -3304,7 +3305,8 @@ static int ci_populate_all_memory_levels(struct radeon_device *rdev)
pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1;
if ((dpm_table->mclk_table.count >= 2) &&
- ((rdev->pdev->device == 0x67B0) || (rdev->pdev->device == 0x67B1))) {
+ ((rdev->pdev->device == 0x67B0) || (rdev->pdev->device == 0x67B1)) &&
+ (rdev->pdev->revision == 0)) {
pi->smc_state_table.MemoryLevel[1].MinVddc =
pi->smc_state_table.MemoryLevel[0].MinVddc;
pi->smc_state_table.MemoryLevel[1].MinVddcPhases =
@@ -4493,7 +4495,8 @@ static int ci_register_patching_mc_seq(struct radeon_device *rdev,
if (patch &&
((rdev->pdev->device == 0x67B0) ||
- (rdev->pdev->device == 0x67B1))) {
+ (rdev->pdev->device == 0x67B1)) &&
+ (rdev->pdev->revision == 0)) {
for (i = 0; i < table->last; i++) {
if (table->last >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 3142ef4da7f4..9196f85db9ce 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -312,8 +312,10 @@ static int evergreen_surface_check(struct radeon_cs_parser *p,
case ARRAY_2D_TILED_THIN1:
return evergreen_surface_check_2d(p, surf, prefix);
default:
- dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
- __func__, __LINE__, prefix, surf->mode);
+ if (prefix) {
+ dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
+ __func__, __LINE__, prefix, surf->mode);
+ }
return -EINVAL;
}
return -EINVAL;
diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c
index b7397827889c..360a88ca8f0c 100644
--- a/drivers/gpu/drm/sti/sti_hda.c
+++ b/drivers/gpu/drm/sti/sti_hda.c
@@ -741,6 +741,7 @@ static int sti_hda_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct sti_hda *hda;
struct resource *res;
+ int ret;
DRM_INFO("%s\n", __func__);
@@ -779,7 +780,9 @@ static int sti_hda_probe(struct platform_device *pdev)
return PTR_ERR(hda->clk_hddac);
}
- drm_bridge_add(&hda->bridge);
+ ret = devm_drm_bridge_add(dev, &hda->bridge);
+ if (ret)
+ return ret;
platform_set_drvdata(pdev, hda);
@@ -788,10 +791,7 @@ static int sti_hda_probe(struct platform_device *pdev)
static void sti_hda_remove(struct platform_device *pdev)
{
- struct sti_hda *hda = platform_get_drvdata(pdev);
-
component_del(&pdev->dev, &sti_hda_ops);
- drm_bridge_remove(&hda->bridge);
}
static const struct of_device_id hda_of_match[] = {
diff --git a/drivers/gpu/drm/sysfb/ofdrm.c b/drivers/gpu/drm/sysfb/ofdrm.c
index d38ba70f4e0d..247cf13c80a0 100644
--- a/drivers/gpu/drm/sysfb/ofdrm.c
+++ b/drivers/gpu/drm/sysfb/ofdrm.c
@@ -350,6 +350,7 @@ static void ofdrm_pci_release(void *data)
struct pci_dev *pcidev = data;
pci_disable_device(pcidev);
+ pci_dev_put(pcidev);
}
static int ofdrm_device_init_pci(struct ofdrm_device *odev)
@@ -375,6 +376,7 @@ static int ofdrm_device_init_pci(struct ofdrm_device *odev)
if (ret) {
drm_err(dev, "pci_enable_device(%s) failed: %d\n",
dev_name(&pcidev->dev), ret);
+ pci_dev_put(pcidev);
return ret;
}
ret = devm_add_action_or_reset(&pdev->dev, ofdrm_pci_release, pcidev);
diff --git a/drivers/gpu/drm/tiny/appletbdrm.c b/drivers/gpu/drm/tiny/appletbdrm.c
index 3bae91d7eefe..278bb23fe4c8 100644
--- a/drivers/gpu/drm/tiny/appletbdrm.c
+++ b/drivers/gpu/drm/tiny/appletbdrm.c
@@ -353,7 +353,7 @@ static int appletbdrm_primary_plane_helper_atomic_check(struct drm_plane *plane,
frames_size +
sizeof(struct appletbdrm_fb_request_footer), 16);
- appletbdrm_state->request = kzalloc(request_size, GFP_KERNEL);
+ appletbdrm_state->request = kvzalloc(request_size, GFP_KERNEL);
if (!appletbdrm_state->request)
return -ENOMEM;
@@ -543,7 +543,7 @@ static void appletbdrm_primary_plane_destroy_state(struct drm_plane *plane,
{
struct appletbdrm_plane_state *appletbdrm_state = to_appletbdrm_plane_state(state);
- kfree(appletbdrm_state->request);
+ kvfree(appletbdrm_state->request);
kfree(appletbdrm_state->response);
__drm_gem_destroy_shadow_plane_state(&appletbdrm_state->base);
diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c
index 222e4ae1abbd..5d8dc5efec77 100644
--- a/drivers/gpu/drm/tiny/bochs.c
+++ b/drivers/gpu/drm/tiny/bochs.c
@@ -761,25 +761,21 @@ static int bochs_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent
ret = pcim_enable_device(pdev);
if (ret)
- goto err_free_dev;
+ return ret;
pci_set_drvdata(pdev, dev);
ret = bochs_load(bochs);
if (ret)
- goto err_free_dev;
+ return ret;
ret = drm_dev_register(dev, 0);
if (ret)
- goto err_free_dev;
+ return ret;
drm_client_setup(dev, NULL);
return ret;
-
-err_free_dev:
- drm_dev_put(dev);
- return ret;
}
static void bochs_pci_remove(struct pci_dev *pdev)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index d85f0a37ac35..bcd76f6bb7f0 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -739,7 +739,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
may_evict = (force_space && place->mem_type != TTM_PL_SYSTEM);
ret = ttm_resource_alloc(bo, place, res, force_space ? &limit_pool : NULL);
if (ret) {
- if (ret != -ENOSPC && ret != -EAGAIN) {
+ if (ret != -ENOSPC) {
dmem_cgroup_pool_state_put(limit_pool);
return ret;
}
@@ -1177,17 +1177,13 @@ ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
bdev->funcs->swap_notify(bo);
if (ttm_tt_is_populated(tt)) {
- spin_lock(&bdev->lru_lock);
- ttm_resource_del_bulk_move(bo->resource, bo);
- spin_unlock(&bdev->lru_lock);
-
ret = ttm_tt_swapout(bdev, tt, swapout_walk->gfp_flags);
-
- spin_lock(&bdev->lru_lock);
- if (ret)
- ttm_resource_add_bulk_move(bo->resource, bo);
- ttm_resource_move_to_lru_tail(bo->resource);
- spin_unlock(&bdev->lru_lock);
+ if (!ret) {
+ spin_lock(&bdev->lru_lock);
+ ttm_resource_del_bulk_move_unevictable(bo->resource, bo);
+ ttm_resource_move_to_lru_tail(bo->resource);
+ spin_unlock(&bdev->lru_lock);
+ }
}
out:
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index f83b7d5ec6c6..3e3c201a0222 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -1112,19 +1112,14 @@ long ttm_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
if (lret < 0)
return lret;
- if (bo->bulk_move) {
- spin_lock(&bdev->lru_lock);
- ttm_resource_del_bulk_move(bo->resource, bo);
- spin_unlock(&bdev->lru_lock);
- }
-
lret = ttm_tt_backup(bdev, bo->ttm, (struct ttm_backup_flags)
{.purge = flags.purge,
.writeback = flags.writeback});
- if (lret <= 0 && bo->bulk_move) {
+ if (lret > 0) {
spin_lock(&bdev->lru_lock);
- ttm_resource_add_bulk_move(bo->resource, bo);
+ ttm_resource_del_bulk_move_unevictable(bo->resource, bo);
+ ttm_resource_move_to_lru_tail(bo->resource);
spin_unlock(&bdev->lru_lock);
}
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 26a3689e5fd9..278bbe7a11ad 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -206,6 +206,14 @@ error_free:
return NULL;
}
+static void __free_pages_gpu_account(struct page *p, unsigned int order,
+ bool reclaim)
+{
+ mod_lruvec_page_state(p, reclaim ? NR_GPU_RECLAIM : NR_GPU_ACTIVE,
+ -(1 << order));
+ __free_pages(p, order);
+}
+
/* Reset the caching and pages of size 1 << order */
static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
unsigned int order, struct page *p, bool reclaim)
@@ -223,9 +231,7 @@ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
#endif
if (!pool || !ttm_pool_uses_dma_alloc(pool)) {
- mod_lruvec_page_state(p, reclaim ? NR_GPU_RECLAIM : NR_GPU_ACTIVE,
- -(1 << order));
- __free_pages(p, order);
+ __free_pages_gpu_account(p, order, reclaim);
return;
}
@@ -606,7 +612,7 @@ static int ttm_pool_restore_commit(struct ttm_pool_tt_restore *restore,
*/
ttm_pool_split_for_swap(restore->pool, p);
copy_highpage(restore->alloced_page + i, p);
- __free_pages(p, 0);
+ __free_pages_gpu_account(p, 0, false);
}
restore->restored_pages++;
@@ -1068,7 +1074,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt,
if (flags->purge) {
shrunken += num_pages;
page->private = 0;
- __free_pages(page, order);
+ __free_pages_gpu_account(page, order, false);
memset(tt->pages + i, 0,
num_pages * sizeof(*tt->pages));
}
@@ -1109,7 +1115,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt,
}
handle = shandle;
tt->pages[i] = ttm_backup_handle_to_page_ptr(handle);
- put_page(page);
+ __free_pages_gpu_account(page, 0, false);
shrunken++;
}
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 9f36631d48b6..154d6739256f 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -292,6 +292,19 @@ void ttm_resource_del_bulk_move(struct ttm_resource *res,
ttm_lru_bulk_move_del(bo->bulk_move, res);
}
+/*
+ * Remove a resource from its bulk_move, bypassing the unevictable check.
+ * Use only when the resource is known to still be tracked in the range despite
+ * the BO having just become unevictable; asserts that this is the case.
+ */
+void ttm_resource_del_bulk_move_unevictable(struct ttm_resource *res,
+ struct ttm_buffer_object *bo)
+{
+ WARN_ON_ONCE(!ttm_resource_unevictable(res, bo));
+ if (bo->bulk_move)
+ ttm_lru_bulk_move_del(bo->bulk_move, res);
+}
+
/* Move a resource to the LRU or bulk tail */
void ttm_resource_move_to_lru_tail(struct ttm_resource *res)
{
@@ -385,8 +398,11 @@ int ttm_resource_alloc(struct ttm_buffer_object *bo,
if (man->cg) {
ret = dmem_cgroup_try_charge(man->cg, bo->base.size, &pool, ret_limit_pool);
- if (ret)
+ if (ret) {
+ if (ret == -EAGAIN)
+ ret = -ENOSPC;
return ret;
+ }
}
ret = man->func->alloc(man, bo, place, res_ptr);
diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c
index 08a0e9480d70..17950fe3a0ec 100644
--- a/drivers/gpu/drm/udl/udl_main.c
+++ b/drivers/gpu/drm/udl/udl_main.c
@@ -285,13 +285,12 @@ static struct urb *udl_get_urb_locked(struct udl_device *udl, long timeout)
return unode->urb;
}
-#define GET_URB_TIMEOUT HZ
struct urb *udl_get_urb(struct udl_device *udl)
{
struct urb *urb;
spin_lock_irq(&udl->urbs.lock);
- urb = udl_get_urb_locked(udl, GET_URB_TIMEOUT);
+ urb = udl_get_urb_locked(udl, HZ * 2);
spin_unlock_irq(&udl->urbs.lock);
return urb;
}
diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c
index 231e829bd709..1ca073a4ecb2 100644
--- a/drivers/gpu/drm/udl/udl_modeset.c
+++ b/drivers/gpu/drm/udl/udl_modeset.c
@@ -21,6 +21,7 @@
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_modeset_helper_vtables.h>
+#include <drm/drm_print.h>
#include <drm/drm_probe_helper.h>
#include <drm/drm_vblank.h>
@@ -342,8 +343,10 @@ static void udl_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_atom
return;
urb = udl_get_urb(udl);
- if (!urb)
+ if (!urb) {
+ drm_err_ratelimited(dev, "get urb failed when enabling crtc\n");
goto out;
+ }
buf = (char *)urb->transfer_buffer;
buf = udl_vidreg_lock(buf);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 1855ef5b3b5f..94bf628dc91c 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -126,20 +126,6 @@ v3d_performance_query_info_free(struct v3d_performance_query_info *query_info,
}
static void
-v3d_cpu_job_free(struct drm_sched_job *sched_job)
-{
- struct v3d_cpu_job *job = to_cpu_job(sched_job);
-
- v3d_timestamp_query_info_free(&job->timestamp_query,
- job->timestamp_query.count);
-
- v3d_performance_query_info_free(&job->performance_query,
- job->performance_query.count);
-
- v3d_job_cleanup(&job->base);
-}
-
-static void
v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job)
{
struct v3d_perfmon *perfmon = v3d->global_perfmon;
@@ -830,7 +816,7 @@ static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
static const struct drm_sched_backend_ops v3d_cpu_sched_ops = {
.run_job = v3d_cpu_job_run,
- .free_job = v3d_cpu_job_free
+ .free_job = v3d_sched_job_free
};
static int
diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c
index ee4512db294b..3ddd53b6f437 100644
--- a/drivers/gpu/drm/v3d/v3d_submit.c
+++ b/drivers/gpu/drm/v3d/v3d_submit.c
@@ -123,6 +123,24 @@ v3d_render_job_free(struct kref *ref)
v3d_job_free(ref);
}
+static void
+v3d_cpu_job_free(struct kref *ref)
+{
+ struct v3d_cpu_job *job = container_of(ref, struct v3d_cpu_job,
+ base.refcount);
+
+ v3d_timestamp_query_info_free(&job->timestamp_query,
+ job->timestamp_query.count);
+
+ v3d_performance_query_info_free(&job->performance_query,
+ job->performance_query.count);
+
+ if (job->indirect_csd.indirect)
+ drm_gem_object_put(job->indirect_csd.indirect);
+
+ v3d_job_free(ref);
+}
+
void v3d_job_cleanup(struct v3d_job *job)
{
if (!job)
@@ -1302,7 +1320,7 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type);
ret = v3d_job_init(v3d, file_priv, &cpu_job->base,
- v3d_job_free, 0, &se, V3D_CPU);
+ v3d_cpu_job_free, 0, &se, V3D_CPU);
if (ret) {
v3d_job_deallocate((void *)&cpu_job);
goto fail;
@@ -1385,8 +1403,6 @@ fail:
v3d_job_cleanup((void *)csd_job);
v3d_job_cleanup(clean_job);
v3d_put_multisync_post_deps(&se);
- kvfree(cpu_job->timestamp_query.queries);
- kvfree(cpu_job->performance_query.queries);
return ret;
}
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index f17660a71a3e..2f3531950aa4 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -317,6 +317,7 @@ virtio_gpu_array_from_handles(struct drm_file *drm_file, u32 *handles, u32 nents
void virtio_gpu_array_add_obj(struct virtio_gpu_object_array *objs,
struct drm_gem_object *obj);
int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs);
+int virtio_gpu_lock_one_resv_uninterruptible(struct virtio_gpu_object_array *objs);
void virtio_gpu_array_unlock_resv(struct virtio_gpu_object_array *objs);
void virtio_gpu_array_add_fence(struct virtio_gpu_object_array *objs,
struct dma_fence *fence);
diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c
index f22dc5c21cd4..435d37d36034 100644
--- a/drivers/gpu/drm/virtio/virtgpu_gem.c
+++ b/drivers/gpu/drm/virtio/virtgpu_gem.c
@@ -238,6 +238,23 @@ int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs)
return ret;
}
+int virtio_gpu_lock_one_resv_uninterruptible(struct virtio_gpu_object_array *objs)
+{
+ int ret;
+
+ if (objs->nents != 1)
+ return -EINVAL;
+
+ dma_resv_lock(objs->objs[0]->resv, NULL);
+
+ ret = dma_resv_reserve_fences(objs->objs[0]->resv, 1);
+ if (ret) {
+ virtio_gpu_array_unlock_resv(objs);
+ return ret;
+ }
+ return 0;
+}
+
void virtio_gpu_array_unlock_resv(struct virtio_gpu_object_array *objs)
{
if (objs->nents == 1) {
diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c
index a126d1b25f46..652352424744 100644
--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
+++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
@@ -215,7 +215,10 @@ static void virtio_gpu_resource_flush(struct drm_plane *plane,
if (!objs)
return;
virtio_gpu_array_add_obj(objs, vgfb->base.obj[0]);
- virtio_gpu_array_lock_resv(objs);
+ if (virtio_gpu_lock_one_resv_uninterruptible(objs)) {
+ virtio_gpu_array_put_free(objs);
+ return;
+ }
virtio_gpu_cmd_resource_flush(vgdev, bo->hw_res_handle, x, y,
width, height, objs,
vgplane_st->fence);
@@ -459,7 +462,10 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane,
if (!objs)
return;
virtio_gpu_array_add_obj(objs, vgfb->base.obj[0]);
- virtio_gpu_array_lock_resv(objs);
+ if (virtio_gpu_lock_one_resv_uninterruptible(objs)) {
+ virtio_gpu_array_put_free(objs);
+ return;
+ }
virtio_gpu_cmd_transfer_to_host_2d
(vgdev, 0,
plane->state->crtc_w,
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 49de1c22a469..03242e8b3d87 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -88,6 +88,7 @@ xe-y += xe_bb.o \
xe_irq.o \
xe_late_bind_fw.o \
xe_lrc.o \
+ xe_mem_pool.o \
xe_migrate.o \
xe_mmio.o \
xe_mmio_gem.o \
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
index 29c72aa4b0d2..33494b86205d 100644
--- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -37,9 +37,17 @@ static bool intel_hdcp_gsc_check_status(struct drm_device *drm)
struct xe_device *xe = to_xe_device(drm);
struct xe_tile *tile = xe_device_get_root_tile(xe);
struct xe_gt *gt = tile->media_gt;
- struct xe_gsc *gsc = &gt->uc.gsc;
+ struct xe_gsc *gsc;
+
+ if (!gt) {
+ drm_dbg_kms(&xe->drm,
+ "not checking GSC status for HDCP2.x: media GT not present or disabled\n");
+ return false;
+ }
+
+ gsc = &gt->uc.gsc;
- if (!gsc || !xe_uc_fw_is_available(&gsc->fw)) {
+ if (!xe_uc_fw_is_available(&gsc->fw)) {
drm_dbg_kms(&xe->drm,
"GSC Components not ready for HDCP2.x\n");
return false;
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 4ebaa0888a43..353fe0bd49bf 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -152,10 +152,11 @@
#define XEHPG_INSTDONE_GEOM_SVGUNIT XE_REG_MCR(0x666c)
-#define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED)
+#define CACHE_MODE_1 XE_REG_MCR(0x7004, XE_REG_OPTION_MASKED)
#define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11)
#define COMMON_SLICE_CHICKEN1 XE_REG(0x7010, XE_REG_OPTION_MASKED)
+#define XEHP_COMMON_SLICE_CHICKEN1 XE_REG_MCR(0x7010, XE_REG_OPTION_MASKED)
#define DISABLE_BOTTOM_CLIP_RECTANGLE_TEST REG_BIT(14)
#define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED)
@@ -178,6 +179,7 @@
#define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108)
#define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED)
+#define XEHP_COMMON_SLICE_CHICKEN4 XE_REG_MCR(0x7300, XE_REG_OPTION_MASKED)
#define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12)
#define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6)
#define HW_FILTERING REG_BIT(5)
@@ -583,7 +585,7 @@
#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32)
#define LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE REG_BIT(35 - 32)
-#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0)
+#define ROW_CHICKEN5 XE_REG_MCR(0xe7f0, XE_REG_OPTION_MASKED)
#define CPSS_AWARE_DIS REG_BIT(3)
#define SARB_CHICKEN1 XE_REG_MCR(0xe90c)
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index a7c2dc7f224c..6b518858538f 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -897,10 +897,10 @@ void xe_bo_set_purgeable_state(struct xe_bo *bo,
new_state == XE_MADV_PURGEABLE_PURGED);
/* Once purged, always purged - cannot transition out */
- xe_assert(xe, !(bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED &&
+ xe_assert(xe, !(bo->purgeable.state == XE_MADV_PURGEABLE_PURGED &&
new_state != XE_MADV_PURGEABLE_PURGED));
- bo->madv_purgeable = new_state;
+ bo->purgeable.state = new_state;
xe_bo_set_purgeable_shrinker(bo, new_state);
}
@@ -2322,8 +2322,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
}
/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */
- if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
+ if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) {
+ xe_bo_free(bo);
return ERR_PTR(-EINVAL);
+ }
if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
!(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
@@ -2342,8 +2344,10 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
alignment = SZ_4K >> PAGE_SHIFT;
}
- if (type == ttm_bo_type_device && aligned_size != size)
+ if (type == ttm_bo_type_device && aligned_size != size) {
+ xe_bo_free(bo);
return ERR_PTR(-EINVAL);
+ }
if (!bo) {
bo = xe_bo_alloc();
@@ -2364,7 +2368,7 @@ struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
INIT_LIST_HEAD(&bo->vram_userfault_link);
/* Initialize purge advisory state */
- bo->madv_purgeable = XE_MADV_PURGEABLE_WILLNEED;
+ bo->purgeable.state = XE_MADV_PURGEABLE_WILLNEED;
drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 68dea7d25a6b..6340317f7d2e 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -251,7 +251,7 @@ static inline bool xe_bo_is_protected(const struct xe_bo *bo)
static inline bool xe_bo_is_purged(struct xe_bo *bo)
{
xe_bo_assert_held(bo);
- return bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED;
+ return bo->purgeable.state == XE_MADV_PURGEABLE_PURGED;
}
/**
@@ -268,11 +268,95 @@ static inline bool xe_bo_is_purged(struct xe_bo *bo)
static inline bool xe_bo_madv_is_dontneed(struct xe_bo *bo)
{
xe_bo_assert_held(bo);
- return bo->madv_purgeable == XE_MADV_PURGEABLE_DONTNEED;
+ return bo->purgeable.state == XE_MADV_PURGEABLE_DONTNEED;
}
void xe_bo_set_purgeable_state(struct xe_bo *bo, enum xe_madv_purgeable_state new_state);
+/**
+ * xe_bo_willneed_get_locked() - Acquire a WILLNEED holder on a BO
+ * @bo: Buffer object
+ *
+ * Increments willneed_count and, on a 0->1 transition, promotes the BO
+ * from DONTNEED to WILLNEED. PURGED is terminal and is never modified.
+ *
+ * Caller must hold the BO's dma-resv lock.
+ */
+static inline void xe_bo_willneed_get_locked(struct xe_bo *bo)
+{
+ xe_bo_assert_held(bo);
+
+ /* Imported BOs are owned externally; do not track purgeability. */
+ if (drm_gem_is_imported(&bo->ttm.base))
+ return;
+
+ if (bo->purgeable.willneed_count++ == 0 && xe_bo_madv_is_dontneed(bo))
+ xe_bo_set_purgeable_state(bo, XE_MADV_PURGEABLE_WILLNEED);
+}
+
+/**
+ * xe_bo_willneed_put_locked() - Release a WILLNEED holder on a BO
+ * @bo: Buffer object
+ *
+ * Decrements willneed_count and, on a 1->0 transition, marks the BO
+ * DONTNEED only if it still has VMAs (implying all active VMAs are
+ * DONTNEED). If the last VMA is being removed, preserve the current BO
+ * state to match the previous VMA-walk semantics.
+ *
+ * PURGED is terminal and the BO state is never modified.
+ *
+ * Caller must hold the BO's dma-resv lock.
+ */
+static inline void xe_bo_willneed_put_locked(struct xe_bo *bo)
+{
+ xe_bo_assert_held(bo);
+
+ if (drm_gem_is_imported(&bo->ttm.base))
+ return;
+
+ xe_assert(xe_bo_device(bo), bo->purgeable.willneed_count > 0);
+ if (--bo->purgeable.willneed_count == 0 && bo->purgeable.vma_count > 0 &&
+ !xe_bo_is_purged(bo))
+ xe_bo_set_purgeable_state(bo, XE_MADV_PURGEABLE_DONTNEED);
+}
+
+/**
+ * xe_bo_vma_count_inc_locked() - Account a new VMA on a BO
+ * @bo: Buffer object
+ *
+ * Increments vma_count.
+ *
+ * Caller must hold the BO's dma-resv lock.
+ */
+static inline void xe_bo_vma_count_inc_locked(struct xe_bo *bo)
+{
+ xe_bo_assert_held(bo);
+
+ if (drm_gem_is_imported(&bo->ttm.base))
+ return;
+
+ bo->purgeable.vma_count++;
+}
+
+/**
+ * xe_bo_vma_count_dec_locked() - Account a VMA removal on a BO
+ * @bo: Buffer object
+ *
+ * Decrements vma_count.
+ *
+ * Caller must hold the BO's dma-resv lock.
+ */
+static inline void xe_bo_vma_count_dec_locked(struct xe_bo *bo)
+{
+ xe_bo_assert_held(bo);
+
+ if (drm_gem_is_imported(&bo->ttm.base))
+ return;
+
+ xe_assert(xe_bo_device(bo), bo->purgeable.vma_count > 0);
+ bo->purgeable.vma_count--;
+}
+
static inline void xe_bo_unpin_map_no_vm(struct xe_bo *bo)
{
if (likely(bo)) {
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index ff8317bfc1ae..077e35b4cdce 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -18,6 +18,7 @@
#include "xe_ggtt_types.h"
struct xe_device;
+struct xe_mem_pool_node;
struct xe_vm;
#define XE_BO_MAX_PLACEMENTS 3
@@ -88,7 +89,7 @@ struct xe_bo {
bool ccs_cleared;
/** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */
- struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
+ struct xe_mem_pool_node *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
/**
* @cpu_caching: CPU caching mode. Currently only used for userspace
@@ -110,10 +111,32 @@ struct xe_bo {
u64 min_align;
/**
- * @madv_purgeable: user space advise on BO purgeability, protected
- * by BO's dma-resv lock.
+ * @purgeable: Purgeability state and accounting.
+ *
+ * All fields are protected by the BO's dma-resv lock.
*/
- u32 madv_purgeable;
+ struct {
+ /**
+ * @purgeable.state: BO purgeability state
+ * (WILLNEED/DONTNEED/PURGED).
+ */
+ u32 state;
+
+ /**
+ * @purgeable.vma_count: Number of VMAs currently mapping this BO.
+ */
+ u32 vma_count;
+
+ /**
+ * @purgeable.willneed_count: Number of active WILLNEED holders.
+ *
+ * Counts WILLNEED VMAs plus active dma-buf exports for
+ * non-imported BOs. The BO flips to DONTNEED on a 1->0
+ * transition only when VMAs still exist; if the last VMA is
+ * removed, the previous BO state is preserved.
+ */
+ u32 willneed_count;
+ } purgeable;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 7f9602b3363d..8a920e58245c 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -193,6 +193,18 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
return 0;
}
+static void xe_dma_buf_release(struct dma_buf *dmabuf)
+{
+ struct drm_gem_object *obj = dmabuf->priv;
+ struct xe_bo *bo = gem_to_xe_bo(obj);
+
+ xe_bo_lock(bo, false);
+ xe_bo_willneed_put_locked(bo);
+ xe_bo_unlock(bo);
+
+ drm_gem_dmabuf_release(dmabuf);
+}
+
static const struct dma_buf_ops xe_dmabuf_ops = {
.attach = xe_dma_buf_attach,
.detach = xe_dma_buf_detach,
@@ -200,7 +212,7 @@ static const struct dma_buf_ops xe_dmabuf_ops = {
.unpin = xe_dma_buf_unpin,
.map_dma_buf = xe_dma_buf_map,
.unmap_dma_buf = xe_dma_buf_unmap,
- .release = drm_gem_dmabuf_release,
+ .release = xe_dma_buf_release,
.begin_cpu_access = xe_dma_buf_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
.vmap = drm_gem_dmabuf_vmap,
@@ -241,26 +253,33 @@ struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags)
ret = -EINVAL;
goto out_unlock;
}
+
+ xe_bo_willneed_get_locked(bo);
xe_bo_unlock(bo);
ret = ttm_bo_setup_export(&bo->ttm, &ctx);
if (ret)
- return ERR_PTR(ret);
+ goto out_put;
buf = drm_gem_prime_export(obj, flags);
- if (!IS_ERR(buf))
- buf->ops = &xe_dmabuf_ops;
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ goto out_put;
+ }
+ buf->ops = &xe_dmabuf_ops;
return buf;
+out_put:
+ xe_bo_lock(bo, false);
+ xe_bo_willneed_put_locked(bo);
out_unlock:
xe_bo_unlock(bo);
return ERR_PTR(ret);
}
static struct drm_gem_object *
-xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
- struct dma_buf *dma_buf)
+xe_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
{
struct dma_resv *resv = dma_buf->resv;
struct xe_device *xe = to_xe_device(dev);
@@ -281,7 +300,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
if (ret)
break;
- bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
+ bo = xe_bo_init_locked(xe, NULL, NULL, resv, NULL, dma_buf->size,
0, /* Will require 1way or 2way for vm_bind */
ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec);
drm_exec_retry_on_contention(&exec);
@@ -332,7 +351,6 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
const struct dma_buf_attach_ops *attach_ops;
struct dma_buf_attachment *attach;
struct drm_gem_object *obj;
- struct xe_bo *bo;
if (dma_buf->ops == &xe_dmabuf_ops) {
obj = dma_buf->priv;
@@ -348,13 +366,15 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
}
/*
- * Don't publish the bo until we have a valid attachment, and a
- * valid attachment needs the bo address. So pre-create a bo before
- * creating the attachment and publish.
+ * This needs to happen before the attach, since it will create a new
+ * attachment for this, and add it to the list of attachments, at which
+ * point it is globally visible, and at any point the export side can
+ * call into on invalidate_mappings callback, which require a working
+ * object.
*/
- bo = xe_bo_alloc();
- if (IS_ERR(bo))
- return ERR_CAST(bo);
+ obj = xe_dma_buf_create_obj(dev, dma_buf);
+ if (IS_ERR(obj))
+ return obj;
attach_ops = &xe_dma_buf_attach_ops;
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
@@ -362,26 +382,15 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
attach_ops = test->attach_ops;
#endif
- attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base);
+ attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, obj);
if (IS_ERR(attach)) {
- obj = ERR_CAST(attach);
- goto out_err;
+ xe_bo_put(gem_to_xe_bo(obj));
+ return ERR_CAST(attach);
}
- /* Errors here will take care of freeing the bo. */
- obj = xe_dma_buf_init_obj(dev, bo, dma_buf);
- if (IS_ERR(obj))
- return obj;
-
-
get_dma_buf(dma_buf);
obj->import_attach = attach;
return obj;
-
-out_err:
- xe_bo_free(bo);
-
- return obj;
}
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
index c34408cfd292..dddcdd0bb7a3 100644
--- a/drivers/gpu/drm/xe/xe_eu_stall.c
+++ b/drivers/gpu/drm/xe/xe_eu_stall.c
@@ -869,14 +869,14 @@ static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
struct xe_eu_stall_data_stream *stream = file->private_data;
struct xe_gt *gt = stream->gt;
- drm_dev_put(&gt->tile->xe->drm);
-
mutex_lock(&gt->eu_stall->stream_lock);
xe_eu_stall_disable_locked(stream);
xe_eu_stall_data_buf_destroy(stream);
xe_eu_stall_stream_free(stream);
mutex_unlock(&gt->eu_stall->stream_lock);
+ drm_dev_put(&gt->tile->xe->drm);
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index b287d0e0e60a..071b8c41df43 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -1405,7 +1405,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
if (q->vm && q->hwe->hw_engine_group) {
err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
if (err)
- goto put_exec_queue;
+ goto kill_exec_queue;
}
}
@@ -1416,12 +1416,15 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
/* user id alloc must always be last in ioctl to prevent UAF */
err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
if (err)
- goto kill_exec_queue;
+ goto del_hw_engine_group;
args->exec_queue_id = id;
return 0;
+del_hw_engine_group:
+ if (q->vm && q->hwe && q->hwe->hw_engine_group)
+ xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
kill_exec_queue:
xe_exec_queue_kill(q);
delete_queue_group:
@@ -1760,7 +1763,7 @@ void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
unsigned int type)
{
- xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ xe_assert(gt_to_xe(q->gt), type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
dma_fence_put(q->tlb_inval[type].last_fence);
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index e5c234f3d795..aab59dc647fb 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -166,7 +166,7 @@ static int query_compatibility_version(struct xe_gsc *gsc)
&rd_offset);
if (err) {
xe_gt_err(gt, "HuC: invalid GSC reply for version query (err=%d)\n", err);
- return err;
+ goto out_bo;
}
compat->major = version_query_rd(xe, &bo->vmap, rd_offset, proj_major);
@@ -482,8 +482,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
EXEC_QUEUE_FLAG_PERMANENT, 0);
if (IS_ERR(q)) {
xe_gt_err(gt, "Failed to create queue for GSC submission\n");
- err = PTR_ERR(q);
- goto out_bo;
+ return PTR_ERR(q);
}
wq = alloc_ordered_workqueue("gsc-ordered-wq", 0);
@@ -506,8 +505,6 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
out_q:
xe_exec_queue_put(q);
-out_bo:
- xe_bo_unpin_map_no_vm(bo);
return err;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
index 87a164efcc33..01fe03b9efe8 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
@@ -385,10 +385,10 @@ static int pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid, void *buf
if (xe_gt_is_media_type(gt))
for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++)
- regs[n] = xe_mmio_read32(&gt->mmio, MED_VF_SW_FLAG(n));
+ regs[n] = xe_mmio_read32(&mmio, MED_VF_SW_FLAG(n));
else
for (n = 0; n < VF_SW_FLAG_COUNT; n++)
- regs[n] = xe_mmio_read32(&gt->mmio, VF_SW_FLAG(n));
+ regs[n] = xe_mmio_read32(&mmio, VF_SW_FLAG(n));
return 0;
}
@@ -407,10 +407,10 @@ static int pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid,
if (xe_gt_is_media_type(gt))
for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++)
- xe_mmio_write32(&gt->mmio, MED_VF_SW_FLAG(n), regs[n]);
+ xe_mmio_write32(&mmio, MED_VF_SW_FLAG(n), regs[n]);
else
for (n = 0; n < VF_SW_FLAG_COUNT; n++)
- xe_mmio_write32(&gt->mmio, VF_SW_FLAG(n), regs[n]);
+ xe_mmio_write32(&mmio, VF_SW_FLAG(n), regs[n]);
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c
index 7d532bded02a..a85ba4435378 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c
@@ -114,8 +114,10 @@ int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32
* VFs with no events are not printed.
*
* This function can only be called on PF.
+ *
+ * Return: always 0
*/
-void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p)
+int xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p)
{
unsigned int n, total_vfs = xe_gt_sriov_pf_get_totalvfs(gt);
const struct xe_gt_sriov_monitor *data;
@@ -144,4 +146,6 @@ void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p
#undef __format
#undef __value
}
+
+ return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h
index 7ca9351a271b..0b8f088d3a16 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h
@@ -13,7 +13,7 @@ struct drm_printer;
struct xe_gt;
void xe_gt_sriov_pf_monitor_flr(struct xe_gt *gt, u32 vfid);
-void xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p);
+int xe_gt_sriov_pf_monitor_print_events(struct xe_gt *gt, struct drm_printer *p);
#ifdef CONFIG_PCI_IOV
int xe_gt_sriov_pf_monitor_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 8989c8e1be95..0cd9d77f3351 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -1137,13 +1137,15 @@ void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
}
/**
- * xe_gt_sriov_vf_print_config - Print VF self config.
+ * xe_gt_sriov_vf_print_config() - Print VF self config.
* @gt: the &xe_gt
* @p: the &drm_printer
*
* This function is for VF use only.
+ *
+ * Return: always 0.
*/
-void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p)
+int xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config;
struct xe_device *xe = gt_to_xe(gt);
@@ -1170,16 +1172,20 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p)
drm_printf(p, "GuC contexts:\t%u\n", config->num_ctxs);
drm_printf(p, "GuC doorbells:\t%u\n", config->num_dbs);
+
+ return 0;
}
/**
- * xe_gt_sriov_vf_print_runtime - Print VF's runtime regs received from PF.
+ * xe_gt_sriov_vf_print_runtime() - Print VF's runtime regs received from PF.
* @gt: the &xe_gt
* @p: the &drm_printer
*
* This function is for VF use only.
+ *
+ * Return: always 0.
*/
-void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p)
+int xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p)
{
struct vf_runtime_reg *vf_regs = gt->sriov.vf.runtime.regs;
unsigned int size = gt->sriov.vf.runtime.num_regs;
@@ -1188,16 +1194,20 @@ void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p)
for (; size--; vf_regs++)
drm_printf(p, "%#x = %#x\n", vf_regs->offset, vf_regs->value);
+
+ return 0;
}
/**
- * xe_gt_sriov_vf_print_version - Print VF ABI versions.
+ * xe_gt_sriov_vf_print_version() - Print VF ABI versions.
* @gt: the &xe_gt
* @p: the &drm_printer
*
* This function is for VF use only.
+ *
+ * Return: always 0.
*/
-void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p)
+int xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
struct xe_uc_fw_version *guc_version = &gt->sriov.vf.guc_version;
@@ -1227,6 +1237,8 @@ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p)
GUC_RELAY_VERSION_LATEST_MAJOR, GUC_RELAY_VERSION_LATEST_MINOR);
drm_printf(p, "\thandshake:\t%u.%u\n",
pf_version->major, pf_version->minor);
+
+ return 0;
}
static bool vf_post_migration_shutdown(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
index a6f7127521a5..79878f21b1da 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
@@ -35,9 +35,9 @@ bool xe_gt_sriov_vf_sched_groups_enabled(struct xe_gt *gt);
u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg);
void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val);
-void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p);
-void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p);
-void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p);
+int xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p);
+int xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p);
+int xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p);
int xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt);
int xe_vf_migration_fixups_complete_count(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 8b55cf25a75f..fffb5d631b69 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -145,6 +145,13 @@ struct xe_gt {
/** @info.has_indirect_ring_state: GT has indirect ring state support */
u8 has_indirect_ring_state:1;
/**
+ * @info.has_xe2_blt_instructions: GT supports Xe2-style MEM_SET
+ * and MEM_COPY blitter functionality. Note that despite the
+ * name, some Xe1 platforms may also support this "Xe2-style"
+ * feature.
+ */
+ u8 has_xe2_blt_instructions:1;
+ /**
* @info.num_geometry_xecore_fuse_regs: Number of 32b-bit fuse
* registers the geometry XeCore mask spans.
*/
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 81b5f01b1f65..2b835d48b565 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -512,12 +512,9 @@ static void guc_golden_lrc_init(struct xe_guc_ads *ads)
* that starts after the execlists LRC registers. This is
* required to allow the GuC to restore just the engine state
* when a watchdog reset occurs.
- * We calculate the engine state size by removing the size of
- * what comes before it in the context image (which is identical
- * on all engines).
*/
ads_blob_write(ads, ads.eng_state_size[guc_class],
- real_size - xe_lrc_skip_size(xe));
+ xe_lrc_engine_state_size(gt, class));
ads_blob_write(ads, ads.golden_context_lrca[guc_class],
addr_ggtt);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index a145234f662b..912182dc7704 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -261,22 +261,10 @@ static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
static void guc_submit_fini(void *arg)
{
struct xe_guc *guc = arg;
-
- /* Forcefully kill any remaining exec queues */
- xe_guc_ct_stop(&guc->ct);
- guc_submit_reset_prepare(guc);
- xe_guc_softreset(guc);
- xe_guc_submit_stop(guc);
- xe_uc_fw_sanitize(&guc->fw);
- xe_guc_submit_pause_abort(guc);
-}
-
-static void guc_submit_wedged_fini(void *arg)
-{
- struct xe_guc *guc = arg;
struct xe_exec_queue *q;
unsigned long index;
+ /* Drop any wedged queue refs */
mutex_lock(&guc->submission_state.lock);
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
if (exec_queue_wedged(q)) {
@@ -286,6 +274,14 @@ static void guc_submit_wedged_fini(void *arg)
}
}
mutex_unlock(&guc->submission_state.lock);
+
+ /* Forcefully kill any remaining exec queues */
+ xe_guc_ct_stop(&guc->ct);
+ guc_submit_reset_prepare(guc);
+ xe_guc_softreset(guc);
+ xe_guc_submit_stop(guc);
+ xe_uc_fw_sanitize(&guc->fw);
+ xe_guc_submit_pause_abort(guc);
}
static const struct xe_exec_queue_ops guc_exec_queue_ops;
@@ -1320,10 +1316,8 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
void xe_guc_submit_wedge(struct xe_guc *guc)
{
struct xe_device *xe = guc_to_xe(guc);
- struct xe_gt *gt = guc_to_gt(guc);
struct xe_exec_queue *q;
unsigned long index;
- int err;
xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
@@ -1335,15 +1329,6 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
return;
if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) {
- err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
- guc_submit_wedged_fini, guc);
- if (err) {
- xe_gt_err(gt, "Failed to register clean-up on wedged.mode=%s; "
- "Although device is wedged.\n",
- xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
- return;
- }
-
mutex_lock(&guc->submission_state.lock);
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
if (xe_exec_queue_get_unless_zero(q))
@@ -1688,6 +1673,14 @@ static void guc_exec_queue_fini(struct xe_exec_queue *q)
struct xe_guc_exec_queue *ge = q->guc;
struct xe_guc *guc = exec_queue_to_guc(q);
+ if (xe_exec_queue_is_multi_queue_secondary(q)) {
+ struct xe_exec_queue_group *group = q->multi_queue.group;
+
+ mutex_lock(&group->list_lock);
+ list_del(&q->multi_queue.link);
+ mutex_unlock(&group->list_lock);
+ }
+
release_guc_id(guc, q);
xe_sched_entity_fini(&ge->entity);
xe_sched_fini(&ge->sched);
@@ -1709,14 +1702,6 @@ static void __guc_exec_queue_destroy_async(struct work_struct *w)
guard(xe_pm_runtime)(guc_to_xe(guc));
trace_xe_exec_queue_destroy(q);
- if (xe_exec_queue_is_multi_queue_secondary(q)) {
- struct xe_exec_queue_group *group = q->multi_queue.group;
-
- mutex_lock(&group->list_lock);
- list_del(&q->multi_queue.link);
- mutex_unlock(&group->list_lock);
- }
-
/* Confirm no work left behind accessing device structures */
cancel_delayed_work_sync(&ge->sched.base.work_tdr);
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 9d12a0d2f0b5..4af9f0d7c6f3 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -746,9 +746,16 @@ size_t xe_lrc_reg_size(struct xe_device *xe)
return 80 * sizeof(u32);
}
-size_t xe_lrc_skip_size(struct xe_device *xe)
+/**
+ * xe_lrc_engine_state_size() - Get size of the engine state within LRC
+ * @gt: the &xe_gt struct instance
+ * @class: Hardware engine class
+ *
+ * Returns: Size of the engine state
+ */
+size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class)
{
- return LRC_PPHWSP_SIZE + xe_lrc_reg_size(xe);
+ return xe_gt_lrc_hang_replay_size(gt, class) - xe_lrc_reg_size(gt_to_xe(gt));
}
static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
@@ -1214,7 +1221,7 @@ static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc,
if (xe_gt_WARN_ON(lrc->gt, max_len < 3))
return -ENOSPC;
- *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
+ *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_LRM_CS_MMIO | MI_LRI_NUM_REGS(1);
*cmd++ = CS_DEBUG_MODE2(0).addr;
*cmd++ = REG_MASKED_FIELD_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index e7c975f9e2d9..5440663183f6 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -130,7 +130,7 @@ u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc);
struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc);
size_t xe_lrc_reg_size(struct xe_device *xe);
-size_t xe_lrc_skip_size(struct xe_device *xe);
+size_t xe_lrc_engine_state_size(struct xe_gt *gt, enum xe_engine_class class);
void xe_lrc_dump_default(struct drm_printer *p,
struct xe_gt *gt,
diff --git a/drivers/gpu/drm/xe/xe_mem_pool.c b/drivers/gpu/drm/xe/xe_mem_pool.c
new file mode 100644
index 000000000000..d5e24d6aa88d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mem_pool.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#include <linux/kernel.h>
+
+#include <drm/drm_managed.h>
+
+#include "instructions/xe_mi_commands.h"
+#include "xe_bo.h"
+#include "xe_device_types.h"
+#include "xe_map.h"
+#include "xe_mem_pool.h"
+#include "xe_mem_pool_types.h"
+#include "xe_tile_printk.h"
+
+/**
+ * struct xe_mem_pool - DRM MM pool for sub-allocating memory from a BO on an
+ * XE tile.
+ *
+ * The XE memory pool is a DRM MM manager that provides sub-allocation of memory
+ * from a backing buffer object (BO) on a specific XE tile. It is designed to
+ * manage memory for GPU workloads, allowing for efficient allocation and
+ * deallocation of memory regions within the BO.
+ *
+ * The memory pool maintains a primary BO that is pinned in the GGTT and mapped
+ * into the CPU address space for direct access. Optionally, it can also maintain
+ * a shadow BO that can be used for atomic updates to the primary BO's contents.
+ *
+ * The API provided by the memory pool allows clients to allocate and free memory
+ * regions, retrieve GPU and CPU addresses, and synchronize data between the
+ * primary and shadow BOs as needed.
+ */
+struct xe_mem_pool {
+ /** @base: Range allocator over [0, @size) in bytes */
+ struct drm_mm base;
+ /** @bo: Active pool BO (GGTT-pinned, CPU-mapped). */
+ struct xe_bo *bo;
+ /** @shadow: Shadow BO for atomic command updates. */
+ struct xe_bo *shadow;
+ /** @swap_guard: Timeline guard updating @bo and @shadow */
+ struct mutex swap_guard;
+ /** @cpu_addr: CPU virtual address of the active BO. */
+ void *cpu_addr;
+ /** @is_iomem: Indicates if the BO mapping is I/O memory. */
+ bool is_iomem;
+};
+
+static struct xe_mem_pool *node_to_pool(struct xe_mem_pool_node *node)
+{
+ return container_of(node->sa_node.mm, struct xe_mem_pool, base);
+}
+
+static struct xe_tile *pool_to_tile(struct xe_mem_pool *pool)
+{
+ return pool->bo->tile;
+}
+
+static void fini_pool_action(struct drm_device *drm, void *arg)
+{
+ struct xe_mem_pool *pool = arg;
+
+ if (pool->is_iomem)
+ kvfree(pool->cpu_addr);
+
+ drm_mm_takedown(&pool->base);
+}
+
+static int pool_shadow_init(struct xe_mem_pool *pool)
+{
+ struct xe_tile *tile = pool->bo->tile;
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_bo *shadow;
+ int ret;
+
+ xe_assert(xe, !pool->shadow);
+
+ ret = drmm_mutex_init(&xe->drm, &pool->swap_guard);
+ if (ret)
+ return ret;
+
+ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&pool->swap_guard);
+ fs_reclaim_release(GFP_KERNEL);
+ }
+ shadow = xe_managed_bo_create_pin_map(xe, tile,
+ xe_bo_size(pool->bo),
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_PINNED_NORESTORE);
+ if (IS_ERR(shadow))
+ return PTR_ERR(shadow);
+
+ pool->shadow = shadow;
+
+ return 0;
+}
+
+/**
+ * xe_mem_pool_init() - Initialize memory pool.
+ * @tile: the &xe_tile where allocate.
+ * @size: number of bytes to allocate.
+ * @guard: the size of the guard region at the end of the BO that is not
+ * sub-allocated, in bytes.
+ * @flags: flags to use to create shadow pool.
+ *
+ * Initializes a memory pool for sub-allocating memory from a backing BO on the
+ * specified XE tile. The backing BO is pinned in the GGTT and mapped into
+ * the CPU address space for direct access. Optionally, a shadow BO can also be
+ * initialized for atomic updates to the primary BO's contents.
+ *
+ * Returns: a pointer to the &xe_mem_pool, or an error pointer on failure.
+ */
+struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size,
+ u32 guard, int flags)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_mem_pool *pool;
+ struct xe_bo *bo;
+ u32 managed_size;
+ int ret;
+
+ xe_tile_assert(tile, size > guard);
+ managed_size = size - guard;
+
+ pool = drmm_kzalloc(&xe->drm, sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ return ERR_PTR(-ENOMEM);
+
+ bo = xe_managed_bo_create_pin_map(xe, tile, size,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE |
+ XE_BO_FLAG_PINNED_NORESTORE);
+ if (IS_ERR(bo)) {
+ xe_tile_err(tile, "Failed to prepare %uKiB BO for mem pool (%pe)\n",
+ size / SZ_1K, bo);
+ return ERR_CAST(bo);
+ }
+ pool->bo = bo;
+ pool->is_iomem = bo->vmap.is_iomem;
+
+ if (pool->is_iomem) {
+ pool->cpu_addr = kvzalloc(size, GFP_KERNEL);
+ if (!pool->cpu_addr)
+ return ERR_PTR(-ENOMEM);
+ } else {
+ pool->cpu_addr = bo->vmap.vaddr;
+ }
+
+ if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY) {
+ ret = pool_shadow_init(pool);
+
+ if (ret)
+ goto out_err;
+ }
+
+ drm_mm_init(&pool->base, 0, managed_size);
+ ret = drmm_add_action_or_reset(&xe->drm, fini_pool_action, pool);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return pool;
+
+out_err:
+ if (flags & XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY)
+ xe_tile_err(tile,
+ "Failed to initialize shadow BO for mem pool (%d)\n", ret);
+ if (bo->vmap.is_iomem)
+ kvfree(pool->cpu_addr);
+ return ERR_PTR(ret);
+}
+
+/**
+ * xe_mem_pool_sync() - Copy the entire contents of the main pool to shadow pool.
+ * @pool: the memory pool containing the primary and shadow BOs.
+ *
+ * Copies the entire contents of the primary pool to the shadow pool. This must
+ * be done after xe_mem_pool_init() with the XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY
+ * flag to ensure that the shadow pool has the same initial contents as the primary
+ * pool. After this initial synchronization, clients can choose to synchronize the
+ * shadow pool with the primary pool on a node basis using
+ * xe_mem_pool_sync_shadow_locked() as needed.
+ *
+ * Return: None.
+ */
+void xe_mem_pool_sync(struct xe_mem_pool *pool)
+{
+ struct xe_tile *tile = pool_to_tile(pool);
+ struct xe_device *xe = tile_to_xe(tile);
+
+ xe_tile_assert(tile, pool->shadow);
+
+ xe_map_memcpy_to(xe, &pool->shadow->vmap, 0,
+ pool->cpu_addr, xe_bo_size(pool->bo));
+}
+
+/**
+ * xe_mem_pool_swap_shadow_locked() - Swap the primary BO with the shadow BO.
+ * @pool: the memory pool containing the primary and shadow BOs.
+ *
+ * Swaps the primary buffer object with the shadow buffer object in the mem
+ * pool. This allows for atomic updates to the contents of the primary BO
+ * by first writing to the shadow BO and then swapping it with the primary BO.
+ * Swap_guard must be held to ensure synchronization with any concurrent swap
+ * operations.
+ *
+ * Return: None.
+ */
+void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool)
+{
+ struct xe_tile *tile = pool_to_tile(pool);
+
+ xe_tile_assert(tile, pool->shadow);
+ lockdep_assert_held(&pool->swap_guard);
+
+ swap(pool->bo, pool->shadow);
+ if (!pool->bo->vmap.is_iomem)
+ pool->cpu_addr = pool->bo->vmap.vaddr;
+}
+
+/**
+ * xe_mem_pool_sync_shadow_locked() - Copy node from primary pool to shadow pool.
+ * @node: the node allocated in the memory pool.
+ *
+ * Copies the specified batch buffer from the primary pool to the shadow pool.
+ * Swap_guard must be held to ensure synchronization with any concurrent swap
+ * operations.
+ *
+ * Return: None.
+ */
+void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node)
+{
+ struct xe_mem_pool *pool = node_to_pool(node);
+ struct xe_tile *tile = pool_to_tile(pool);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct drm_mm_node *sa_node = &node->sa_node;
+
+ xe_tile_assert(tile, pool->shadow);
+ lockdep_assert_held(&pool->swap_guard);
+
+ xe_map_memcpy_to(xe, &pool->shadow->vmap,
+ sa_node->start,
+ pool->cpu_addr + sa_node->start,
+ sa_node->size);
+}
+
+/**
+ * xe_mem_pool_gpu_addr() - Retrieve GPU address of memory pool.
+ * @pool: the memory pool
+ *
+ * Returns: GGTT address of the memory pool.
+ */
+u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool)
+{
+ return xe_bo_ggtt_addr(pool->bo);
+}
+
+/**
+ * xe_mem_pool_cpu_addr() - Retrieve CPU address of manager pool.
+ * @pool: the memory pool
+ *
+ * Returns: CPU virtual address of memory pool.
+ */
+void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool)
+{
+ return pool->cpu_addr;
+}
+
+/**
+ * xe_mem_pool_bo_swap_guard() - Retrieve the mutex used to guard swap
+ * operations on a memory pool.
+ * @pool: the memory pool
+ *
+ * Returns: Swap guard mutex or NULL if shadow pool is not created.
+ */
+struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool)
+{
+ if (!pool->shadow)
+ return NULL;
+
+ return &pool->swap_guard;
+}
+
+/**
+ * xe_mem_pool_bo_flush_write() - Copy the data from the sub-allocation
+ * to the GPU memory.
+ * @node: the node allocated in the memory pool to flush.
+ */
+void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node)
+{
+ struct xe_mem_pool *pool = node_to_pool(node);
+ struct xe_tile *tile = pool_to_tile(pool);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct drm_mm_node *sa_node = &node->sa_node;
+
+ if (!pool->bo->vmap.is_iomem)
+ return;
+
+ xe_map_memcpy_to(xe, &pool->bo->vmap, sa_node->start,
+ pool->cpu_addr + sa_node->start,
+ sa_node->size);
+}
+
+/**
+ * xe_mem_pool_bo_sync_read() - Copy the data from GPU memory to the
+ * sub-allocation.
+ * @node: the node allocated in the memory pool to read back.
+ */
+void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node)
+{
+ struct xe_mem_pool *pool = node_to_pool(node);
+ struct xe_tile *tile = pool_to_tile(pool);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct drm_mm_node *sa_node = &node->sa_node;
+
+ if (!pool->bo->vmap.is_iomem)
+ return;
+
+ xe_map_memcpy_from(xe, pool->cpu_addr + sa_node->start,
+ &pool->bo->vmap, sa_node->start, sa_node->size);
+}
+
+/**
+ * xe_mem_pool_alloc_node() - Allocate a new node for use with xe_mem_pool.
+ *
+ * Returns: node structure or an ERR_PTR(-ENOMEM).
+ */
+struct xe_mem_pool_node *xe_mem_pool_alloc_node(void)
+{
+ struct xe_mem_pool_node *node = kzalloc_obj(*node);
+
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ return node;
+}
+
+/**
+ * xe_mem_pool_insert_node() - Insert a node into the memory pool.
+ * @pool: the memory pool to insert into
+ * @node: the node to insert
+ * @size: the size of the node to be allocated in bytes.
+ *
+ * Inserts a node into the specified memory pool using drm_mm for
+ * allocation.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int xe_mem_pool_insert_node(struct xe_mem_pool *pool,
+ struct xe_mem_pool_node *node, u32 size)
+{
+ if (!pool)
+ return -EINVAL;
+
+ return drm_mm_insert_node(&pool->base, &node->sa_node, size);
+}
+
+/**
+ * xe_mem_pool_free_node() - Free a node allocated from the memory pool.
+ * @node: the node to free
+ *
+ * Returns: None.
+ */
+void xe_mem_pool_free_node(struct xe_mem_pool_node *node)
+{
+ if (!node)
+ return;
+
+ drm_mm_remove_node(&node->sa_node);
+ kfree(node);
+}
+
+/**
+ * xe_mem_pool_node_cpu_addr() - Retrieve CPU address of the node.
+ * @node: the node allocated in the memory pool
+ *
+ * Returns: CPU virtual address of the node.
+ */
+void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node)
+{
+ struct xe_mem_pool *pool = node_to_pool(node);
+
+ return xe_mem_pool_cpu_addr(pool) + node->sa_node.start;
+}
+
+/**
+ * xe_mem_pool_dump() - Dump the state of the DRM MM manager for debugging.
+ * @pool: the memory pool info be dumped.
+ * @p: The DRM printer to use for output.
+ *
+ * Only the drm managed region is dumped, not the state of the BOs or any other
+ * pool information.
+ *
+ * Returns: None.
+ */
+void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p)
+{
+ drm_mm_print(&pool->base, p);
+}
diff --git a/drivers/gpu/drm/xe/xe_mem_pool.h b/drivers/gpu/drm/xe/xe_mem_pool.h
new file mode 100644
index 000000000000..89cd2555fe91
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mem_pool.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+#ifndef _XE_MEM_POOL_H_
+#define _XE_MEM_POOL_H_
+
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+#include <drm/drm_mm.h>
+#include "xe_mem_pool_types.h"
+
+struct drm_printer;
+struct xe_mem_pool;
+struct xe_tile;
+
+struct xe_mem_pool *xe_mem_pool_init(struct xe_tile *tile, u32 size,
+ u32 guard, int flags);
+void xe_mem_pool_sync(struct xe_mem_pool *pool);
+void xe_mem_pool_swap_shadow_locked(struct xe_mem_pool *pool);
+void xe_mem_pool_sync_shadow_locked(struct xe_mem_pool_node *node);
+u64 xe_mem_pool_gpu_addr(struct xe_mem_pool *pool);
+void *xe_mem_pool_cpu_addr(struct xe_mem_pool *pool);
+struct mutex *xe_mem_pool_bo_swap_guard(struct xe_mem_pool *pool);
+void xe_mem_pool_bo_flush_write(struct xe_mem_pool_node *node);
+void xe_mem_pool_bo_sync_read(struct xe_mem_pool_node *node);
+struct xe_mem_pool_node *xe_mem_pool_alloc_node(void);
+int xe_mem_pool_insert_node(struct xe_mem_pool *pool,
+ struct xe_mem_pool_node *node, u32 size);
+void xe_mem_pool_free_node(struct xe_mem_pool_node *node);
+void *xe_mem_pool_node_cpu_addr(struct xe_mem_pool_node *node);
+void xe_mem_pool_dump(struct xe_mem_pool *pool, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_mem_pool_types.h b/drivers/gpu/drm/xe/xe_mem_pool_types.h
new file mode 100644
index 000000000000..d5e926c93351
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mem_pool_types.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2026 Intel Corporation
+ */
+
+#ifndef _XE_MEM_POOL_TYPES_H_
+#define _XE_MEM_POOL_TYPES_H_
+
+#include <drm/drm_mm.h>
+
+#define XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY BIT(0)
+
+/**
+ * struct xe_mem_pool_node - Sub-range allocations from mem pool.
+ */
+struct xe_mem_pool_node {
+ /** @sa_node: drm_mm_node for this allocation. */
+ struct drm_mm_node sa_node;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
index 811e07136efb..579af47edc61 100644
--- a/drivers/gpu/drm/xe/xe_memirq.c
+++ b/drivers/gpu/drm/xe/xe_memirq.c
@@ -427,13 +427,25 @@ static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector,
return __memirq_received(memirq, vector, offset, name, true);
}
+static void memirq_assume_received(struct xe_memirq *memirq, const char *source,
+ u16 offset, const char *status)
+{
+ memirq_debug(memirq, "ASSUME %s %s(%u)\n", source, status, offset);
+}
+
static void memirq_dispatch_engine(struct xe_memirq *memirq, struct iosys_map *status,
struct xe_hw_engine *hwe)
{
memirq_debug(memirq, "STATUS %s %*ph\n", hwe->name, 16, status->vaddr);
- if (memirq_received(memirq, status, ilog2(GT_MI_USER_INTERRUPT), hwe->name))
- xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT);
+ /*
+ * The programming note says to assume that GT_MI_USER_INTERRUPT is always
+ * set. Check and clear related status byte just for a debug.
+ */
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEMIRQ) &&
+ !memirq_received(memirq, status, ilog2(GT_MI_USER_INTERRUPT), hwe->name))
+ memirq_assume_received(memirq, hwe->name, ilog2(GT_MI_USER_INTERRUPT), "USER");
+ xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT);
}
static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *status,
@@ -443,8 +455,14 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat
memirq_debug(memirq, "STATUS %s %*ph\n", name, 16, status->vaddr);
- if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name))
- xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST);
+ /*
+ * The programming note says to assume that GUC_INTR_GUC2HOST is always
+ * set. Check and clear related status byte just for a debug.
+ */
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEMIRQ) &&
+ !memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name))
+ memirq_assume_received(memirq, name, ilog2(GUC_INTR_GUC2HOST), "GUC2HOST");
+ xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST);
/*
* This is a software interrupt that must be cleared after it's consumed
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index fc918b4fba54..a22413f892a0 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -29,6 +29,7 @@
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_map.h"
+#include "xe_mem_pool.h"
#include "xe_mocs.h"
#include "xe_printk.h"
#include "xe_pt.h"
@@ -1166,11 +1167,12 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
u32 batch_size, batch_size_allocated;
struct xe_device *xe = gt_to_xe(gt);
struct xe_res_cursor src_it, ccs_it;
+ struct xe_mem_pool *bb_pool;
struct xe_sriov_vf_ccs_ctx *ctx;
- struct xe_sa_manager *bb_pool;
u64 size = xe_bo_size(src_bo);
- struct xe_bb *bb = NULL;
+ struct xe_mem_pool_node *bb;
u64 src_L0, src_L0_ofs;
+ struct xe_bb xe_bb_tmp;
u32 src_L0_pt;
int err;
@@ -1208,18 +1210,18 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
size -= src_L0;
}
- bb = xe_bb_alloc(gt);
+ bb = xe_mem_pool_alloc_node();
if (IS_ERR(bb))
return PTR_ERR(bb);
bb_pool = ctx->mem.ccs_bb_pool;
- scoped_guard(mutex, xe_sa_bo_swap_guard(bb_pool)) {
- xe_sa_bo_swap_shadow(bb_pool);
+ scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
+ xe_mem_pool_swap_shadow_locked(bb_pool);
- err = xe_bb_init(bb, bb_pool, batch_size);
+ err = xe_mem_pool_insert_node(bb_pool, bb, batch_size * sizeof(u32));
if (err) {
xe_gt_err(gt, "BB allocation failed.\n");
- xe_bb_free(bb, NULL);
+ kfree(bb);
return err;
}
@@ -1227,6 +1229,7 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
size = xe_bo_size(src_bo);
batch_size = 0;
+ xe_bb_tmp = (struct xe_bb){ .cs = xe_mem_pool_node_cpu_addr(bb), .len = 0 };
/*
* Emit PTE and copy commands here.
* The CCS copy command can only support limited size. If the size to be
@@ -1255,24 +1258,27 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
batch_size += EMIT_COPY_CCS_DW;
- emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src);
+ emit_pte(m, &xe_bb_tmp, src_L0_pt, false, true, &src_it, src_L0, src);
- emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
+ emit_pte(m, &xe_bb_tmp, ccs_pt, false, false, &ccs_it, ccs_size, src);
- bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
- flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt,
+ xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
+ flush_flags);
+ flush_flags = xe_migrate_ccs_copy(m, &xe_bb_tmp, src_L0_ofs, src_is_pltt,
src_L0_ofs, dst_is_pltt,
src_L0, ccs_ofs, true);
- bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
+ xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
+ flush_flags);
size -= src_L0;
}
- xe_assert(xe, (batch_size_allocated == bb->len));
+ xe_assert(xe, (batch_size_allocated == xe_bb_tmp.len));
+ xe_assert(xe, bb->sa_node.size == xe_bb_tmp.len * sizeof(u32));
src_bo->bb_ccs[read_write] = bb;
xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
- xe_sa_bo_sync_shadow(bb->bo);
+ xe_mem_pool_sync_shadow_locked(bb);
}
return 0;
@@ -1297,10 +1303,10 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
enum xe_sriov_vf_ccs_rw_ctxs read_write)
{
- struct xe_bb *bb = src_bo->bb_ccs[read_write];
+ struct xe_mem_pool_node *bb = src_bo->bb_ccs[read_write];
struct xe_device *xe = xe_bo_device(src_bo);
+ struct xe_mem_pool *bb_pool;
struct xe_sriov_vf_ccs_ctx *ctx;
- struct xe_sa_manager *bb_pool;
u32 *cs;
xe_assert(xe, IS_SRIOV_VF(xe));
@@ -1308,17 +1314,17 @@ void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
ctx = &xe->sriov.vf.ccs.contexts[read_write];
bb_pool = ctx->mem.ccs_bb_pool;
- guard(mutex) (xe_sa_bo_swap_guard(bb_pool));
- xe_sa_bo_swap_shadow(bb_pool);
-
- cs = xe_sa_bo_cpu_addr(bb->bo);
- memset(cs, MI_NOOP, bb->len * sizeof(u32));
- xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
+ scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
+ xe_mem_pool_swap_shadow_locked(bb_pool);
- xe_sa_bo_sync_shadow(bb->bo);
+ cs = xe_mem_pool_node_cpu_addr(bb);
+ memset(cs, MI_NOOP, bb->sa_node.size);
+ xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
- xe_bb_free(bb, NULL);
- src_bo->bb_ccs[read_write] = NULL;
+ xe_mem_pool_sync_shadow_locked(bb);
+ xe_mem_pool_free_node(bb);
+ src_bo->bb_ccs[read_write] = NULL;
+ }
}
/**
@@ -1518,23 +1524,9 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
bb->len += len;
}
-static bool has_service_copy_support(struct xe_gt *gt)
-{
- /*
- * What we care about is whether the architecture was designed with
- * service copy functionality (specifically the new MEM_SET / MEM_COPY
- * instructions) so check the architectural engine list rather than the
- * actual list since these instructions are usable on BCS0 even if
- * all of the actual service copy engines (BCS1-BCS8) have been fused
- * off.
- */
- return gt->info.engine_mask & GENMASK(XE_HW_ENGINE_BCS8,
- XE_HW_ENGINE_BCS1);
-}
-
static u32 emit_clear_cmd_len(struct xe_gt *gt)
{
- if (has_service_copy_support(gt))
+ if (gt->info.has_xe2_blt_instructions)
return PVC_MEM_SET_CMD_LEN_DW;
else
return XY_FAST_COLOR_BLT_DW;
@@ -1543,7 +1535,7 @@ static u32 emit_clear_cmd_len(struct xe_gt *gt)
static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
u32 size, u32 pitch, bool is_vram)
{
- if (has_service_copy_support(gt))
+ if (gt->info.has_xe2_blt_instructions)
emit_clear_link_copy(gt, bb, src_ofs, size, pitch);
else
emit_clear_main_copy(gt, bb, src_ofs, size, pitch,
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 6337e671c97a..d908f4e03906 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -2032,8 +2032,10 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
if (XE_IOCTL_DBG(oa->xe, !param.exec_q))
return -ENOENT;
- if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1))
- return -EOPNOTSUPP;
+ if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) {
+ ret = -EOPNOTSUPP;
+ goto err_exec_q;
+ }
}
/*
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 01673d2b2464..c2ecd27ec770 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -118,6 +118,7 @@ static const struct xe_graphics_desc graphics_xe2 = {
static const struct xe_graphics_desc graphics_xe3p_lpg = {
XE2_GFX_FEATURES,
+ .has_indirect_ring_state = 1,
.multi_queue_engine_class_mask = BIT(XE_ENGINE_CLASS_COPY) | BIT(XE_ENGINE_CLASS_COMPUTE),
.num_geometry_xecore_fuse_regs = 3,
.num_compute_xecore_fuse_regs = 3,
@@ -851,6 +852,15 @@ static struct xe_gt *alloc_primary_gt(struct xe_tile *tile,
gt->info.num_compute_xecore_fuse_regs = graphics_desc->num_compute_xecore_fuse_regs;
/*
+ * Even if the service copy engines wind up being fused off, their
+ * presence in the IP descriptor indicates that the platform supports
+ * Xe2-style MEM_SET and MEM_COPY functionality.
+ */
+ if (graphics_desc->hw_engine_mask & GENMASK(XE_HW_ENGINE_BCS8,
+ XE_HW_ENGINE_BCS1))
+ gt->info.has_xe2_blt_instructions = true;
+
+ /*
* Before media version 13, the media IP was part of the primary GT
* so we need to add the media engines to the primary GT's engine list.
*/
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 80577e4b7437..8cc313182968 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -226,7 +226,7 @@ void xe_reg_whitelist_print_entry(struct drm_printer *p, unsigned int indent,
}
range_start = reg & REG_GENMASK(25, range_bit);
- range_end = range_start | REG_GENMASK(range_bit, 0);
+ range_end = range_start | REG_GENMASK(range_bit - 1, 0);
switch (val & RING_FORCE_TO_NONPRIV_ACCESS_MASK) {
case RING_FORCE_TO_NONPRIV_ACCESS_RW:
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c
index 6c4b16409cc9..150a241110fb 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c
@@ -149,10 +149,11 @@ pf_migration_consume(struct xe_device *xe, unsigned int vfid)
for_each_gt(gt, xe, gt_id) {
data = xe_gt_sriov_pf_migration_save_consume(gt, vfid);
- if (data && PTR_ERR(data) != EAGAIN)
+ if (!data)
+ continue;
+ if (!IS_ERR(data) || PTR_ERR(data) != -EAGAIN)
return data;
- if (PTR_ERR(data) == -EAGAIN)
- more_data = true;
+ more_data = true;
}
if (!more_data)
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
index db023fb66a27..09b99fb2608b 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
@@ -14,9 +14,9 @@
#include "xe_guc.h"
#include "xe_guc_submit.h"
#include "xe_lrc.h"
+#include "xe_mem_pool.h"
#include "xe_migrate.h"
#include "xe_pm.h"
-#include "xe_sa.h"
#include "xe_sriov_printk.h"
#include "xe_sriov_vf.h"
#include "xe_sriov_vf_ccs.h"
@@ -141,43 +141,47 @@ static u64 get_ccs_bb_pool_size(struct xe_device *xe)
static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
{
+ struct xe_mem_pool *pool;
struct xe_device *xe = tile_to_xe(tile);
- struct xe_sa_manager *sa_manager;
+ u32 *pool_cpu_addr, *last_dw_addr;
u64 bb_pool_size;
- int offset, err;
+ int err;
bb_pool_size = get_ccs_bb_pool_size(xe);
xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
- sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16,
- XE_SA_BO_MANAGER_FLAG_SHADOW);
-
- if (IS_ERR(sa_manager)) {
- xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
- sa_manager);
- err = PTR_ERR(sa_manager);
+ pool = xe_mem_pool_init(tile, bb_pool_size, sizeof(u32),
+ XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY);
+ if (IS_ERR(pool)) {
+ xe_sriov_err(xe, "xe_mem_pool_init failed with error: %pe\n",
+ pool);
+ err = PTR_ERR(pool);
return err;
}
- offset = 0;
- xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
- bb_pool_size);
- xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
- bb_pool_size);
+ pool_cpu_addr = xe_mem_pool_cpu_addr(pool);
+ memset(pool_cpu_addr, 0, bb_pool_size);
- offset = bb_pool_size - sizeof(u32);
- xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
- xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
+ last_dw_addr = pool_cpu_addr + (bb_pool_size / sizeof(u32)) - 1;
+ *last_dw_addr = MI_BATCH_BUFFER_END;
- ctx->mem.ccs_bb_pool = sa_manager;
+ /**
+ * Sync the main copy and shadow copy so that the shadow copy is
+ * replica of main copy. We sync only BBs after init part. So, we
+ * need to make sure the main pool and shadow copy are in sync after
+ * this point. This is needed as GuC may read the BB commands from
+ * shadow copy.
+ */
+ xe_mem_pool_sync(pool);
+ ctx->mem.ccs_bb_pool = pool;
return 0;
}
static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
{
- u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
+ u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
u32 dw[10], i = 0;
@@ -388,7 +392,7 @@ err_ret:
#define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET (2 * sizeof(u32))
void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
{
- u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
+ u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);
@@ -412,8 +416,8 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
struct xe_device *xe = xe_bo_device(bo);
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
struct xe_sriov_vf_ccs_ctx *ctx;
+ struct xe_mem_pool_node *bb;
struct xe_tile *tile;
- struct xe_bb *bb;
int err = 0;
xe_assert(xe, IS_VF_CCS_READY(xe));
@@ -445,7 +449,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
{
struct xe_device *xe = xe_bo_device(bo);
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
- struct xe_bb *bb;
+ struct xe_mem_pool_node *bb;
xe_assert(xe, IS_VF_CCS_READY(xe));
@@ -471,8 +475,8 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
*/
void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
{
- struct xe_sa_manager *bb_pool;
enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+ struct xe_mem_pool *bb_pool;
if (!IS_VF_CCS_READY(xe))
return;
@@ -485,7 +489,7 @@ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
drm_printf(p, "-------------------------\n");
- drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
+ xe_mem_pool_dump(bb_pool, p);
drm_puts(p, "\n");
}
}
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
index 22c499943d2a..6fc8f97ef3f4 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
@@ -17,9 +17,6 @@ enum xe_sriov_vf_ccs_rw_ctxs {
XE_SRIOV_VF_CCS_CTX_COUNT
};
-struct xe_migrate;
-struct xe_sa_manager;
-
/**
* struct xe_sriov_vf_ccs_ctx - VF CCS migration context data.
*/
@@ -33,7 +30,7 @@ struct xe_sriov_vf_ccs_ctx {
/** @mem: memory data */
struct {
/** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */
- struct xe_sa_manager *ccs_bb_pool;
+ struct xe_mem_pool *ccs_bb_pool;
} mem;
};
diff --git a/drivers/gpu/drm/xe/xe_tile_types.h b/drivers/gpu/drm/xe/xe_tile_types.h
index 33932fd547d7..0048100ccb72 100644
--- a/drivers/gpu/drm/xe/xe_tile_types.h
+++ b/drivers/gpu/drm/xe/xe_tile_types.h
@@ -106,8 +106,6 @@ struct xe_tile {
struct xe_lmtt lmtt;
} pf;
struct {
- /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */
- struct xe_ggtt_node *ggtt_balloon[2];
/** @sriov.vf.self_config: VF configuration data */
struct xe_tile_sriov_vf_selfconfig self_config;
} vf;
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index f8de6a4bf189..fcb6698abc6e 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -97,7 +97,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
{ XE_RTP_NAME("Tuning: Set STLB Bank Hash Mode to 4KB"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3510, XE_RTP_END_VERSION_UNDEFINED),
IS_INTEGRATED),
- XE_RTP_ACTIONS(FIELD_SET(XEHP_GAMSTLB_CTRL, BANK_HASH_MODE,
+ XE_RTP_ACTIONS(FIELD_SET(GAMSTLB_CTRL, BANK_HASH_MODE,
BANK_HASH_4KB_MODE))
},
};
@@ -129,7 +129,7 @@ static const struct xe_rtp_entry_sr engine_tunings[] = {
static const struct xe_rtp_entry_sr lrc_tunings[] = {
{ XE_RTP_NAME("Tuning: Windower HW Filtering"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3599), ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, HW_FILTERING))
+ XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN4, HW_FILTERING))
},
/* DG2 */
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 56e2db50bb36..ab6cc1f0a789 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1120,6 +1120,25 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
xe_bo_assert_held(bo);
+ /*
+ * Reject only WILLNEED mappings on DONTNEED/PURGED BOs. This
+ * gates new vm_bind ioctls (user supplies WILLNEED) while
+ * still allowing partial-unbind / remap splits whose new VMAs
+ * inherit the parent's DONTNEED attr. It must also run before
+ * xe_bo_willneed_get_locked() below so a 0->1 holder bump
+ * cannot silently promote DONTNEED back to WILLNEED.
+ */
+ if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) {
+ if (xe_bo_madv_is_dontneed(bo)) {
+ xe_vma_free(vma);
+ return ERR_PTR(-EBUSY);
+ }
+ if (xe_bo_is_purged(bo)) {
+ xe_vma_free(vma);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
vm_bo = drm_gpuvm_bo_obtain_locked(vma->gpuva.vm, &bo->ttm.base);
if (IS_ERR(vm_bo)) {
xe_vma_free(vma);
@@ -1131,6 +1150,10 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
vma->gpuva.gem.offset = bo_offset_or_userptr;
drm_gpuva_link(&vma->gpuva, vm_bo);
drm_gpuvm_bo_put(vm_bo);
+
+ xe_bo_vma_count_inc_locked(bo);
+ if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED)
+ xe_bo_willneed_get_locked(bo);
} else /* userptr or null */ {
if (!is_null && !is_cpu_addr_mirror) {
struct xe_userptr_vma *uvma = to_userptr_vma(vma);
@@ -1208,7 +1231,10 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
xe_bo_assert_held(bo);
drm_gpuva_unlink(&vma->gpuva);
- xe_bo_recompute_purgeable_state(bo);
+
+ xe_bo_vma_count_dec_locked(bo);
+ if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED)
+ xe_bo_willneed_put_locked(bo);
}
xe_vm_assert_held(vm);
@@ -3016,7 +3042,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
* @res_evict: Allow evicting resources during validation
* @validate: Perform BO validation
* @request_decompress: Request BO decompression
- * @check_purged: Reject operation if BO is purged
+ * @check_purged: Reject operation if BO is DONTNEED or PURGED
*/
struct xe_vma_lock_and_validate_flags {
u32 res_evict : 1;
@@ -3030,6 +3056,7 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
{
struct xe_bo *bo = xe_vma_bo(vma);
struct xe_vm *vm = xe_vma_vm(vma);
+ bool validate_bo = flags.validate;
int err = 0;
if (bo) {
@@ -3044,7 +3071,11 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
err = -EINVAL; /* BO already purged */
}
- if (!err && flags.validate)
+ /* Don't validate the BO for DONTNEED/PURGED remap remnants. */
+ if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_WILLNEED)
+ validate_bo = false;
+
+ if (!err && validate_bo)
err = xe_bo_validate(bo, vm,
xe_vm_allow_vm_eviction(vm) &&
flags.res_evict, exec);
@@ -3152,7 +3183,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
op->map.immediate,
.request_decompress =
op->map.request_decompress,
- .check_purged = true,
+ .check_purged = false,
});
break;
case DRM_GPUVA_OP_REMAP:
@@ -3174,7 +3205,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
.res_evict = res_evict,
.validate = true,
.request_decompress = false,
- .check_purged = true,
+ .check_purged = false,
});
if (!err && op->remap.next)
err = vma_lock_and_validate(exec, op->remap.next,
@@ -3182,7 +3213,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
.res_evict = res_evict,
.validate = true,
.request_decompress = false,
- .check_purged = true,
+ .check_purged = false,
});
break;
case DRM_GPUVA_OP_UNMAP:
@@ -3211,9 +3242,11 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
}
/*
- * Prefetch attempts to migrate BO's backing store without
- * repopulating it first. Purged BOs have no backing store
- * to migrate, so reject the operation.
+ * PREFETCH is the only op that still gates on BO purge state.
+ * MAP/REMAP handle this inside xe_vma_create() so partial
+ * unbind on a DONTNEED BO still works. PREFETCH skips
+ * xe_vma_create() and would migrate a BO with no backing
+ * store, so reject DONTNEED/PURGED here.
*/
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.prefetch.va),
@@ -3658,6 +3691,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+ XE_IOCTL_DBG(xe, !IS_DGFX(xe) && coh_mode == XE_COH_NONE &&
+ is_cpu_addr_mirror) ||
XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) &&
(op == DRM_XE_VM_BIND_OP_MAP_USERPTR ||
is_cpu_addr_mirror) &&
@@ -4156,7 +4191,8 @@ int xe_vm_get_property_ioctl(struct drm_device *drm, void *data,
int ret = 0;
if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] ||
- args->reserved[2])))
+ args->reserved[2] || args->extensions ||
+ args->pad)))
return -EINVAL;
vm = xe_vm_lookup(xef, args->vm_id);
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
index 66f00d3f5c07..c4fb29004195 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -186,147 +186,6 @@ static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
}
/**
- * xe_bo_is_dmabuf_shared() - Check if BO is shared via dma-buf
- * @bo: Buffer object
- *
- * Prevent marking imported or exported dma-bufs as purgeable.
- * For imported BOs, Xe doesn't own the backing store and cannot
- * safely reclaim pages (exporter or other devices may still be
- * using them). For exported BOs, external devices may have active
- * mappings we cannot track.
- *
- * Return: true if BO is imported or exported, false otherwise
- */
-static bool xe_bo_is_dmabuf_shared(struct xe_bo *bo)
-{
- struct drm_gem_object *obj = &bo->ttm.base;
-
- /* Imported: exporter owns backing store */
- if (drm_gem_is_imported(obj))
- return true;
-
- /* Exported: external devices may be accessing */
- if (obj->dma_buf)
- return true;
-
- return false;
-}
-
-/**
- * enum xe_bo_vmas_purge_state - VMA purgeable state aggregation
- *
- * Distinguishes whether a BO's VMAs are all DONTNEED, have at least
- * one WILLNEED, or have no VMAs at all.
- *
- * Enum values align with XE_MADV_PURGEABLE_* states for consistency.
- */
-enum xe_bo_vmas_purge_state {
- /** @XE_BO_VMAS_STATE_WILLNEED: At least one VMA is WILLNEED */
- XE_BO_VMAS_STATE_WILLNEED = 0,
- /** @XE_BO_VMAS_STATE_DONTNEED: All VMAs are DONTNEED */
- XE_BO_VMAS_STATE_DONTNEED = 1,
- /** @XE_BO_VMAS_STATE_NO_VMAS: BO has no VMAs */
- XE_BO_VMAS_STATE_NO_VMAS = 2,
-};
-
-/*
- * xe_bo_recompute_purgeable_state() casts between xe_bo_vmas_purge_state and
- * xe_madv_purgeable_state. Enforce that WILLNEED=0 and DONTNEED=1 match across
- * both enums so the single-line cast is always valid.
- */
-static_assert(XE_BO_VMAS_STATE_WILLNEED == (int)XE_MADV_PURGEABLE_WILLNEED,
- "VMA purge state WILLNEED must equal madv purgeable WILLNEED");
-static_assert(XE_BO_VMAS_STATE_DONTNEED == (int)XE_MADV_PURGEABLE_DONTNEED,
- "VMA purge state DONTNEED must equal madv purgeable DONTNEED");
-
-/**
- * xe_bo_all_vmas_dontneed() - Determine BO VMA purgeable state
- * @bo: Buffer object
- *
- * Check all VMAs across all VMs to determine aggregate purgeable state.
- * Shared BOs require unanimous DONTNEED state from all mappings.
- *
- * Caller must hold BO dma-resv lock.
- *
- * Return: XE_BO_VMAS_STATE_DONTNEED if all VMAs are DONTNEED,
- * XE_BO_VMAS_STATE_WILLNEED if at least one VMA is not DONTNEED,
- * XE_BO_VMAS_STATE_NO_VMAS if BO has no VMAs
- */
-static enum xe_bo_vmas_purge_state xe_bo_all_vmas_dontneed(struct xe_bo *bo)
-{
- struct drm_gpuvm_bo *vm_bo;
- struct drm_gpuva *gpuva;
- struct drm_gem_object *obj = &bo->ttm.base;
- bool has_vmas = false;
-
- xe_bo_assert_held(bo);
-
- /* Shared dma-bufs cannot be purgeable */
- if (xe_bo_is_dmabuf_shared(bo))
- return XE_BO_VMAS_STATE_WILLNEED;
-
- drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
- drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
- struct xe_vma *vma = gpuva_to_vma(gpuva);
-
- has_vmas = true;
-
- /* Any non-DONTNEED VMA prevents purging */
- if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_DONTNEED)
- return XE_BO_VMAS_STATE_WILLNEED;
- }
- }
-
- /*
- * No VMAs => preserve existing BO purgeable state.
- * Avoids incorrectly flipping DONTNEED -> WILLNEED when last VMA unmapped.
- */
- if (!has_vmas)
- return XE_BO_VMAS_STATE_NO_VMAS;
-
- return XE_BO_VMAS_STATE_DONTNEED;
-}
-
-/**
- * xe_bo_recompute_purgeable_state() - Recompute BO purgeable state from VMAs
- * @bo: Buffer object
- *
- * Walk all VMAs to determine if BO should be purgeable or not.
- * Shared BOs require unanimous DONTNEED state from all mappings.
- * If the BO has no VMAs the existing state is preserved.
- *
- * Locking: Caller must hold BO dma-resv lock. When iterating GPUVM lists,
- * VM lock must also be held (write) to prevent concurrent VMA modifications.
- * This is satisfied at both call sites:
- * - xe_vma_destroy(): holds vm->lock write
- * - madvise_purgeable(): holds vm->lock write (from madvise ioctl path)
- *
- * Return: nothing
- */
-void xe_bo_recompute_purgeable_state(struct xe_bo *bo)
-{
- enum xe_bo_vmas_purge_state vma_state;
-
- if (!bo)
- return;
-
- xe_bo_assert_held(bo);
-
- /*
- * Once purged, always purged. Cannot transition back to WILLNEED.
- * This matches i915 semantics where purged BOs are permanently invalid.
- */
- if (bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED)
- return;
-
- vma_state = xe_bo_all_vmas_dontneed(bo);
-
- if (vma_state != (enum xe_bo_vmas_purge_state)bo->madv_purgeable &&
- vma_state != XE_BO_VMAS_STATE_NO_VMAS)
- xe_bo_set_purgeable_state(bo, (enum xe_madv_purgeable_state)vma_state);
-}
-
-/**
* madvise_purgeable - Handle purgeable buffer object advice
* @xe: XE device
* @vm: VM
@@ -359,12 +218,6 @@ static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm,
/* BO must be locked before modifying madv state */
xe_bo_assert_held(bo);
- /* Skip shared dma-bufs - no PTEs to zap */
- if (xe_bo_is_dmabuf_shared(bo)) {
- vmas[i]->skip_invalidation = true;
- continue;
- }
-
/*
* Once purged, always purged. Cannot transition back to WILLNEED.
* This matches i915 semantics where purged BOs are permanently invalid.
@@ -377,13 +230,14 @@ static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm,
switch (op->purge_state_val.val) {
case DRM_XE_VMA_PURGEABLE_STATE_WILLNEED:
- vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_WILLNEED;
vmas[i]->skip_invalidation = true;
-
- xe_bo_recompute_purgeable_state(bo);
+ /* Only act on a real DONTNEED -> WILLNEED transition. */
+ if (vmas[i]->attr.purgeable_state == XE_MADV_PURGEABLE_DONTNEED) {
+ vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_WILLNEED;
+ xe_bo_willneed_get_locked(bo);
+ }
break;
case DRM_XE_VMA_PURGEABLE_STATE_DONTNEED:
- vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_DONTNEED;
/*
* Don't zap PTEs at DONTNEED time -- pages are still
* alive. The zap happens in xe_bo_move_notify() right
@@ -391,7 +245,11 @@ static void madvise_purgeable(struct xe_device *xe, struct xe_vm *vm,
*/
vmas[i]->skip_invalidation = true;
- xe_bo_recompute_purgeable_state(bo);
+ /* Only act on a real WILLNEED -> DONTNEED transition. */
+ if (vmas[i]->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) {
+ vmas[i]->attr.purgeable_state = XE_MADV_PURGEABLE_DONTNEED;
+ xe_bo_willneed_put_locked(bo);
+ }
break;
default:
/* Should never hit - values validated in madvise_args_are_sane() */
@@ -621,6 +479,45 @@ static int xe_madvise_purgeable_retained_to_user(const struct xe_madvise_details
return 0;
}
+static bool check_pat_args_are_sane(struct xe_device *xe,
+ struct xe_vmas_in_madvise_range *madvise_range,
+ u16 pat_index)
+{
+ u16 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+ int i;
+
+ /*
+ * Using coh_none with CPU cached buffers is not allowed on iGPU.
+ * On iGPU the GPU shares the LLC with the CPU, so with coh_none
+ * the GPU bypasses CPU caches and reads directly from DRAM,
+ * potentially seeing stale sensitive data from previously freed
+ * pages. On dGPU this restriction does not apply, because the
+ * platform does not provide a non-coherent system memory access
+ * path that would violate the DMA coherency contract.
+ */
+ if (coh_mode != XE_COH_NONE || IS_DGFX(xe))
+ return true;
+
+ for (i = 0; i < madvise_range->num_vmas; i++) {
+ struct xe_vma *vma = madvise_range->vmas[i];
+ struct xe_bo *bo = xe_vma_bo(vma);
+
+ if (bo) {
+ /* BO with WB caching + COH_NONE is not allowed */
+ if (XE_IOCTL_DBG(xe, bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
+ return false;
+ /* Imported dma-buf without caching info, assume cached */
+ if (XE_IOCTL_DBG(xe, !bo->cpu_caching))
+ return false;
+ } else if (XE_IOCTL_DBG(xe, xe_vma_is_cpu_addr_mirror(vma) ||
+ xe_vma_is_userptr(vma)))
+ /* System memory (userptr/SVM) is always CPU cached */
+ return false;
+ }
+
+ return true;
+}
+
static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
int num_vmas, u32 atomic_val)
{
@@ -750,6 +647,14 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
}
}
+ if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
+ if (!check_pat_args_are_sane(xe, &madvise_range,
+ args->pat_index.val)) {
+ err = -EINVAL;
+ goto free_vmas;
+ }
+ }
+
if (madvise_range.has_bo_vmas) {
if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
if (!check_bo_args_are_sane(vm, madvise_range.vmas,
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h
index 39acd2689ca0..a3078f634c7e 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.h
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.h
@@ -13,6 +13,4 @@ struct xe_bo;
int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
-void xe_bo_recompute_purgeable_state(struct xe_bo *bo);
-
#endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 546296f0220b..33df43d0bede 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -651,7 +651,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
},
{ XE_RTP_NAME("18033852989"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST))
+ XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST))
},
{ XE_RTP_NAME("15016589081"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)),
@@ -743,14 +743,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
},
- { XE_RTP_NAME("14019988906"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
- },
- { XE_RTP_NAME("14019877138"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
- },
{ XE_RTP_NAME("14021490052"),
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(FF_MODE,
@@ -762,7 +754,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
},
{ XE_RTP_NAME("22021007897"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE))
+ XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE))
},
/* Xe3_LPG */
@@ -778,7 +770,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
},
{ XE_RTP_NAME("22021007897"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE))
+ XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE))
},
{ XE_RTP_NAME("14024681466"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)),