diff options
| author | Thomas Zimmermann <tzimmermann@suse.de> | 2024-08-30 14:14:29 +0200 |
|---|---|---|
| committer | Thomas Zimmermann <tzimmermann@suse.de> | 2024-08-30 15:00:53 +0200 |
| commit | 3973a8d052829dcc5193fb6330dcd13c2b7127ca (patch) | |
| tree | b02f355e874ec5997196dfea09ad796419c38e7d /drivers/gpu/drm | |
| parent | e75356ef5baef69e9f577023c453d91a902dc082 (diff) | |
| parent | 6d0ebb3904853d18eeec7af5e8b4ca351b6f9025 (diff) | |
Merge drm/drm-next into drm-misc-next
Backmerging to get fixes from v6.11-rc5.
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Diffstat (limited to 'drivers/gpu/drm')
708 files changed, 19573 insertions, 8338 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4baeb6519fda..7fe41a3c2541 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -118,6 +118,8 @@ #define MAX_GPU_INSTANCE 64 +#define GFX_SLICE_PERIOD msecs_to_jiffies(250) + struct amdgpu_gpu_instance { struct amdgpu_device *adev; int mgpu_fan_enabled; @@ -347,9 +349,9 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_LAST }; -#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ -#define MAX_KIQ_REG_WAIT (amdgpu_sriov_vf(adev) ? 50000 : 5000) /* in usecs, extend for VF */ -#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ +#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ +#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ +#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_TRY 1000 int amdgpu_device_ip_set_clockgating_state(void *dev, @@ -823,17 +825,6 @@ struct amdgpu_mqd { struct amdgpu_reset_domain; struct amdgpu_fru_info; -struct amdgpu_reset_info { - /* reset dump register */ - u32 *reset_dump_reg_list; - u32 *reset_dump_reg_value; - int num_regs; - -#ifdef CONFIG_DEV_COREDUMP - struct amdgpu_coredump_info *coredump_info; -#endif -}; - /* * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. */ @@ -1157,8 +1148,6 @@ struct amdgpu_device { struct mutex benchmark_mutex; - struct amdgpu_reset_info reset_info; - bool scpm_enabled; uint32_t scpm_status; @@ -1175,6 +1164,10 @@ struct amdgpu_device { bool debug_disable_soft_recovery; bool debug_use_vram_fw_buf; bool debug_enable_ras_aca; + + bool enforce_isolation[MAX_XCP]; + /* Added this mutex for cleaner shader isolation between GFX and compute processes */ + struct mutex enforce_isolation_mutex; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, @@ -1587,13 +1580,6 @@ static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif -#if defined(CONFIG_DRM_AMD_DC) -int amdgpu_dm_display_resume(struct amdgpu_device *adev ); -#else -static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; } -#endif - - void amdgpu_register_gpu_instance(struct amdgpu_device *adev); void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 19158cc30f31..57bda66e85ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -80,6 +80,9 @@ static void aca_banks_release(struct aca_banks *banks) { struct aca_bank_node *node, *tmp; + if (list_empty(&banks->list)) + return; + list_for_each_entry_safe(node, tmp, &banks->list, node) { list_del(&node->node); kvfree(node); @@ -453,13 +456,13 @@ static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_er switch (type) { case ACA_ERROR_TYPE_UE: - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, count); break; case ACA_ERROR_TYPE_CE: - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, count); break; case ACA_ERROR_TYPE_DEFERRED: - amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, count); + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, count); break; default: break; @@ -562,9 +565,13 @@ static void aca_error_fini(struct aca_error *aerr) struct aca_bank_error *bank_error, *tmp; mutex_lock(&aerr->lock); + if (list_empty(&aerr->list)) + goto out_unlock; + list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) aca_bank_error_remove(aerr, bank_error); +out_unlock: mutex_destroy(&aerr->lock); } @@ -680,6 +687,9 @@ static void aca_manager_fini(struct aca_handle_manager *mgr) { struct aca_handle *handle, *tmp; + if (list_empty(&mgr->list)) + return; + list_for_each_entry_safe(handle, tmp, &mgr->list, node) amdgpu_aca_remove_handle(handle); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 03205e3c3746..4f08b153cb66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -364,15 +364,15 @@ allocate_mem_reserve_bo_failed: return r; } -void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj) +void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj) { - struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; + struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj; - amdgpu_bo_reserve(bo, true); - amdgpu_bo_kunmap(bo); - amdgpu_bo_unpin(bo); - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&(bo)); + amdgpu_bo_reserve(*bo, true); + amdgpu_bo_kunmap(*bo); + amdgpu_bo_unpin(*bo); + amdgpu_bo_unreserve(*bo); + amdgpu_bo_unref(bo); } int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, @@ -783,22 +783,6 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, return 0; } -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, - int hub_inst, int hub_type) -{ - if (!hub_type) { - if (adev->gfxhub.funcs->query_utcl2_poison_status) - return adev->gfxhub.funcs->query_utcl2_poison_status(adev, hub_inst); - else - return false; - } else { - if (adev->mmhub.funcs->query_utcl2_poison_status) - return adev->mmhub.funcs->query_utcl2_poison_status(adev, hub_inst); - else - return false; - } -} - int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev) { return kgd2kfd_check_and_lock_kfd(); @@ -887,3 +871,21 @@ free_ring_funcs: return r; } + +/* Stop scheduling on KFD */ +int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id) +{ + if (!adev->kfd.init_complete) + return 0; + + return kgd2kfd_stop_sched(adev->kfd.dev, node_id); +} + +/* Start scheduling on KFD */ +int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id) +{ + if (!adev->kfd.init_complete) + return 0; + + return kgd2kfd_start_sched(adev->kfd.dev, node_id); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index e7bb1ca35801..f9d119448442 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -235,7 +235,7 @@ int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool mqd_gfx9); -void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj); +void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj); int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, void **mem_obj); void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj); @@ -264,6 +264,8 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, uint32_t *payload); int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off, u32 inst); +int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id); +int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id); /* Read user wptr from a specified user address space with page fault * disabled. The memory must be pinned and mapped to the hardware when @@ -322,7 +324,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, void **kptr, uint64_t *size); void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem); -int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo); +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence __rcu **ef); @@ -345,11 +347,9 @@ void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *ad pasid_notify pasid_fn, void *data, uint32_t reset); bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev); -bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem); +bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, - int hub_inst, int hub_type); int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, @@ -426,6 +426,8 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); int kgd2kfd_check_and_lock_kfd(void); void kgd2kfd_unlock_kfd(void); +int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); +int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); #else static inline int kgd2kfd_init(void) { @@ -496,5 +498,15 @@ static inline int kgd2kfd_check_and_lock_kfd(void) static inline void kgd2kfd_unlock_kfd(void) { } + +static inline int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + return 0; +} + +static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + return 0; +} #endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index aff08321e976..8dfdb18197c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -191,4 +191,6 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 2320df51c914..73b2b401b450 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -418,5 +418,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, - .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings + .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index a5c7259cf2a3..e2ae714a700f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -541,5 +541,7 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { kgd_gfx_v9_4_3_set_wave_launch_trap_override, .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode, .set_address_watch = kgd_gfx_v9_4_3_set_address_watch, - .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch + .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 3ab6c3aa0ad1..62176d607bef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -1070,6 +1070,20 @@ static void program_trap_handler_settings(struct amdgpu_device *adev, unlock_srbm(adev); } +uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + return 0; +} + +uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout) +{ + return 0; +} + const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -1097,4 +1111,6 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info, .program_trap_handler_settings = program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v10_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h index 67bcaa3d4226..9efd2dd4fdd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h @@ -56,3 +56,12 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t grace_period, uint32_t *reg_offset, uint32_t *reg_data); +uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst); +uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst, + unsigned int utimeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index 8c8437a4383f..c718bedda0ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -680,5 +680,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override, .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode, .set_address_watch = kgd_gfx_v10_set_address_watch, - .clear_address_watch = kgd_gfx_v10_clear_address_watch + .clear_address_watch = kgd_gfx_v10_clear_address_watch, + .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v10_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c index b61a32d6af4b..a4ba49cb22db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c @@ -786,6 +786,20 @@ static uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev, return 0; } +static uint64_t kgd_gfx_v11_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + return 0; +} + +static uint64_t kgd_gfx_v11_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout) +{ + return 0; +} + const struct kfd2kgd_calls gfx_v11_kfd2kgd = { .program_sh_mem_settings = program_sh_mem_settings_v11, .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11, @@ -808,5 +822,7 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = { .set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override, .set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode, .set_address_watch = kgd_gfx_v11_set_address_watch, - .clear_address_watch = kgd_gfx_v11_clear_address_watch + .clear_address_watch = kgd_gfx_v11_clear_address_watch, + .hqd_get_pq_addr = kgd_gfx_v11_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v11_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 5a35a8ca8922..c63528a4e894 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -1144,6 +1144,105 @@ void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, kgd_gfx_v9_unlock_srbm(adev, inst); } +uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst) +{ + uint32_t low, high; + uint64_t queue_addr = 0; + + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); + amdgpu_gfx_rlc_enter_safe_mode(adev, inst); + + if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE)) + goto unlock_out; + + low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE); + high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI); + + /* only concerned with user queues. */ + if (!high) + goto unlock_out; + + queue_addr = (((queue_addr | high) << 32) | low) << 8; + +unlock_out: + amdgpu_gfx_rlc_exit_safe_mode(adev, inst); + kgd_gfx_v9_release_queue(adev, inst); + + return queue_addr; +} + +/* assume queue acquired */ +static int kgd_gfx_v9_hqd_dequeue_wait(struct amdgpu_device *adev, uint32_t inst, + unsigned int utimeout) +{ + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + while (true) { + uint32_t temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE); + + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + return 0; + + if (time_after(jiffies, end_jiffies)) + return -ETIME; + + usleep_range(500, 1000); + } +} + +uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout) +{ + uint32_t low, high, pipe_reset_data = 0; + uint64_t queue_addr = 0; + + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); + amdgpu_gfx_rlc_enter_safe_mode(adev, inst); + + if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE)) + goto unlock_out; + + low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE); + high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI); + + /* only concerned with user queues. */ + if (!high) + goto unlock_out; + + queue_addr = (((queue_addr | high) << 32) | low) << 8; + + pr_debug("Attempting queue reset on XCC %i pipe id %i queue id %i\n", + inst, pipe_id, queue_id); + + /* assume previous dequeue request issued will take affect after reset */ + WREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_COMPUTE_QUEUE_RESET, 0x1); + + if (!kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout)) + goto unlock_out; + + pr_debug("Attempting pipe reset on XCC %i pipe id %i\n", inst, pipe_id); + + pipe_reset_data = REG_SET_FIELD(pipe_reset_data, CP_MEC_CNTL, MEC_ME1_PIPE0_RESET, 1); + pipe_reset_data = pipe_reset_data << pipe_id; + + WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, pipe_reset_data); + WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, 0); + + if (kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout)) + queue_addr = 0; + +unlock_out: + pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n", + inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" : "failed!"); + amdgpu_gfx_rlc_exit_safe_mode(adev, inst); + kgd_gfx_v9_release_queue(adev, inst); + + return queue_addr; +} + const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, @@ -1172,4 +1271,6 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, + .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, + .hqd_reset = kgd_gfx_v9_hqd_reset }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index ce424615f59b..988c50ac3be0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -101,3 +101,12 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t grace_period, uint32_t *reg_offset, uint32_t *reg_data); +uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst); +uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, + uint32_t pipe_id, + uint32_t queue_id, + uint32_t inst, + unsigned int utimeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 11672bfe4fad..6d5fd371d5ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1252,7 +1252,7 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, return ret; } -static void unmap_bo_from_gpuvm(struct kgd_mem *mem, +static int unmap_bo_from_gpuvm(struct kgd_mem *mem, struct kfd_mem_attachment *entry, struct amdgpu_sync *sync) { @@ -1260,11 +1260,18 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, struct amdgpu_device *adev = entry->adev; struct amdgpu_vm *vm = bo_va->base.vm; + if (bo_va->queue_refcount) { + pr_debug("bo_va->queue_refcount %d\n", bo_va->queue_refcount); + return -EBUSY; + } + amdgpu_vm_bo_unmap(adev, bo_va, entry->va); amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); amdgpu_sync_fence(sync, bo_va->last_pt_update); + + return 0; } static int update_gpuvm_pte(struct kgd_mem *mem, @@ -2191,7 +2198,10 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", entry->va, entry->va + bo_size, entry); - unmap_bo_from_gpuvm(mem, entry, ctx.sync); + ret = unmap_bo_from_gpuvm(mem, entry, ctx.sync); + if (ret) + goto unreserve_out; + entry->is_mapped = false; mem->mapped_to_gpu_memory--; @@ -2226,11 +2236,12 @@ int amdgpu_amdkfd_gpuvm_sync_memory( /** * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count * @bo: Buffer object to be mapped + * @bo_gart: Return bo reference * * Before return, bo reference count is incremented. To release the reference and unpin/ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem. */ -int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo) +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart) { int ret; @@ -2257,7 +2268,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo) amdgpu_bo_unreserve(bo); - bo = amdgpu_bo_ref(bo); + *bo_gart = amdgpu_bo_ref(bo); return 0; @@ -3200,12 +3211,13 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, return 0; } -bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem) +bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem) { + struct amdgpu_vm *vm = drm_priv_to_vm(drm_priv); struct kfd_mem_attachment *entry; list_for_each_entry(entry, &mem->attachments, list) { - if (entry->is_mapped && entry->adev == adev) + if (entry->is_mapped && entry->bo_va->base.vm == vm) return true; } return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 7dc102f0bc1d..0c8975ac5af9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1018,8 +1018,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, if (clock_type == COMPUTE_ENGINE_PLL_PARAM) { args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->post_div = args.v3.ucPostDiv; dividers->enable_post_div = (args.v3.ucCntlFlag & @@ -1039,8 +1040,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, if (strobe_mode) args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->post_div = args.v5.ucPostDiv; dividers->enable_post_div = (args.v5.ucCntlFlag & @@ -1058,8 +1060,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, /* fusion */ args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */ - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->post_divider = dividers->post_div = args.v4.ucPostDiv; dividers->real_clock = le32_to_cpu(args.v4.ulClock); @@ -1070,8 +1073,9 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, args.v6_in.ulClock.ulComputeClockFlag = clock_type; args.v6_in.ulClock.ulClockFreq = cpu_to_le32(clock); /* 10 khz */ - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; dividers->whole_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDiv); dividers->frac_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDivFrac); @@ -1113,8 +1117,9 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev, if (strobe_mode) args.ucInputFlag |= MPLL_INPUT_FLAG_STROBE_MODE_EN; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; mpll_param->clkfrac = le16_to_cpu(args.ulFbDiv.usFbDivFrac); mpll_param->clkf = le16_to_cpu(args.ulFbDiv.usFbDiv); @@ -1211,8 +1216,9 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, args.v2.ucVoltageMode = 0; args.v2.usVoltageLevel = 0; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; *voltage = le16_to_cpu(args.v2.usVoltageLevel); break; @@ -1221,8 +1227,9 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL; args.v3.usVoltageLevel = cpu_to_le16(voltage_id); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, - sizeof(args)); + if (amdgpu_atom_execute_table(adev->mode_info.atom_context, + index, (uint32_t *)&args, sizeof(args))) + return -EINVAL; *voltage = le16_to_cpu(args.v3.usVoltageLevel); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 618e469e3622..42e64bce661e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -431,6 +431,11 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) goto success; } + if (amdgpu_read_platform_bios(adev)) { + dev_info(adev->dev, "Fetched VBIOS from platform\n"); + goto success; + } + if (amdgpu_read_bios(adev)) { dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n"); goto success; @@ -446,11 +451,6 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) goto success; } - if (amdgpu_read_platform_bios(adev)) { - dev_info(adev->dev, "Fetched VBIOS from platform\n"); - goto success; - } - dev_err(adev->dev, "Unable to locate a BIOS ROM\n"); return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index c3d89088123d..16153d275d7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -414,7 +414,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->pm.fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->pm.fw, "%s", fw_name); if (err) { DRM_ERROR("Failed to load firmware \"%s\"", fw_name); amdgpu_ucode_release(&adev->pm.fw); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index cae7479c3ecf..344e0a9ee08a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -249,11 +249,7 @@ amdgpu_connector_find_encoder(struct drm_connector *connector, static struct edid * amdgpu_connector_get_hardcoded_edid(struct amdgpu_device *adev) { - if (adev->mode_info.bios_hardcoded_edid) { - return kmemdup((unsigned char *)adev->mode_info.bios_hardcoded_edid, - adev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL); - } - return NULL; + return drm_edid_duplicate(drm_edid_raw(adev->mode_info.bios_hardcoded_edid)); } static void amdgpu_connector_get_edid(struct drm_connector *connector) @@ -442,6 +438,9 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder, continue; mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); + if (!mode) + return; + drm_mode_probed_add(connector, mode); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9aa952f258cf..1e475eb01417 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -263,6 +263,10 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, if (size < sizeof(struct drm_amdgpu_bo_list_in)) goto free_partial_kdata; + /* Only a single BO list is allowed to simplify handling. */ + if (p->bo_list) + ret = -EINVAL; + ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata); if (ret) goto free_partial_kdata; @@ -292,6 +296,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, num_ibs[i], &p->jobs[i]); if (ret) goto free_all_kdata; + p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id]; } p->gang_leader = p->jobs[p->gang_leader_idx]; @@ -1057,6 +1062,9 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, r = amdgpu_ring_parse_cs(ring, p, job, ib); if (r) return r; + + if (ib->sa_bo) + ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); } else { ib->ptr = (uint32_t *)kptr; r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib); @@ -1103,7 +1111,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) struct drm_gpu_scheduler *sched = entity->rq->sched; struct amdgpu_ring *ring = to_amdgpu_ring(sched); - if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) + if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub)) return -EINVAL; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 5cb33ac99f70..c43d1b6e5d66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -685,16 +685,24 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id); args->out.alloc.ctx_id = id; break; case AMDGPU_CTX_OP_FREE_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_free(fpriv, id); break; case AMDGPU_CTX_OP_QUERY_STATE: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_QUERY_STATE2: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query2(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_GET_STABLE_PSTATE: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 0e1a11b6b989..cbef720de779 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2026,100 +2026,6 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL, DEFINE_DEBUGFS_ATTRIBUTE(fops_sclk_set, NULL, amdgpu_debugfs_sclk_set, "%llu\n"); -static ssize_t amdgpu_reset_dump_register_list_read(struct file *f, - char __user *buf, size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; - char reg_offset[12]; - int i, ret, len = 0; - - if (*pos) - return 0; - - memset(reg_offset, 0, 12); - ret = down_read_killable(&adev->reset_domain->sem); - if (ret) - return ret; - - for (i = 0; i < adev->reset_info.num_regs; i++) { - sprintf(reg_offset, "0x%x\n", adev->reset_info.reset_dump_reg_list[i]); - up_read(&adev->reset_domain->sem); - if (copy_to_user(buf + len, reg_offset, strlen(reg_offset))) - return -EFAULT; - - len += strlen(reg_offset); - ret = down_read_killable(&adev->reset_domain->sem); - if (ret) - return ret; - } - - up_read(&adev->reset_domain->sem); - *pos += len; - - return len; -} - -static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, - const char __user *buf, size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; - char reg_offset[11]; - uint32_t *new = NULL, *tmp = NULL; - unsigned int len = 0; - int ret, i = 0; - - do { - memset(reg_offset, 0, 11); - if (copy_from_user(reg_offset, buf + len, - min(10, (size-len)))) { - ret = -EFAULT; - goto error_free; - } - - new = krealloc_array(tmp, i + 1, sizeof(uint32_t), GFP_KERNEL); - if (!new) { - ret = -ENOMEM; - goto error_free; - } - tmp = new; - if (sscanf(reg_offset, "%X %n", &tmp[i], &ret) != 1) { - ret = -EINVAL; - goto error_free; - } - - len += ret; - i++; - } while (len < size); - - new = kmalloc_array(i, sizeof(uint32_t), GFP_KERNEL); - if (!new) { - ret = -ENOMEM; - goto error_free; - } - ret = down_write_killable(&adev->reset_domain->sem); - if (ret) - goto error_free; - - swap(adev->reset_info.reset_dump_reg_list, tmp); - swap(adev->reset_info.reset_dump_reg_value, new); - adev->reset_info.num_regs = i; - up_write(&adev->reset_domain->sem); - ret = size; - -error_free: - if (tmp != new) - kfree(tmp); - kfree(new); - return ret; -} - -static const struct file_operations amdgpu_reset_dump_register_list = { - .owner = THIS_MODULE, - .read = amdgpu_reset_dump_register_list_read, - .write = amdgpu_reset_dump_register_list_write, - .llseek = default_llseek -}; - int amdgpu_debugfs_init(struct amdgpu_device *adev) { struct dentry *root = adev_to_drm(adev)->primary->debugfs_root; @@ -2204,8 +2110,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) &amdgpu_debugfs_vm_info_fops); debugfs_create_file("amdgpu_benchmark", 0200, root, adev, &amdgpu_benchmark_fops); - debugfs_create_file("amdgpu_reset_dump_register_list", 0644, root, adev, - &amdgpu_reset_dump_register_list); adev->debugfs_vbios_blob.data = adev->bios; adev->debugfs_vbios_blob.size = adev->bios_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index f0a44d0dec27..cf2b4dd4d865 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -203,7 +203,7 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, struct amdgpu_coredump_info *coredump = data; struct drm_print_iterator iter; struct amdgpu_vm_fault_info *fault_info; - int i, ver; + int ver; iter.data = buffer; iter.offset = 0; @@ -236,7 +236,7 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, drm_printf(&p, "\nSOC Memory Information\n"); drm_printf(&p, "real vram size: %llu\n", coredump->adev->gmc.real_vram_size); drm_printf(&p, "visible vram size: %llu\n", coredump->adev->gmc.visible_vram_size); - drm_printf(&p, "visible vram size: %llu\n", coredump->adev->mman.gtt_mgr.manager.size); + drm_printf(&p, "gtt size: %llu\n", coredump->adev->mman.gtt_mgr.manager.size); /* GDS Config */ drm_printf(&p, "\nGDS Config\n"); @@ -317,14 +317,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, if (coredump->reset_vram_lost) drm_printf(&p, "VRAM is lost due to GPU reset!\n"); - if (coredump->adev->reset_info.num_regs) { - drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); - - for (i = 0; i < coredump->adev->reset_info.num_regs; i++) - drm_printf(&p, "0x%08x: 0x%08x\n", - coredump->adev->reset_info.reset_dump_reg_list[i], - coredump->adev->reset_info.reset_dump_reg_value[i]); - } return count - iter.remain; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1cd7d355689c..49ef22dcf7fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1916,6 +1916,8 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) */ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) { + int i; + if (amdgpu_sched_jobs < 4) { dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", amdgpu_sched_jobs); @@ -1970,6 +1972,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + for (i = 0; i < MAX_XCP; i++) + adev->enforce_isolation[i] = !!enforce_isolation; + return 0; } @@ -2471,6 +2476,7 @@ out: */ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { + struct amdgpu_ip_block *ip_block; struct pci_dev *parent; int i, r; bool total; @@ -2608,7 +2614,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (!total) return -ENODEV; - amdgpu_amdkfd_device_probe(adev); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + if (ip_block->status.valid != false) + amdgpu_amdkfd_device_probe(adev); + adev->cg_flags &= amdgpu_cg_mask; adev->pg_flags &= amdgpu_pg_mask; @@ -3948,6 +3957,27 @@ static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev) adev->ram_is_direct_mapped = true; } +#if defined(CONFIG_HSA_AMD_P2P) +/** + * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled. + * + * @adev: amdgpu_device pointer + * + * return if IOMMU remapping bar address + */ +static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev) +{ + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev(adev->dev); + if (domain && (domain->type == IOMMU_DOMAIN_DMA || + domain->type == IOMMU_DOMAIN_DMA_FQ)) + return true; + + return false; +} +#endif + static const struct attribute *amdgpu_dev_attributes[] = { &dev_attr_pcie_replay_count.attr, NULL @@ -4055,6 +4085,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->notifier_lock); mutex_init(&adev->pm.stable_pstate_ctx_lock); mutex_init(&adev->benchmark_mutex); + mutex_init(&adev->gfx.reset_sem_mutex); + /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */ + mutex_init(&adev->enforce_isolation_mutex); + mutex_init(&adev->gfx.kfd_sch_mutex); amdgpu_device_init_apu_flags(adev); @@ -4086,6 +4120,21 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_device_delayed_init_work_handler); INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, amdgpu_device_delay_enable_gfx_off); + /* + * Initialize the enforce_isolation work structures for each XCP + * partition. This work handler is responsible for enforcing shader + * isolation on AMD GPUs. It counts the number of emitted fences for + * each GFX and compute ring. If there are any fences, it schedules + * the `enforce_isolation_work` to be run after a delay. If there are + * no fences, it signals the Kernel Fusion Driver (KFD) to resume the + * runqueue. + */ + for (i = 0; i < MAX_XCP; i++) { + INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work, + amdgpu_gfx_enforce_isolation_handler); + adev->gfx.enforce_isolation[i].adev = adev; + adev->gfx.enforce_isolation[i].xcp_id = i; + } INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); @@ -5278,16 +5327,15 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, { int i, r = 0; struct amdgpu_job *job = NULL; + struct amdgpu_device *tmp_adev = reset_context->reset_req_dev; bool need_full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); if (reset_context->reset_req_dev == adev) job = reset_context->job; - if (amdgpu_sriov_vf(adev)) { - /* stop the data exchange thread */ - amdgpu_virt_fini_data_exchange(adev); - } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_pre_reset(adev); amdgpu_fence_driver_isr_toggle(adev, true); @@ -5336,6 +5384,16 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, } } + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) { + dev_info(tmp_adev->dev, "Dumping IP State\n"); + /* Trigger ip dump before we reset the asic */ + for (i = 0; i < tmp_adev->num_ip_blocks; i++) + if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) + tmp_adev->ip_blocks[i].version->funcs + ->dump_ip_state((void *)tmp_adev); + dev_info(tmp_adev->dev, "Dumping IP State Completed\n"); + } + if (need_full_reset) r = amdgpu_device_ip_suspend(adev); if (need_full_reset) @@ -5348,47 +5406,17 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, return r; } -static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) -{ - int i; - - lockdep_assert_held(&adev->reset_domain->sem); - - for (i = 0; i < adev->reset_info.num_regs; i++) { - adev->reset_info.reset_dump_reg_value[i] = - RREG32(adev->reset_info.reset_dump_reg_list[i]); - - trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i], - adev->reset_info.reset_dump_reg_value[i]); - } - - return 0; -} - int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_reset_context *reset_context) { struct amdgpu_device *tmp_adev = NULL; bool need_full_reset, skip_hw_reset, vram_lost = false; int r = 0; - uint32_t i; /* Try reset handler method first */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); - if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) { - amdgpu_reset_reg_dumps(tmp_adev); - - dev_info(tmp_adev->dev, "Dumping IP State\n"); - /* Trigger ip dump before we reset the asic */ - for (i = 0; i < tmp_adev->num_ip_blocks; i++) - if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) - tmp_adev->ip_blocks[i].version->funcs - ->dump_ip_state((void *)tmp_adev); - dev_info(tmp_adev->dev, "Dumping IP State Completed\n"); - } - reset_context->reset_device_list = device_list_handle; r = amdgpu_reset_perform_reset(tmp_adev, reset_context); /* If reset handler not implemented, continue; otherwise return */ @@ -5513,7 +5541,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, * bad_page_threshold value to fix this once * probing driver again. */ - if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) { + if (!amdgpu_ras_is_rma(tmp_adev)) { /* must succeed. */ amdgpu_ras_resume(tmp_adev); } else { @@ -5891,8 +5919,14 @@ skip_hw_reset: tmp_adev->asic_reset_res = 0; if (r) { - /* bad news, how to tell it to userspace ? */ - dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter)); + /* bad news, how to tell it to userspace ? + * for ras error, we should report GPU bad status instead of + * reset failure + */ + if (reset_context->src != AMDGPU_RESET_SRC_RAS || + !amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) + dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", + atomic_read(&tmp_adev->gpu_reset_counter)); amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); } else { dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); @@ -6138,18 +6172,24 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev) { #ifdef CONFIG_HSA_AMD_P2P - uint64_t address_mask = peer_adev->dev->dma_mask ? - ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); - resource_size_t aper_limit = - adev->gmc.aper_base + adev->gmc.aper_size - 1; bool p2p_access = !adev->gmc.xgmi.connected_to_cpu && !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0); - return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size && - adev->gmc.real_vram_size == adev->gmc.visible_vram_size && - !(adev->gmc.aper_base & address_mask || - aper_limit & address_mask)); + bool is_large_bar = adev->gmc.visible_vram_size && + adev->gmc.real_vram_size == adev->gmc.visible_vram_size; + bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev); + + if (!p2p_addressable) { + uint64_t address_mask = peer_adev->dev->dma_mask ? + ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); + resource_size_t aper_limit = + adev->gmc.aper_base + adev->gmc.aper_size - 1; + + p2p_addressable = !(adev->gmc.aper_base & address_mask || + aper_limit & address_mask); + } + return is_large_bar && p2p_access && p2p_addressable; #else return false; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index ac108fca64fe..4bd61c169ca8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -278,7 +278,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, msg = RREG32(mmMP0_SMN_C2PMSG_33); if (msg & 0x80000000) break; - usleep_range(1000, 1100); + msleep(1); } } @@ -1500,6 +1500,7 @@ union gc_info { struct gc_info_v1_0 v1; struct gc_info_v1_1 v1_1; struct gc_info_v1_2 v1_2; + struct gc_info_v1_3 v1_3; struct gc_info_v2_0 v2; struct gc_info_v2_1 v2_1; }; @@ -1558,6 +1559,16 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) adev->gfx.config.gc_gl1c_size_per_instance = le32_to_cpu(gc_info->v1_2.gc_gl1c_size_per_instance); adev->gfx.config.gc_gl2c_per_gpu = le32_to_cpu(gc_info->v1_2.gc_gl2c_per_gpu); } + if (le16_to_cpu(gc_info->v1.header.version_minor) >= 3) { + adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v1_3.gc_tcp_size_per_cu); + adev->gfx.config.gc_tcp_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcp_cache_line_size); + adev->gfx.config.gc_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_size_per_sqc); + adev->gfx.config.gc_instruction_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_line_size); + adev->gfx.config.gc_scalar_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_size_per_sqc); + adev->gfx.config.gc_scalar_data_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_line_size); + adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v1_3.gc_tcc_size); + adev->gfx.config.gc_tcc_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcc_cache_line_size); + } break; case 2: adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index 8283d682f543..7cc980bf4725 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -55,8 +55,6 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev); void amdgpu_gart_table_ram_free(struct amdgpu_device *adev); int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); void amdgpu_gart_table_vram_free(struct amdgpu_device *adev); -int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); -void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); int amdgpu_gart_init(struct amdgpu_device *adev); void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev); void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 82452606ae6c..b4efeef848de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -24,10 +24,13 @@ */ #include <linux/firmware.h> +#include <linux/pm_runtime.h> + #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_rlc.h" #include "amdgpu_ras.h" +#include "amdgpu_reset.h" #include "amdgpu_xcp.h" #include "amdgpu_xgmi.h" @@ -509,6 +512,16 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.compute_ring[j], + RESET_QUEUES, 0, 0); + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -551,6 +564,18 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + j = i + xcc_id * adev->gfx.num_gfx_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.gfx_ring[j], + PREEMPT_QUEUES, 0, 0); + } + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -860,8 +885,11 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r int r; if (amdgpu_ras_is_supported(adev, ras_block->block)) { - if (!amdgpu_persistent_edc_harvesting_supported(adev)) - amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); + if (!amdgpu_persistent_edc_harvesting_supported(adev)) { + r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); + if (r) + return r; + } r = amdgpu_ras_block_late_init(adev, ras_block); if (r) @@ -995,7 +1023,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ if (amdgpu_device_skip_hw_access(adev)) return 0; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) return amdgpu_mes_rreg(adev, reg); BUG_ON(!ring->funcs->emit_rreg); @@ -1005,7 +1033,10 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ pr_err("critical bug! too many kiq readers\n"); goto failed_unlock; } - amdgpu_ring_alloc(ring, 32); + r = amdgpu_ring_alloc(ring, 32); + if (r) + goto failed_unlock; + amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) @@ -1065,13 +1096,16 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 if (amdgpu_device_skip_hw_access(adev)) return; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_wreg(adev, reg, v); return; } spin_lock_irqsave(&kiq->ring_lock, flags); - amdgpu_ring_alloc(ring, 32); + r = amdgpu_ring_alloc(ring, 32); + if (r) + goto failed_unlock; + amdgpu_ring_emit_wreg(ring, reg, v); r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) @@ -1107,6 +1141,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 failed_undo: amdgpu_ring_undo(ring); +failed_unlock: spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq_write: dev_err(adev->dev, "failed to write reg:%x\n", reg); @@ -1359,6 +1394,214 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, return sysfs_emit(buf, "%s\n", supported_partition); } +static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + long timeout = msecs_to_jiffies(1000); + struct dma_fence *f = NULL; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + int i, r; + + r = amdgpu_job_alloc_with_ib(adev, NULL, NULL, + 64, AMDGPU_IB_POOL_DIRECT, + &job); + if (r) + goto err; + + job->enforce_isolation = true; + + ib = &job->ibs[0]; + for (i = 0; i <= ring->funcs->align_mask; ++i) + ib->ptr[i] = ring->funcs->nop; + ib->length_dw = ring->funcs->align_mask + 1; + + r = amdgpu_job_submit_direct(job, ring, &f); + if (r) + goto err_free; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) + r = -ETIMEDOUT; + else if (r > 0) + r = 0; + + amdgpu_ib_free(adev, ib, f); + dma_fence_put(f); + + return 0; + +err_free: + amdgpu_job_free(job); + amdgpu_ib_free(adev, ib, f); +err: + return r; +} + +static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id) +{ + int num_xcc = NUM_XCC(adev->gfx.xcc_mask); + struct amdgpu_ring *ring; + int num_xcc_to_clear; + int i, r, xcc_id; + + if (adev->gfx.num_xcc_per_xcp) + num_xcc_to_clear = adev->gfx.num_xcc_per_xcp; + else + num_xcc_to_clear = 1; + + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; + if ((ring->xcp_id == xcp_id) && ring->sched.ready) { + r = amdgpu_gfx_run_cleaner_shader_job(ring); + if (r) + return r; + num_xcc_to_clear--; + break; + } + } + } + + if (num_xcc_to_clear) + return -ENOENT; + + return 0; +} + +static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int ret; + long value; + + if (amdgpu_in_reset(adev)) + return -EPERM; + if (adev->in_suspend && !adev->in_runpm) + return -EPERM; + + ret = kstrtol(buf, 0, &value); + + if (ret) + return -EINVAL; + + if (value < 0) + return -EINVAL; + + if (adev->xcp_mgr) { + if (value >= adev->xcp_mgr->num_xcps) + return -EINVAL; + } else { + if (value > 1) + return -EINVAL; + } + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); + return ret; + } + + ret = amdgpu_gfx_run_cleaner_shader(adev, value); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + if (ret) + return ret; + + return count; +} + +static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int i; + ssize_t size = 0; + + if (adev->xcp_mgr) { + for (i = 0; i < adev->xcp_mgr->num_xcps; i++) { + size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]); + if (i < (adev->xcp_mgr->num_xcps - 1)) + size += sysfs_emit_at(buf, size, " "); + } + buf[size++] = '\n'; + } else { + size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]); + } + + return size; +} + +static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + long partition_values[MAX_XCP] = {0}; + int ret, i, num_partitions; + const char *input_buf = buf; + + for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { + ret = sscanf(input_buf, "%ld", &partition_values[i]); + if (ret <= 0) + break; + + /* Move the pointer to the next value in the string */ + input_buf = strchr(input_buf, ' '); + if (input_buf) { + input_buf++; + } else { + i++; + break; + } + } + num_partitions = i; + + if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps) + return -EINVAL; + + if (!adev->xcp_mgr && num_partitions != 1) + return -EINVAL; + + for (i = 0; i < num_partitions; i++) { + if (partition_values[i] != 0 && partition_values[i] != 1) + return -EINVAL; + } + + mutex_lock(&adev->enforce_isolation_mutex); + + for (i = 0; i < num_partitions; i++) { + if (adev->enforce_isolation[i] && !partition_values[i]) { + /* Going from enabled to disabled */ + amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); + } else if (!adev->enforce_isolation[i] && partition_values[i]) { + /* Going from disabled to enabled */ + amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); + } + adev->enforce_isolation[i] = partition_values[i]; + } + + mutex_unlock(&adev->enforce_isolation_mutex); + + return count; +} + +static DEVICE_ATTR(run_cleaner_shader, 0200, + NULL, amdgpu_gfx_set_run_cleaner_shader); + +static DEVICE_ATTR(enforce_isolation, 0644, + amdgpu_gfx_get_enforce_isolation, + amdgpu_gfx_set_enforce_isolation); + static DEVICE_ATTR(current_compute_partition, 0644, amdgpu_gfx_get_current_compute_partition, amdgpu_gfx_set_compute_partition); @@ -1384,3 +1627,229 @@ void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_current_compute_partition); device_remove_file(adev->dev, &dev_attr_available_compute_partition); } + +int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) +{ + int r; + + if (!amdgpu_sriov_vf(adev)) { + r = device_create_file(adev->dev, &dev_attr_enforce_isolation); + if (r) + return r; + } + + r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader); + if (r) + return r; + + return 0; +} + +void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev)) + device_remove_file(adev->dev, &dev_attr_enforce_isolation); + device_remove_file(adev->dev, &dev_attr_run_cleaner_shader); +} + +int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size) +{ + if (!adev->gfx.enable_cleaner_shader) + return -EOPNOTSUPP; + + return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.cleaner_shader_obj, + &adev->gfx.cleaner_shader_gpu_addr, + (void **)&adev->gfx.cleaner_shader_cpu_ptr); +} + +void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev) +{ + if (!adev->gfx.enable_cleaner_shader) + return; + + amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj, + &adev->gfx.cleaner_shader_gpu_addr, + (void **)&adev->gfx.cleaner_shader_cpu_ptr); +} + +void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size, + const void *cleaner_shader_ptr) +{ + if (!adev->gfx.enable_cleaner_shader) + return; + + if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr) + memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr, + cleaner_shader_size); +} + +/** + * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver) + * @adev: amdgpu_device pointer + * @idx: Index of the scheduler to control + * @enable: Whether to enable or disable the KFD scheduler + * + * This function is used to control the KFD (Kernel Fusion Driver) scheduler + * from the KGD. It is part of the cleaner shader feature. This function plays + * a key role in enforcing process isolation on the GPU. + * + * The function uses a reference count mechanism (kfd_sch_req_count) to keep + * track of the number of requests to enable the KFD scheduler. When a request + * to enable the KFD scheduler is made, the reference count is decremented. + * When the reference count reaches zero, a delayed work is scheduled to + * enforce isolation after a delay of GFX_SLICE_PERIOD. + * + * When a request to disable the KFD scheduler is made, the function first + * checks if the reference count is zero. If it is, it cancels the delayed work + * for enforcing isolation and checks if the KFD scheduler is active. If the + * KFD scheduler is active, it sends a request to stop the KFD scheduler and + * sets the KFD scheduler state to inactive. Then, it increments the reference + * count. + * + * The function is synchronized using the kfd_sch_mutex to ensure that the KFD + * scheduler state and reference count are updated atomically. + * + * Note: If the reference count is already zero when a request to enable the + * KFD scheduler is made, it means there's an imbalance bug somewhere. The + * function triggers a warning in this case. + */ +static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, + bool enable) +{ + mutex_lock(&adev->gfx.kfd_sch_mutex); + + if (enable) { + /* If the count is already 0, it means there's an imbalance bug somewhere. + * Note that the bug may be in a different caller than the one which triggers the + * WARN_ON_ONCE. + */ + if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) { + dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n"); + goto unlock; + } + + adev->gfx.kfd_sch_req_count[idx]--; + + if (adev->gfx.kfd_sch_req_count[idx] == 0 && + adev->gfx.kfd_sch_inactive[idx]) { + schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, + GFX_SLICE_PERIOD); + } + } else { + if (adev->gfx.kfd_sch_req_count[idx] == 0) { + cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work); + if (!adev->gfx.kfd_sch_inactive[idx]) { + amdgpu_amdkfd_stop_sched(adev, idx); + adev->gfx.kfd_sch_inactive[idx] = true; + } + } + + adev->gfx.kfd_sch_req_count[idx]++; + } + +unlock: + mutex_unlock(&adev->gfx.kfd_sch_mutex); +} + +/** + * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation + * + * @work: work_struct. + * + * This function is the work handler for enforcing shader isolation on AMD GPUs. + * It counts the number of emitted fences for each GFX and compute ring. If there + * are any fences, it schedules the `enforce_isolation_work` to be run after a + * delay of `GFX_SLICE_PERIOD`. If there are no fences, it signals the Kernel Fusion + * Driver (KFD) to resume the runqueue. The function is synchronized using the + * `enforce_isolation_mutex`. + */ +void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) +{ + struct amdgpu_isolation_work *isolation_work = + container_of(work, struct amdgpu_isolation_work, work.work); + struct amdgpu_device *adev = isolation_work->adev; + u32 i, idx, fences = 0; + + if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION) + idx = 0; + else + idx = isolation_work->xcp_id; + + if (idx >= MAX_XCP) + return; + + mutex_lock(&adev->enforce_isolation_mutex); + for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) { + if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id) + fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]); + } + for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) { + if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id) + fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]); + } + if (fences) { + schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, + GFX_SLICE_PERIOD); + } else { + /* Tell KFD to resume the runqueue */ + if (adev->kfd.init_complete) { + WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]); + WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]); + amdgpu_amdkfd_start_sched(adev, idx); + adev->gfx.kfd_sch_inactive[idx] = false; + } + } + mutex_unlock(&adev->enforce_isolation_mutex); +} + +void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 idx; + + if (!adev->gfx.enable_cleaner_shader) + return; + + if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION) + idx = 0; + else + idx = ring->xcp_id; + + if (idx >= MAX_XCP) + return; + + mutex_lock(&adev->enforce_isolation_mutex); + if (adev->enforce_isolation[idx]) { + if (adev->kfd.init_complete) + amdgpu_gfx_kfd_sch_ctrl(adev, idx, false); + } + mutex_unlock(&adev->enforce_isolation_mutex); +} + +void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 idx; + + if (!adev->gfx.enable_cleaner_shader) + return; + + if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION) + idx = 0; + else + idx = ring->xcp_id; + + if (idx >= MAX_XCP) + return; + + mutex_lock(&adev->enforce_isolation_mutex); + if (adev->enforce_isolation[idx]) { + if (adev->kfd.init_complete) + amdgpu_gfx_kfd_sch_ctrl(adev, idx, true); + } + mutex_unlock(&adev->enforce_isolation_mutex); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index ddda94e49db4..5644e10a86a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -34,6 +34,7 @@ #include "soc15.h" #include "amdgpu_ras.h" #include "amdgpu_ring_mux.h" +#include "amdgpu_xcp.h" /* GFX current status */ #define AMDGPU_GFX_NORMAL_MODE 0x00000000L @@ -138,6 +139,10 @@ struct kiq_pm4_funcs { void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring, uint16_t pasid, uint32_t flush_type, bool all_hub); + void (*kiq_reset_hw_queue)(struct amdgpu_ring *kiq_ring, + uint32_t queue_type, uint32_t me_id, + uint32_t pipe_id, uint32_t queue_id, + uint32_t xcc_id, uint32_t vmid); /* Packet sizes */ int set_resources_size; int map_queues_size; @@ -240,6 +245,12 @@ struct amdgpu_gfx_config { uint32_t gc_tcp_size_per_cu; uint32_t gc_num_cu_per_sqc; uint32_t gc_tcc_size; + uint32_t gc_tcp_cache_line_size; + uint32_t gc_instruction_cache_size_per_sqc; + uint32_t gc_instruction_cache_line_size; + uint32_t gc_scalar_data_cache_size_per_sqc; + uint32_t gc_scalar_data_cache_line_size; + uint32_t gc_tcc_cache_line_size; }; struct amdgpu_cu_info { @@ -339,6 +350,12 @@ struct amdgpu_me { DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES); }; +struct amdgpu_isolation_work { + struct amdgpu_device *adev; + u32 xcp_id; + struct delayed_work work; +}; + struct amdgpu_gfx { struct mutex gpu_clock_mutex; struct amdgpu_gfx_config config; @@ -391,6 +408,7 @@ struct amdgpu_gfx { struct amdgpu_irq_src eop_irq; struct amdgpu_irq_src priv_reg_irq; struct amdgpu_irq_src priv_inst_irq; + struct amdgpu_irq_src bad_op_irq; struct amdgpu_irq_src cp_ecc_error_irq; struct amdgpu_irq_src sq_irq; struct amdgpu_irq_src rlc_gc_fed_irq; @@ -439,6 +457,21 @@ struct amdgpu_gfx { uint32_t *ip_dump_core; uint32_t *ip_dump_compute_queues; uint32_t *ip_dump_gfx_queues; + + struct mutex reset_sem_mutex; + + /* cleaner shader */ + struct amdgpu_bo *cleaner_shader_obj; + unsigned int cleaner_shader_size; + u64 cleaner_shader_gpu_addr; + void *cleaner_shader_cpu_ptr; + const void *cleaner_shader_ptr; + bool enable_cleaner_shader; + struct amdgpu_isolation_work enforce_isolation[MAX_XCP]; + /* Mutex for synchronizing KFD scheduler operations */ + struct mutex kfd_sch_mutex; + u64 kfd_sch_req_count[MAX_XCP]; + bool kfd_sch_inactive[MAX_XCP]; }; struct amdgpu_gfx_ras_reg_entry { @@ -540,6 +573,17 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev, void *ras_error_status, void (*func)(struct amdgpu_device *adev, void *ras_error_status, int xcc_id)); +int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size); +void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev); +void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, + unsigned int cleaner_shader_size, + const void *cleaner_shader_ptr); +int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev); +void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev); +void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work); +void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring); +void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); static inline const char *amdgpu_gfx_compute_mode_desc(int mode) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h index 103a837ccc71..c7b44aeb671b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h @@ -38,8 +38,6 @@ struct amdgpu_gfxhub_funcs { void (*mode2_save_regs)(struct amdgpu_device *adev); void (*mode2_restore_regs)(struct amdgpu_device *adev); void (*halt)(struct amdgpu_device *adev); - bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, - int xcc_id); }; struct amdgpu_gfxhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c02659025656..17a19d49d30a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -589,7 +589,8 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) ring = adev->rings[i]; vmhub = ring->vm_hub; - if (ring == &adev->mes.ring || + if (ring == &adev->mes.ring[0] || + ring == &adev->mes.ring[1] || ring == &adev->umsch_mm.ring) continue; @@ -761,7 +762,7 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, unsigned long flags; uint32_t seq; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, ref, mask); return; @@ -785,7 +786,8 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, goto failed_kiq; might_sleep(); - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY && + !amdgpu_reset_pending(adev->reset_domain)) { msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index febca3130497..4d951a1baefa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { uint64_t addr, uint64_t *flags); /* get the amount of memory used by the vbios for pre-OS console */ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); + /* get the DCC buffer alignment */ + unsigned int (*get_dcc_alignment)(struct amdgpu_device *adev); enum amdgpu_memory_partition (*query_mem_partition_mode)( struct amdgpu_device *adev); @@ -363,6 +365,10 @@ struct amdgpu_gmc { (adev)->gmc.gmc_funcs->override_vm_pte_flags \ ((adev), (vm), (addr), (pte_flags)) #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) +#define amdgpu_gmc_get_dcc_alignment(adev) ({ \ + typeof(adev) _adev = (adev); \ + _adev->gmc.gmc_funcs->get_dcc_alignment(_adev); \ +}) /** * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index b6a8bddada4c..92d27d32de41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -424,7 +424,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r || !idle) goto error; - if (amdgpu_vmid_uses_reserved(vm, vmhub)) { + if (amdgpu_vmid_uses_reserved(adev, vm, vmhub)) { r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); if (r || !id) goto error; @@ -476,15 +476,19 @@ error: /* * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID + * @adev: amdgpu_device pointer * @vm: the VM to check * @vmhub: the VMHUB which will be used * * Returns: True if the VM will use a reserved VMID. */ -bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) +bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, unsigned int vmhub) { return vm->reserved_vmid[vmhub] || - (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0))); + (adev->enforce_isolation[(vm->root.bo->xcp_id != AMDGPU_XCP_NO_PARTITION) ? + vm->root.bo->xcp_id : 0] && + AMDGPU_IS_GFXHUB(vmhub)); } int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, @@ -600,9 +604,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) } } /* alloc a default reserved vmid to enforce isolation */ - if (enforce_isolation) - amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); - + for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { + if (adev->enforce_isolation[i]) + amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); + } } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 240fa6751260..4012fb2dd08a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -78,7 +78,8 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); -bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); +bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, unsigned int vmhub); int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, unsigned vmhub); void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h index 44e2ea8c9728..b03664c66dd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h @@ -49,6 +49,7 @@ struct amdgpu_isp { const struct isp_funcs *funcs; struct mfd_cell *isp_cell; struct resource *isp_res; + struct resource *isp_i2c_res; struct isp_platform_data *isp_pdata; unsigned int harvest_config; const struct firmware *fw; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index e238f2832f65..597489dea114 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -72,6 +72,26 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) dma_fence_set_error(&s_job->s_fence->finished, -ETIME); + /* attempt a per ring reset */ + if (amdgpu_gpu_recovery && + ring->funcs->reset) { + /* stop the scheduler, but don't mess with the + * bad job yet because if ring reset fails + * we'll fall back to full GPU reset. + */ + drm_sched_wqueue_stop(&ring->sched); + r = amdgpu_ring_reset(ring, job->vmid); + if (!r) { + if (amdgpu_ring_sched_ready(ring)) + drm_sched_stop(&ring->sched, s_job); + atomic_inc(&ring->adev->gpu_reset_counter); + amdgpu_fence_driver_force_completion(ring); + if (amdgpu_ring_sched_ready(ring)) + drm_sched_start(&ring->sched); + goto exit; + } + } + if (amdgpu_device_should_recover_gpu(ring->adev)) { struct amdgpu_reset_context reset_context; memset(&reset_context, 0, sizeof(reset_context)); @@ -264,9 +284,8 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, struct dma_fence *fence = NULL; int r; - /* Ignore soft recovered fences here */ r = drm_sched_entity_error(s_entity); - if (r && r != -ENODATA) + if (r) goto error; if (!fence && job->gang_submit) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index a963a25ddd62..ce6b9ba967ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -76,6 +76,9 @@ struct amdgpu_job { /* job_run_counter >= 1 means a resubmit job */ uint32_t job_run_counter; + /* enforce isolation */ + bool enforce_isolation; + uint32_t num_ibs; struct amdgpu_ib ibs[]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 0a799942343d..016a6f6c4267 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -43,6 +43,7 @@ #include "amdgpu_gem.h" #include "amdgpu_display.h" #include "amdgpu_ras.h" +#include "amdgpu_reset.h" #include "amd_pcie.h" void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) @@ -778,6 +779,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ? -EFAULT : 0; } case AMDGPU_INFO_READ_MMR_REG: { + int ret = 0; unsigned int n, alloc_size; uint32_t *regs; unsigned int se_num = (info->read_mmr_reg.instance >> @@ -787,24 +789,37 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) AMDGPU_INFO_MMR_SH_INDEX_SHIFT) & AMDGPU_INFO_MMR_SH_INDEX_MASK; + if (!down_read_trylock(&adev->reset_domain->sem)) + return -ENOENT; + /* set full masks if the userspace set all bits * in the bitfields */ - if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) + if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) { se_num = 0xffffffff; - else if (se_num >= AMDGPU_GFX_MAX_SE) - return -EINVAL; - if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) + } else if (se_num >= AMDGPU_GFX_MAX_SE) { + ret = -EINVAL; + goto out; + } + + if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) { sh_num = 0xffffffff; - else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) - return -EINVAL; + } else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) { + ret = -EINVAL; + goto out; + } - if (info->read_mmr_reg.count > 128) - return -EINVAL; + if (info->read_mmr_reg.count > 128) { + ret = -EINVAL; + goto out; + } regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL); - if (!regs) - return -ENOMEM; + if (!regs) { + ret = -ENOMEM; + goto out; + } + alloc_size = info->read_mmr_reg.count * sizeof(*regs); amdgpu_gfx_off_ctrl(adev, false); @@ -816,13 +831,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) info->read_mmr_reg.dword_offset + i); kfree(regs); amdgpu_gfx_off_ctrl(adev, true); - return -EFAULT; + ret = -EFAULT; + goto out; } } amdgpu_gfx_off_ctrl(adev, true); n = copy_to_user(out, regs, min(size, alloc_size)); kfree(regs); - return n ? -EFAULT : 0; + ret = (n ? -EFAULT : 0); +out: + up_read(&adev->reset_domain->sem); + return ret; } case AMDGPU_INFO_DEV_INFO: { struct drm_amdgpu_info_device *dev_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 2542bd7aa7c7..18ee60378727 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -396,7 +396,6 @@ static int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set, struct ras_err_data *err_data) { - struct ras_err_addr err_addr; struct amdgpu_smuio_mcm_config_info mcm_info; struct mca_bank_node *node, *tmp; struct mca_bank_entry *entry; @@ -421,27 +420,20 @@ static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device *adev, enum amdgpu_r continue; memset(&mcm_info, 0, sizeof(mcm_info)); - memset(&err_addr, 0, sizeof(err_addr)); mcm_info.socket_id = entry->info.socket_id; mcm_info.die_id = entry->info.aid; - if (blk == AMDGPU_RAS_BLOCK__UMC) { - err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS]; - err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID]; - err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR]; - } - if (type == AMDGPU_MCA_ERROR_TYPE_UE) { amdgpu_ras_error_statistic_ue_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); + &mcm_info, (uint64_t)count); } else { if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS])) amdgpu_ras_error_statistic_de_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); + &mcm_info, (uint64_t)count); else amdgpu_ras_error_statistic_ce_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); + &mcm_info, (uint64_t)count); } amdgpu_mca_bank_set_remove_node(mca_set, node); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index dac88d2dd70d..44c74a08987d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -135,9 +135,11 @@ int amdgpu_mes_init(struct amdgpu_device *adev) idr_init(&adev->mes.queue_id_idr); ida_init(&adev->mes.doorbell_ida); spin_lock_init(&adev->mes.queue_id_lock); - spin_lock_init(&adev->mes.ring_lock); mutex_init(&adev->mes.mutex_hidden); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) + spin_lock_init(&adev->mes.ring_lock[i]); + adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; adev->mes.vmid_mask_mmhub = 0xffffff00; adev->mes.vmid_mask_gfxhub = 0xffffff00; @@ -163,36 +165,38 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.sdma_hqd_mask[i] = 0xfc; } - r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs); - if (r) { - dev_err(adev->dev, - "(%d) ring trail_fence_offs wb alloc failed\n", r); - goto error_ids; - } - adev->mes.sch_ctx_gpu_addr = - adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4); - adev->mes.sch_ctx_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs]; + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) ring trail_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.sch_ctx_gpu_addr[i] = + adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4); + adev->mes.sch_ctx_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]]; - r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs); - if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - dev_err(adev->dev, - "(%d) query_status_fence_offs wb alloc failed\n", r); - goto error_ids; + r = amdgpu_device_wb_get(adev, + &adev->mes.query_status_fence_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) query_status_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr + + (adev->mes.query_status_fence_offs[i] * 4); + adev->mes.query_status_fence_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]]; } - adev->mes.query_status_fence_gpu_addr = - adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4); - adev->mes.query_status_fence_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs]; r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs); if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); dev_err(adev->dev, "(%d) read_val_offs alloc failed\n", r); - goto error_ids; + goto error; } adev->mes.read_val_gpu_addr = adev->wb.gpu_addr + (adev->mes.read_val_offs * 4); @@ -212,10 +216,16 @@ int amdgpu_mes_init(struct amdgpu_device *adev) error_doorbell: amdgpu_mes_doorbell_free(adev); error: - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); -error_ids: + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); idr_destroy(&adev->mes.queue_id_idr); @@ -226,13 +236,22 @@ error_ids: void amdgpu_mes_fini(struct amdgpu_device *adev) { + int i; + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, &adev->mes.event_log_cpu_addr); - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + amdgpu_mes_doorbell_free(adev); idr_destroy(&adev->mes.pasid_idr); @@ -482,60 +501,50 @@ int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id) int amdgpu_mes_suspend(struct amdgpu_device *adev) { - struct idr *idp; - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang; struct mes_suspend_gang_input input; - int r, pasid; + int r; + + if (!amdgpu_mes_suspend_resume_all_supported(adev)) + return 0; + + memset(&input, 0x0, sizeof(struct mes_suspend_gang_input)); + input.suspend_all_gangs = 1; /* * Avoid taking any other locks under MES lock to avoid circular * lock dependencies. */ amdgpu_mes_lock(&adev->mes); - - idp = &adev->mes.pasid_idr; - - idr_for_each_entry(idp, process, pasid) { - list_for_each_entry(gang, &process->gang_list, list) { - r = adev->mes.funcs->suspend_gang(&adev->mes, &input); - if (r) - DRM_ERROR("failed to suspend pasid %d gangid %d", - pasid, gang->gang_id); - } - } - + r = adev->mes.funcs->suspend_gang(&adev->mes, &input); amdgpu_mes_unlock(&adev->mes); - return 0; + if (r) + DRM_ERROR("failed to suspend all gangs"); + + return r; } int amdgpu_mes_resume(struct amdgpu_device *adev) { - struct idr *idp; - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang; struct mes_resume_gang_input input; - int r, pasid; + int r; + + if (!amdgpu_mes_suspend_resume_all_supported(adev)) + return 0; + + memset(&input, 0x0, sizeof(struct mes_resume_gang_input)); + input.resume_all_gangs = 1; /* * Avoid taking any other locks under MES lock to avoid circular * lock dependencies. */ amdgpu_mes_lock(&adev->mes); - - idp = &adev->mes.pasid_idr; - - idr_for_each_entry(idp, process, pasid) { - list_for_each_entry(gang, &process->gang_list, list) { - r = adev->mes.funcs->resume_gang(&adev->mes, &input); - if (r) - DRM_ERROR("failed to resume pasid %d gangid %d", - pasid, gang->gang_id); - } - } - + r = adev->mes.funcs->resume_gang(&adev->mes, &input); amdgpu_mes_unlock(&adev->mes); - return 0; + if (r) + DRM_ERROR("failed to resume all gangs"); + + return r; } static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, @@ -774,6 +783,49 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) return 0; } +int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id) +{ + unsigned long flags; + struct amdgpu_mes_queue *queue; + struct amdgpu_mes_gang *gang; + struct mes_reset_queue_input queue_input; + int r; + + /* + * Avoid taking any other locks under MES lock to avoid circular + * lock dependencies. + */ + amdgpu_mes_lock(&adev->mes); + + /* remove the mes gang from idr list */ + spin_lock_irqsave(&adev->mes.queue_id_lock, flags); + + queue = idr_find(&adev->mes.queue_id_idr, queue_id); + if (!queue) { + spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); + amdgpu_mes_unlock(&adev->mes); + DRM_ERROR("queue id %d doesn't exist\n", queue_id); + return -EINVAL; + } + spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); + + DRM_DEBUG("try to reset queue, doorbell off = 0x%llx\n", + queue->doorbell_off); + + gang = queue->gang; + queue_input.doorbell_offset = queue->doorbell_off; + queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; + + r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input); + if (r) + DRM_ERROR("failed to reset hardware queue, queue id = %d\n", + queue_id); + + amdgpu_mes_unlock(&adev->mes); + + return 0; +} + int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -819,6 +871,30 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, return r; } +int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + unsigned int vmid) +{ + struct mes_reset_legacy_queue_input queue_input; + int r; + + memset(&queue_input, 0, sizeof(queue_input)); + + queue_input.queue_type = ring->funcs->type; + queue_input.doorbell_offset = ring->doorbell_index; + queue_input.pipe_id = ring->pipe; + queue_input.queue_id = ring->queue; + queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + queue_input.wptr_addr = ring->wptr_gpu_addr; + queue_input.vmid = vmid; + + r = adev->mes.funcs->reset_legacy_queue(&adev->mes, &queue_input); + if (r) + DRM_ERROR("failed to reset legacy queue\n"); + + return r; +} + uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) { struct mes_misc_op_input op_input; @@ -1499,7 +1575,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - if (adev->enable_uni_mes && pipe == AMDGPU_MES_SCHED_PIPE) { + if (adev->enable_uni_mes) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_uni_mes.bin", ucode_prefix); } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && @@ -1514,7 +1590,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1"); } - r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name); + r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], "%s", fw_name); if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) { dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix); r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], @@ -1565,6 +1641,19 @@ out: return r; } +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) +{ + uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; + bool is_supported = false; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && + mes_rev >= 0x63) + is_supported = true; + + return is_supported; +} + #if defined(CONFIG_DEBUG_FS) static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 2d659c612f03..a5b1ea60cac8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -82,8 +82,8 @@ struct amdgpu_mes { uint64_t default_process_quantum; uint64_t default_gang_quantum; - struct amdgpu_ring ring; - spinlock_t ring_lock; + struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES]; + spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES]; const struct firmware *fw[AMDGPU_MAX_MES_PIPES]; @@ -112,12 +112,12 @@ struct amdgpu_mes { uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; - uint32_t sch_ctx_offs; - uint64_t sch_ctx_gpu_addr; - uint64_t *sch_ctx_ptr; - uint32_t query_status_fence_offs; - uint64_t query_status_fence_gpu_addr; - uint64_t *query_status_fence_ptr; + uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES]; + uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES]; uint32_t read_val_offs; uint64_t read_val_gpu_addr; uint32_t *read_val_ptr; @@ -248,6 +248,11 @@ struct mes_remove_queue_input { uint64_t gang_context_addr; }; +struct mes_reset_queue_input { + uint32_t doorbell_offset; + uint64_t gang_context_addr; +}; + struct mes_map_legacy_queue_input { uint32_t queue_type; uint32_t doorbell_offset; @@ -279,6 +284,16 @@ struct mes_resume_gang_input { uint64_t gang_context_addr; }; +struct mes_reset_legacy_queue_input { + uint32_t queue_type; + uint32_t doorbell_offset; + uint32_t pipe_id; + uint32_t queue_id; + uint64_t mqd_addr; + uint64_t wptr_addr; + uint32_t vmid; +}; + enum mes_misc_opcode { MES_MISC_OP_WRITE_REG, MES_MISC_OP_READ_REG, @@ -347,6 +362,12 @@ struct amdgpu_mes_funcs { int (*misc_op)(struct amdgpu_mes *mes, struct mes_misc_op_input *input); + + int (*reset_legacy_queue)(struct amdgpu_mes *mes, + struct mes_reset_legacy_queue_input *input); + + int (*reset_hw_queue)(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input); }; #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) @@ -374,6 +395,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, struct amdgpu_mes_queue_properties *qprops, int *queue_id); int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id); +int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id); int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); @@ -381,6 +403,9 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, enum amdgpu_unmap_queues_action action, u64 gpu_addr, u64 seq); +int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + unsigned int vmid); uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg); int amdgpu_mes_wreg(struct amdgpu_device *adev, @@ -478,4 +503,6 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) memalloc_noreclaim_restore(mes->saved_flags); mutex_unlock(&mes->mutex_hidden); } + +bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); #endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 95d676ee207f..1ca9d4ed8063 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -63,8 +63,6 @@ struct amdgpu_mmhub_funcs { uint64_t page_table_base); void (*update_power_gating)(struct amdgpu_device *adev, bool enable); - bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, - int hub_inst); }; struct amdgpu_mmhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index d002b845d8ac..5e3faefc5510 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -51,6 +51,7 @@ struct amdgpu_encoder; struct amdgpu_router; struct amdgpu_hpd; struct edid; +struct drm_edid; #define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base) #define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base) @@ -326,8 +327,7 @@ struct amdgpu_mode_info { /* FMT dithering */ struct drm_property *dither_property; /* hardcoded DFP edid from BIOS */ - struct edid *bios_hardcoded_edid; - int bios_hardcoded_edid_size; + const struct drm_edid *bios_hardcoded_edid; /* firmware flags */ u32 firmware_flags; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index bc42ccbde659..d7e27957013f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -90,6 +90,12 @@ struct amdgpu_bo_va { bool cleared; bool is_xgmi; + + /* + * protected by vm reservation lock + * if non-zero, cannot unmap from GPU because user queues may still access it + */ + unsigned int queue_refcount; }; struct amdgpu_bo { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 0c856005df6b..38face981c3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -166,6 +166,9 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t if (ret) return -EFAULT; + if (ta_bin_len > PSP_1_MEG) + return -EINVAL; + copy_pos += sizeof(uint32_t); ta_bin = kzalloc(ta_bin_len, GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index d0307c55da50..61a2f386d9fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1223,11 +1223,11 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; amdgpu_ras_error_statistic_de_count(&obj->err_data, - &err_info->mcm_info, NULL, err_info->de_count); + &err_info->mcm_info, err_info->de_count); amdgpu_ras_error_statistic_ce_count(&obj->err_data, - &err_info->mcm_info, NULL, err_info->ce_count); + &err_info->mcm_info, err_info->ce_count); amdgpu_ras_error_statistic_ue_count(&obj->err_data, - &err_info->mcm_info, NULL, err_info->ue_count); + &err_info->mcm_info, err_info->ue_count); } } else { /* for legacy asic path which doesn't has error source info */ @@ -2153,7 +2153,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * /* gpu reset is fallback for failed and default cases. * For RMA case, amdgpu_umc_poison_handler will handle gpu reset. */ - if (poison_stat && !con->is_rma) { + if (poison_stat && !amdgpu_ras_is_rma(adev)) { event_id = amdgpu_ras_acquire_event_id(adev, type); RAS_EVENT_LOG(adev, event_id, "GPU reset for %s RAS poison consumption is issued!\n", @@ -2881,9 +2881,6 @@ static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log) { mutex_init(&ecc_log->lock); - /* Set any value as siphash key */ - memset(&ecc_log->ecc_key, 0xad, sizeof(ecc_log->ecc_key)); - INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL); ecc_log->de_queried_count = 0; ecc_log->prev_de_queried_count = 0; @@ -2948,7 +2945,7 @@ static void amdgpu_ras_do_page_retirement(struct work_struct *work) amdgpu_ras_error_data_fini(&err_data); - if (err_cnt && con->is_rma) + if (err_cnt && amdgpu_ras_is_rma(adev)) amdgpu_ras_reset_gpu(adev); amdgpu_ras_schedule_retirement_dwork(con, @@ -3049,7 +3046,7 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev, } /* for RMA, amdgpu_ras_poison_creation_handler will trigger gpu reset */ - if (reset_flags && !con->is_rma) { + if (reset_flags && !amdgpu_ras_is_rma(adev)) { if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET; else if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) @@ -3195,7 +3192,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) * This calling fails when is_rma is true or * ret != 0. */ - if (con->is_rma || ret) + if (amdgpu_ras_is_rma(adev) || ret) goto free; if (con->eeprom_control.ras_num_recs) { @@ -3244,7 +3241,7 @@ out: * Except error threshold exceeding case, other failure cases in this * function would not fail amdgpu driver init. */ - if (!con->is_rma) + if (!amdgpu_ras_is_rma(adev)) ret = 0; else ret = -EINVAL; @@ -4287,7 +4284,7 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); /* mode1 is the only selection for RMA status */ - if (ras->is_rma) { + if (amdgpu_ras_is_rma(adev)) { ras->gpu_reset_flags = 0; ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET; } @@ -4611,8 +4608,6 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d if (!err_node) return NULL; - INIT_LIST_HEAD(&err_node->err_info.err_addr_list); - memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info)); err_data->err_list_count++; @@ -4622,21 +4617,9 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d return &err_node->err_info; } -void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *err_addr) -{ - /* This function will be retired. */ - return; -} - -void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *mca_err_addr) -{ - list_del(&mca_err_addr->node); - kfree(mca_err_addr); -} - int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count) { struct ras_err_info *err_info; @@ -4650,9 +4633,6 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, if (!err_info) return -EINVAL; - if (err_addr && err_addr->err_status) - amdgpu_ras_add_mca_err_addr(err_info, err_addr); - err_info->ue_count += count; err_data->ue_count += count; @@ -4660,8 +4640,8 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, } int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count) { struct ras_err_info *err_info; @@ -4682,8 +4662,8 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, } int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count) { struct ras_err_info *err_info; @@ -4697,9 +4677,6 @@ int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, if (!err_info) return -EINVAL; - if (err_addr && err_addr->err_status) - amdgpu_ras_add_mca_err_addr(err_info, err_addr); - err_info->de_count += count; err_data->de_count += count; @@ -4771,6 +4748,16 @@ static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev, dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm bist test failed\n", socket_id, aid_id, hbm_id, fw_status); + + if (AMDGPU_RAS_GPU_ERR_DATA_ABORT(boot_error)) + dev_info(adev->dev, + "socket: %d, aid: %d, fw_status: 0x%x, data abort exception\n", + socket_id, aid_id, fw_status); + + if (AMDGPU_RAS_GPU_ERR_UNKNOWN(boot_error)) + dev_info(adev->dev, + "socket: %d, aid: %d, fw_status: 0x%x, unknown boot time errors\n", + socket_id, aid_id, fw_status); } static bool amdgpu_ras_boot_error_detected(struct amdgpu_device *adev, @@ -4837,3 +4824,13 @@ void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id, va_end(args); } + +bool amdgpu_ras_is_rma(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con) + return false; + + return con->is_rma; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index dcf1f3dbb5c4..669720a9c60a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -28,7 +28,6 @@ #include <linux/list.h> #include <linux/kfifo.h> #include <linux/radix-tree.h> -#include <linux/siphash.h> #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" #include "amdgpu_smuio.h" @@ -47,6 +46,8 @@ struct amdgpu_iv_entry; #define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8) #define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11) #define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 14, 13) +#define AMDGPU_RAS_GPU_ERR_DATA_ABORT(x) AMDGPU_GET_REG_FIELD(x, 29, 29) +#define AMDGPU_RAS_GPU_ERR_UNKNOWN(x) AMDGPU_GET_REG_FIELD(x, 30, 30) #define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 100 #define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA @@ -476,16 +477,15 @@ struct ras_err_pages { }; struct ras_ecc_err { - u64 hash_index; uint64_t status; uint64_t ipid; uint64_t addr; + uint64_t pa_pfn; struct ras_err_pages err_pages; }; struct ras_ecc_log_info { struct mutex lock; - siphash_key_t ecc_key; struct radix_tree_root de_page_tree; uint64_t de_queried_count; uint64_t prev_de_queried_count; @@ -572,19 +572,11 @@ struct ras_fs_data { char debugfs_name[32]; }; -struct ras_err_addr { - struct list_head node; - uint64_t err_status; - uint64_t err_ipid; - uint64_t err_addr; -}; - struct ras_err_info { struct amdgpu_smuio_mcm_config_info mcm_info; u64 ce_count; u64 ue_count; u64 de_count; - struct list_head err_addr_list; }; struct ras_err_node { @@ -941,14 +933,14 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev, int amdgpu_ras_error_data_init(struct ras_err_data *err_data); void amdgpu_ras_error_data_fini(struct ras_err_data *err_data); int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count); int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count); int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 count); void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances); int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk, const struct aca_info *aca_info, void *data); @@ -957,12 +949,6 @@ int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk) ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr, struct aca_handle *handle, char *buf, void *data); -void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, - struct ras_err_addr *err_addr); - -void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, - struct ras_err_addr *mca_err_addr); - void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status); bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); @@ -982,4 +968,5 @@ __printf(3, 4) void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id, const char *fmt, ...); +bool amdgpu_ras_is_rma(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 4ae581f3fcb5..1cb920abc2fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -136,6 +136,12 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma return queue_work(domain->wq, work); } +static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain) +{ + lockdep_assert_held(&domain->sem); + return rwsem_is_contended(&domain->sem); +} + void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain); void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index ad49cecb20b8..690976665cf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -144,8 +144,10 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) /* We pad to match fetch size */ count = ring->funcs->align_mask + 1 - (ring->wptr & ring->funcs->align_mask); - count %= ring->funcs->align_mask + 1; - ring->funcs->insert_nop(ring, count); + count &= ring->funcs->align_mask; + + if (count != 0) + ring->funcs->insert_nop(ring, count); mb(); amdgpu_ring_set_wptr(ring); @@ -212,6 +214,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) sched_hw_submission = max(sched_hw_submission, 256); + if (ring->funcs->type == AMDGPU_RING_TYPE_MES) + sched_hw_submission = 8; else if (ring == &adev->sdma.instance[0].page) sched_hw_submission = 256; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 582053f1cd56..f93f51002201 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -235,6 +235,8 @@ struct amdgpu_ring_funcs { void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset); void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset); void (*patch_de)(struct amdgpu_ring *ring, unsigned offset); + int (*reset)(struct amdgpu_ring *ring, unsigned int vmid); + void (*emit_cleaner_shader)(struct amdgpu_ring *ring); }; struct amdgpu_ring { @@ -334,6 +336,7 @@ struct amdgpu_ring { #define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o))) #define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o))) #define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o))) +#define amdgpu_ring_reset(r, v) (r)->funcs->reset((r), (v)) unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type); int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c index d234b7ccfaaf..1c66da1c3fb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -410,7 +410,7 @@ void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring) struct amdgpu_ring_mux *mux = &adev->gfx.muxer; WARN_ON(!ring->is_sw_ring); - if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) + if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) return; amdgpu_ring_mux_end_ib(mux, ring); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index d3706a484870..087ce0f6fa07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -115,6 +115,7 @@ struct amdgpu_sdma { bool has_page_queue; struct ras_common_if *ras_if; struct amdgpu_sdma_ras *ras; + uint32_t *ip_dump; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 2f84bdb8c594..bb7b9b2eaac1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -196,7 +196,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, amdgpu_umc_handle_bad_pages(adev, ras_error_status); if ((err_data->ue_count || err_data->de_count) && - (reset || (con && con->is_rma))) { + (reset || amdgpu_ras_is_rma(adev))) { con->gpu_reset_flags |= reset; amdgpu_ras_reset_gpu(adev); } @@ -204,55 +204,6 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, return AMDGPU_RAS_SUCCESS; } -int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, - uint32_t reset, uint32_t timeout_ms) -{ - struct ras_err_data err_data; - struct ras_common_if head = { - .block = AMDGPU_RAS_BLOCK__UMC, - }; - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - uint32_t timeout = timeout_ms; - - memset(&err_data, 0, sizeof(err_data)); - amdgpu_ras_error_data_init(&err_data); - - do { - - amdgpu_umc_handle_bad_pages(adev, &err_data); - - if (timeout && !err_data.de_count) { - msleep(1); - timeout--; - } - - } while (timeout && !err_data.de_count); - - if (!timeout) - dev_warn(adev->dev, "Can't find bad pages\n"); - - if (err_data.de_count) - dev_info(adev->dev, "%ld new deferred hardware errors detected\n", err_data.de_count); - - if (obj) { - obj->err_data.ue_count += err_data.ue_count; - obj->err_data.ce_count += err_data.ce_count; - obj->err_data.de_count += err_data.de_count; - } - - amdgpu_ras_error_data_fini(&err_data); - - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - - if (reset || (err_data.err_addr_cnt && con && con->is_rma)) { - con->gpu_reset_flags |= reset; - amdgpu_ras_reset_gpu(adev); - } - - return 0; -} - int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, enum amdgpu_ras_block block, uint16_t pasid, pasid_notify pasid_fn, void *data, uint32_t reset) @@ -472,43 +423,6 @@ int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, return 0; } -static int amdgpu_umc_uint64_cmp(const void *a, const void *b) -{ - uint64_t *addr_a = (uint64_t *)a; - uint64_t *addr_b = (uint64_t *)b; - - if (*addr_a > *addr_b) - return 1; - else if (*addr_a < *addr_b) - return -1; - else - return 0; -} - -/* Use string hash to avoid logging the same bad pages repeatedly */ -int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev, - uint64_t *pfns, int len, uint64_t *val) -{ - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - char buf[MAX_UMC_HASH_STRING_SIZE] = {0}; - int offset = 0, i = 0; - uint64_t hash_val; - - if (!pfns || !len) - return -EINVAL; - - sort(pfns, len, sizeof(uint64_t), amdgpu_umc_uint64_cmp, NULL); - - for (i = 0; i < len; i++) - offset += snprintf(&buf[offset], sizeof(buf) - offset, "%llx", pfns[i]); - - hash_val = siphash(buf, offset, &con->umc_ecc_log.ecc_key); - - *val = hash_val; - - return 0; -} - int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err) { @@ -519,18 +433,10 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, ecc_log = &con->umc_ecc_log; mutex_lock(&ecc_log->lock); - ret = radix_tree_insert(ecc_tree, ecc_err->hash_index, ecc_err); - if (!ret) { - struct ras_err_pages *err_pages = &ecc_err->err_pages; - int i; - - /* Reserve memory */ - for (i = 0; i < err_pages->count; i++) - amdgpu_ras_reserve_page(adev, err_pages->pfn[i]); - + ret = radix_tree_insert(ecc_tree, ecc_err->pa_pfn, ecc_err); + if (!ret) radix_tree_tag_set(ecc_tree, - ecc_err->hash_index, UMC_ECC_NEW_DETECTED_TAG); - } + ecc_err->pa_pfn, UMC_ECC_NEW_DETECTED_TAG); mutex_unlock(&ecc_log->lock); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 5f50c69c3cec..ce4179db2a6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -127,13 +127,8 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, int amdgpu_umc_loop_channels(struct amdgpu_device *adev, umc_func func, void *data); -int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, - uint32_t reset, uint32_t timeout_ms); - int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, uint64_t status, uint64_t ipid, uint64_t addr); -int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev, - uint64_t *pfns, int len, uint64_t *val); int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index fbc2852278e1..6162582d0aa2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -587,7 +587,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) break; } - r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, fw_name); + r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, "%s", fw_name); if (r) { release_firmware(adev->umsch_mm.fw); adev->umsch_mm.fw = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 07d930339b07..31fd30dcd593 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -260,7 +260,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) return -EINVAL; } - r = amdgpu_ucode_request(adev, &adev->uvd.fw, fw_name); + r = amdgpu_ucode_request(adev, &adev->uvd.fw, "%s", fw_name); if (r) { dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n", fw_name); @@ -1088,7 +1088,6 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, int r; job->vm = NULL; - ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); if (ib->length_dw % 16) { DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 968ca2c84ef7..74fdbf71d95b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -158,7 +158,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) return -EINVAL; } - r = amdgpu_ucode_request(adev, &adev->vce.fw, fw_name); + r = amdgpu_ucode_request(adev, &adev->vce.fw, "%s", fw_name); if (r) { dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n", fw_name); @@ -749,7 +749,6 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, int i, r = 0; job->vm = NULL; - ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); for (idx = 0; idx < ib->length_dw;) { uint32_t len = amdgpu_ib_get_value(ib, idx); @@ -1044,7 +1043,6 @@ out: if (!r) { /* No error, free all destroyed handle slots */ tmp = destroyed; - amdgpu_ib_free(p->adev, ib, NULL); } else { /* Error during parsing, free all allocated handle slots */ tmp = allocated; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 1a5439abd1a0..2a1f3dbb14d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -330,6 +330,9 @@ struct amdgpu_vcn { uint16_t inst_mask; uint8_t num_inst_per_aid; bool using_unified_queue; + + /* IP reg dump */ + uint32_t *ip_dump; }; struct amdgpu_fw_shared_rb_ptrs_struct { @@ -461,8 +464,11 @@ struct amdgpu_vcn5_fw_shared { struct amdgpu_fw_shared_unified_queue_struct sq; uint8_t pad1[8]; struct amdgpu_fw_shared_fw_logging fw_log; + uint8_t pad2[20]; struct amdgpu_fw_shared_rb_setup rb_setup; - uint8_t pad2[4]; + struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; + struct amdgpu_fw_shared_drm_key_wa drm_key_wa; + uint8_t pad3[9]; }; #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 111c380f929b..b6397d3229e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -33,6 +33,7 @@ #include "amdgpu.h" #include "amdgpu_ras.h" #include "amdgpu_reset.h" +#include "amdgpu_dpm.h" #include "vi.h" #include "soc15.h" #include "nv.h" @@ -849,6 +850,13 @@ enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad return mode; } +void amdgpu_virt_pre_reset(struct amdgpu_device *adev) +{ + /* stop the data exchange thread */ + amdgpu_virt_fini_data_exchange(adev); + amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR); +} + void amdgpu_virt_post_reset(struct amdgpu_device *adev) { if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) { @@ -858,7 +866,7 @@ void amdgpu_virt_post_reset(struct amdgpu_device *adev) adev->gfx.is_poweron = false; } - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index b42a8854dca0..b650a2032c42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -376,6 +376,7 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id); bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id); +void amdgpu_virt_pre_reset(struct amdgpu_device *adev); void amdgpu_virt_post_reset(struct amdgpu_device *adev); bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev); bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 6415d0d039e1..e5f508d34ed8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -549,7 +549,7 @@ static int amdgpu_vkms_sw_fini(void *handle) adev->mode_info.mode_config_initialized = false; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); kfree(adev->amdgpu_vkms_output); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 52e6a0b3f0c8..e20d19ae01b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -681,6 +681,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; + if (adev->gfx.enable_cleaner_shader && + ring->funcs->emit_cleaner_shader && + job->enforce_isolation) + ring->funcs->emit_cleaner_shader(ring); + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) return 0; @@ -742,6 +747,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_ring_emit_switch_buffer(ring); amdgpu_ring_emit_switch_buffer(ring); } + amdgpu_ring_ib_end(ring); return 0; } @@ -2397,6 +2403,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) { + struct amdgpu_ip_block *ip_block; struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; int r, i; @@ -2428,6 +2435,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); + /* use CPU for page table update if SDMA is unavailable */ + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SDMA); + if (!ip_block || ip_block->status.valid == false) + vm->use_cpu_for_update = true; + DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && @@ -2768,7 +2780,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) * shouldn't be reported any more. */ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault) { bool is_compute_context = false; @@ -2794,7 +2806,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, addr /= AMDGPU_GPU_PAGE_SIZE; if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr, write_fault)) { + node_id, addr, ts, write_fault)) { amdgpu_bo_unref(&root); return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 046949c4b695..d12d66dca8e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -558,7 +558,7 @@ amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm); void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info); bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index f91cc149d06c..7d26a962f811 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -456,6 +456,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, u64 vis_usage = 0, max_bytes, min_block_size; struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; + unsigned int adjust_dcc_size = 0; struct drm_buddy *mm = &mgr->mm; struct drm_buddy_block *block; unsigned long pages_per_block; @@ -511,7 +512,19 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC && + adev->gmc.gmc_funcs->get_dcc_alignment) + adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev); + remaining_size = (u64)vres->base.size; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + unsigned int dcc_size; + + dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size); + remaining_size = (u64)dcc_size; + + vres->flags |= DRM_BUDDY_TRIM_DISABLE; + } mutex_lock(&mgr->lock); while (remaining_size) { @@ -521,8 +534,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, min_block_size = mgr->default_page_size; size = remaining_size; - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) + + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) + min_block_size = size; + else if ((size >= (u64)pages_per_block << PAGE_SHIFT) && + !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) min_block_size = (u64)pages_per_block << PAGE_SHIFT; BUG_ON(min_block_size < mm->chunk_size); @@ -553,6 +569,22 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, } mutex_unlock(&mgr->lock); + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + struct drm_buddy_block *dcc_block; + unsigned long dcc_start; + u64 trim_start; + + dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks); + /* Adjust the start address for DCC buffers only */ + dcc_start = + roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block), + adjust_dcc_size); + trim_start = (u64)dcc_start; + drm_buddy_block_trim(mm, &trim_start, + (u64)vres->base.size, + &vres->blocks); + } + vres->base.start = 0; size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), vres->base.size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 821ba2309dec..7de449fae1e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1389,10 +1389,10 @@ static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct a switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) { case ACA_ERROR_TYPE_UE: - amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, NULL, 1ULL); + amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL); break; case ACA_ERROR_TYPE_CE: - amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, NULL, 1ULL); + amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index fb2b394bb9c5..6e9eeaeb3de1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -213,7 +213,7 @@ struct amd_sriov_msg_pf2vf_info { uint32_t gpu_capacity; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE]; -}; +} __packed; struct amd_sriov_msg_vf2pf_info_header { /* the total structure size in byte */ @@ -273,7 +273,7 @@ struct amd_sriov_msg_vf2pf_info { uint32_t mes_info_size; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE]; -}; +} __packed; /* mailbox message send from guest to host */ enum amd_sriov_mailbox_request_message { diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 228fd4dd32f1..26e2188101e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -75,6 +75,8 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, uint32_t inst_mask; ring->xcp_id = AMDGPU_XCP_NO_PARTITION; + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + adev->gfx.enforce_isolation[0].xcp_id = ring->xcp_id; if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) return; @@ -103,6 +105,8 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) { if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) { ring->xcp_id = xcp_id; + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id; break; } } diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 650ec95bb40a..a51f3414b65d 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -2064,27 +2064,25 @@ amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder) case LCD_FAKE_EDID_PATCH_RECORD_TYPE: fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; if (fake_edid_record->ucFakeEDIDLength) { - struct edid *edid; - int edid_size = - max((int)EDID_LENGTH, (int)fake_edid_record->ucFakeEDIDLength); - edid = kmalloc(edid_size, GFP_KERNEL); - if (edid) { - memcpy((u8 *)edid, (u8 *)&fake_edid_record->ucFakeEDIDString[0], - fake_edid_record->ucFakeEDIDLength); - - if (drm_edid_is_valid(edid)) { - adev->mode_info.bios_hardcoded_edid = edid; - adev->mode_info.bios_hardcoded_edid_size = edid_size; - } else - kfree(edid); - } + const struct drm_edid *edid; + int edid_size; + + if (fake_edid_record->ucFakeEDIDLength == 128) + edid_size = fake_edid_record->ucFakeEDIDLength; + else + edid_size = fake_edid_record->ucFakeEDIDLength * 128; + edid = drm_edid_alloc(fake_edid_record->ucFakeEDIDString, edid_size); + if (drm_edid_valid(edid)) + adev->mode_info.bios_hardcoded_edid = edid; + else + drm_edid_free(edid); + record += struct_size(fake_edid_record, + ucFakeEDIDString, + edid_size); + } else { + /* empty fake edid record must be 3 bytes long */ + record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; } - record += fake_edid_record->ucFakeEDIDLength ? - struct_size(fake_edid_record, - ucFakeEDIDString, - fake_edid_record->ucFakeEDIDLength) : - /* empty fake edid record must be 3 bytes long */ - sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; break; case LCD_PANEL_RESOLUTION_RECORD_TYPE: panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 55982c0064b5..06088d52d81c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -364,6 +364,7 @@ * 1 - Stream * 2 - Bypass */ +#define EOP_EXEC (1 << 28) /* For Trailing Fence */ #define DATA_SEL(x) ((x) << 29) /* 0 - discard * 1 - send low 32bit data diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index dddb5fe16f2c..742adbc460c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2846,7 +2846,7 @@ static int dce_v10_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 11780e4d7e9f..8d46ebadfa46 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2973,7 +2973,7 @@ static int dce_v11_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 05c0df97f01d..f08dc6a3886f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2745,7 +2745,7 @@ static int dce_v6_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index dc73e301d937..a6a3adf2ae13 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2766,7 +2766,7 @@ static int dce_v8_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - kfree(adev->mode_info.bios_hardcoded_edid); + drm_edid_free(adev->mode_info.bios_hardcoded_edid); drm_kms_helper_poll_fini(adev_to_drm(adev)); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 2957702fca0c..ca983a014ba0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4116,6 +4116,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { + char fw_name[53]; char ucode_prefix[30]; const char *wks = ""; int err; @@ -4149,8 +4150,8 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); if (!amdgpu_sriov_vf(adev)) { - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, - "amdgpu/%s_rlc.bin", ucode_prefix); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; @@ -4648,7 +4649,7 @@ static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -4661,7 +4662,7 @@ static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { @@ -4674,7 +4675,7 @@ static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.me.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); adev->gfx.ip_dump_gfx_queues = NULL; } else { @@ -4740,6 +4741,13 @@ static int gfx_v10_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_10_1__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); @@ -5212,26 +5220,74 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) } +static u32 gfx_v10_0_get_cpg_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + if (me != 0) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0); + case 1: + return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1); + default: + return 0; + } +} + +static u32 gfx_v10_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, bool enable) { - u32 tmp; + u32 tmp, cp_int_cntl_reg; + int i, j; if (amdgpu_sriov_vf(adev)) return; - tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); - - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, - enable ? 1 : 0); - - WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); + } + } + } } static int gfx_v10_0_init_csb(struct amdgpu_device *adev) @@ -7368,6 +7424,7 @@ static int gfx_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); /* WA added for Vangogh asic fixing the SMU suspend failure * It needs to set power gating again during gfxoff control @@ -7678,6 +7735,10 @@ static int gfx_v10_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + return 0; } @@ -9073,12 +9134,39 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev, unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -9087,17 +9175,75 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } +static int gfx_v10_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + return 0; +} + static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, - PRIV_INSTR_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -9121,8 +9267,8 @@ static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev, case 0: for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; - /* we only enabled 1 gfx queue per pipe for now */ - if (ring->me == me_id && ring->pipe == pipe_id) + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) drm_sched_fault(&ring->sched); } break; @@ -9149,6 +9295,15 @@ static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v10_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream \n"); + gfx_v10_0_handle_priv_fault(adev, entry); + return 0; +} + static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -9243,6 +9398,24 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static void gfx_v10_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -9434,7 +9607,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, .test_ring = gfx_v10_0_ring_test_ring, .test_ib = gfx_v10_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v10_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v10_0_ring_emit_sb, .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl, @@ -9475,11 +9648,12 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, .test_ring = gfx_v10_0_ring_test_ring, .test_ib = gfx_v10_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v10_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v10_0_ring_soft_recovery, .emit_mem_sync = gfx_v10_0_emit_mem_sync, }; @@ -9535,6 +9709,11 @@ static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = { .process = gfx_v10_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v10_0_bad_op_irq_funcs = { + .set = gfx_v10_0_set_bad_op_fault_state, + .process = gfx_v10_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = { .set = gfx_v10_0_set_priv_inst_fault_state, .process = gfx_v10_0_priv_inst_irq, @@ -9556,6 +9735,9 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v10_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index dcef39907449..ee8604722467 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -481,6 +481,24 @@ static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, amdgpu_ring_write(ring, inv); /* poll interval */ } +static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -1484,7 +1502,7 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -1497,7 +1515,7 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { @@ -1510,7 +1528,7 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.me.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); adev->gfx.ip_dump_gfx_queues = NULL; } else { @@ -1569,6 +1587,13 @@ static int gfx_v11_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, + GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, @@ -1953,26 +1978,74 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev) gfx_v11_0_init_gds_vmid(adev); } +static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + if (me != 0) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); + case 1: + return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); + default: + return 0; + } +} + +static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, bool enable) { - u32 tmp; + u32 tmp, cp_int_cntl_reg; + int i, j; if (amdgpu_sriov_vf(adev)) return; - tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); - - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, - enable ? 1 : 0); - - WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); + } + } + } } static int gfx_v11_0_init_csb(struct amdgpu_device *adev) @@ -4598,6 +4671,7 @@ static int gfx_v11_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { @@ -4668,8 +4742,8 @@ static int gfx_v11_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, - int req) +int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req) { u32 i, tmp, val; @@ -4707,6 +4781,8 @@ static int gfx_v11_0_soft_reset(void *handle) int r, i, j, k; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + gfx_v11_0_set_safe_mode(adev, 0); + tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); @@ -4714,8 +4790,6 @@ static int gfx_v11_0_soft_reset(void *handle) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); - gfx_v11_0_set_safe_mode(adev, 0); - mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { @@ -4740,8 +4814,10 @@ static int gfx_v11_0_soft_reset(void *handle) mutex_unlock(&adev->srbm_mutex); /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ - r = gfx_v11_0_request_gfx_index_mutex(adev, 1); + mutex_lock(&adev->gfx.reset_sem_mutex); + r = gfx_v11_0_request_gfx_index_mutex(adev, true); if (r) { + mutex_unlock(&adev->gfx.reset_sem_mutex); DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); return r; } @@ -4755,7 +4831,8 @@ static int gfx_v11_0_soft_reset(void *handle) RREG32_SOC15(GC, 0, regCP_VMID_RESET); /* release the gfx mutex */ - r = gfx_v11_0_request_gfx_index_mutex(adev, 0); + r = gfx_v11_0_request_gfx_index_mutex(adev, false); + mutex_unlock(&adev->gfx.reset_sem_mutex); if (r) { DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); return r; @@ -4954,6 +5031,9 @@ static int gfx_v11_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; return 0; } @@ -6201,15 +6281,42 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -6218,17 +6325,75 @@ static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } +static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + return 0; +} + static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_INSTR_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -6252,8 +6417,8 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, case 0: for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; - /* we only enabled 1 gfx queue per pipe for now */ - if (ring->me == me_id && ring->pipe == pipe_id) + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) drm_sched_fault(&ring->sched); } break; @@ -6281,6 +6446,15 @@ static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream \n"); + gfx_v11_0_handle_priv_fault(adev, entry); + return 0; +} + static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -6556,7 +6730,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, .test_ring = gfx_v11_0_ring_test_ring, .test_ib = gfx_v11_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v11_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, @@ -6598,11 +6772,12 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, .test_ring = gfx_v11_0_ring_test_ring, .test_ib = gfx_v11_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v11_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, }; @@ -6658,6 +6833,11 @@ static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = { .process = gfx_v11_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = { + .set = gfx_v11_0_set_bad_op_fault_state, + .process = gfx_v11_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = { .set = gfx_v11_0_set_priv_inst_fault_state, .process = gfx_v11_0_priv_inst_irq, @@ -6675,6 +6855,9 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h index 10cfc29c27c9..157a5c812259 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h @@ -26,4 +26,7 @@ extern const struct amdgpu_ip_block_version gfx_v11_0_ip_block; +int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c index 9cd221ed240c..999bb3cc88b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c @@ -97,7 +97,7 @@ static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev, ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; } - if (con && !con->is_rma) + if (con && !amdgpu_ras_is_rma(adev)) amdgpu_ras_reset_gpu(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f384be0d1800..54059cbcfc08 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -202,6 +202,12 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) }; +static const struct soc15_reg_golden golden_settings_gc_12_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f), + SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -1275,7 +1281,7 @@ static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -1288,7 +1294,7 @@ static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { @@ -1301,7 +1307,7 @@ static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.me.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); adev->gfx.ip_dump_gfx_queues = NULL; } else { @@ -1349,6 +1355,13 @@ static int gfx_v12_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, + GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, @@ -1680,26 +1693,68 @@ static void gfx_v12_0_constants_init(struct amdgpu_device *adev) gfx_v12_0_init_compute_vmid(adev); } +static u32 gfx_v12_0_get_cpg_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + if (me != 0) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); + default: + return 0; + } +} + +static u32 gfx_v12_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); + default: + return 0; + } +} + static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, - bool enable) + bool enable) { - u32 tmp; + u32 tmp, cp_int_cntl_reg; + int i, j; if (amdgpu_sriov_vf(adev)) return; - tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); - - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, - enable ? 1 : 0); - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, - enable ? 1 : 0); - - WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); + } + } + } } static int gfx_v12_0_init_csb(struct amdgpu_device *adev) @@ -2999,7 +3054,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); @@ -3432,6 +3487,24 @@ static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); } +static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) + return; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + if (adev->rev_id == 0) + soc15_program_register_sequence(adev, + golden_settings_gc_12_0, + (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); + break; + default: + break; + } +} + static int gfx_v12_0_hw_init(void *handle) { int r; @@ -3472,6 +3545,9 @@ static int gfx_v12_0_hw_init(void *handle) } } + if (!amdgpu_emu_mode) + gfx_v12_0_init_golden_registers(adev); + adev->gfx.is_poweron = true; if (get_gb_addr_config(adev)) @@ -3519,42 +3595,18 @@ static int gfx_v12_0_hw_init(void *handle) return r; } -static int gfx_v12_0_kiq_disable_kgq(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - int i, r = 0; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * - adev->gfx.num_gfx_rings)) - return -ENOMEM; - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], - PREEMPT_QUEUES, 0, 0); - - if (adev->gfx.kiq[0].ring.sched.ready) - r = amdgpu_ring_test_helper(kiq_ring); - - return r; -} - static int gfx_v12_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; uint32_t tmp; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { - r = gfx_v12_0_kiq_disable_kgq(adev); - if (r) + if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } @@ -3670,6 +3722,10 @@ static int gfx_v12_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + return 0; } @@ -4745,15 +4801,42 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -4762,17 +4845,75 @@ static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev, return 0; } +static int gfx_v12_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + return 0; +} + static int gfx_v12_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, - PRIV_INSTR_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -4796,8 +4937,8 @@ static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev, case 0: for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; - /* we only enabled 1 gfx queue per pipe for now */ - if (ring->me == me_id && ring->pipe == pipe_id) + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) drm_sched_fault(&ring->sched); } break; @@ -4825,6 +4966,15 @@ static int gfx_v12_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v12_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream \n"); + gfx_v12_0_handle_priv_fault(adev, entry); + return 0; +} + static int gfx_v12_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -4857,6 +5007,24 @@ static void gfx_v12_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static void gfx_v12_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + static void gfx_v12_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -5038,7 +5206,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = { .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush, .test_ring = gfx_v12_0_ring_test_ring, .test_ib = gfx_v12_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v12_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_cntxcntl = gfx_v12_0_ring_emit_cntxcntl, .init_cond_exec = gfx_v12_0_ring_emit_init_cond_exec, @@ -5076,11 +5244,12 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush, .test_ring = gfx_v12_0_ring_test_ring, .test_ib = gfx_v12_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v12_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v12_0_ring_emit_wreg, .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v12_0_ring_soft_recovery, .emit_mem_sync = gfx_v12_0_emit_mem_sync, }; @@ -5136,6 +5305,11 @@ static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_reg_irq_funcs = { .process = gfx_v12_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v12_0_bad_op_irq_funcs = { + .set = gfx_v12_0_set_bad_op_fault_state, + .process = gfx_v12_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_inst_irq_funcs = { .set = gfx_v12_0_set_priv_inst_fault_state, .process = gfx_v12_0_priv_inst_irq, @@ -5149,6 +5323,9 @@ static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v12_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v12_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v12_0_priv_inst_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index d84589137df9..f146806c4633 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2114,6 +2114,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; + /* Workaround for cache flush problems. First send a dummy EOP * event down the pipe with seq one below. */ @@ -2133,7 +2135,8 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | - EVENT_INDEX(5))); + EVENT_INDEX(5) | + (exec ? EOP_EXEC : 0))); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); @@ -4921,6 +4924,76 @@ static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ } +static void gfx_v7_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static void gfx_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v7_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + +static int gfx_v7_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v7_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 7 + 12 + 5)) + return -ENOMEM; + gfx_v7_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr, + ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); + gfx_v7_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff); + gfx_v7_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0); + + return amdgpu_ring_test_ring(ring); +} + static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .name = "gfx_v7_0", .early_init = gfx_v7_0_early_init, @@ -4972,6 +5045,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .emit_wreg = gfx_v7_0_ring_emit_wreg, .soft_recovery = gfx_v7_0_ring_soft_recovery, .emit_mem_sync = gfx_v7_0_emit_mem_sync, + .reset = gfx_v7_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { @@ -5002,6 +5076,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v7_0_ring_emit_wreg, + .soft_recovery = gfx_v7_0_ring_soft_recovery, .emit_mem_sync = gfx_v7_0_emit_mem_sync_compute, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b4658c7db0e1..bc8295812cc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6149,6 +6149,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; /* Workaround for cache flush problems. First send a dummy EOP * event down the pipe with seq one below. @@ -6172,7 +6173,8 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, EOP_TC_ACTION_EN | EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | - EVENT_INDEX(5))); + EVENT_INDEX(5) | + (exec ? EOP_EXEC : 0))); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); @@ -6380,6 +6382,34 @@ static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, amdgpu_ring_write(ring, val); } +static void gfx_v8_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static void gfx_v8_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v8_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) { struct amdgpu_device *adev = ring->adev; @@ -6856,6 +6886,48 @@ static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } +static int gfx_v8_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v8_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 7 + 12 + 5)) + return -ENOMEM; + gfx_v8_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr, + ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); + gfx_v8_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff); + gfx_v8_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0); + + return amdgpu_ring_test_ring(ring); +} + static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .name = "gfx_v8_0", .early_init = gfx_v8_0_early_init, @@ -6923,6 +6995,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .emit_wreg = gfx_v8_0_ring_emit_wreg, .soft_recovery = gfx_v8_0_ring_soft_recovery, .emit_mem_sync = gfx_v8_0_emit_mem_sync, + .reset = gfx_v8_0_reset_kgq, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { @@ -6955,6 +7028,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v8_0_ring_emit_wreg, + .soft_recovery = gfx_v8_0_ring_soft_recovery, .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute, .emit_wave_limit = gfx_v8_0_emit_wave_limit, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 2929c8972ea7..21089aadbb7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -50,6 +50,7 @@ #include "amdgpu_ring_mux.h" #include "gfx_v9_4.h" #include "gfx_v9_0.h" +#include "gfx_v9_0_cleaner_shader.h" #include "gfx_v9_4_2.h" #include "asic_reg/pwr/pwr_10_0_offset.h" @@ -893,10 +894,18 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev); static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, unsigned int vmid); +static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { + struct amdgpu_device *adev = kiq_ring->adev; + u64 shader_mc_addr; + + /* Cleaner shader MC address */ + shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | @@ -906,8 +915,8 @@ static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */ amdgpu_ring_write(kiq_ring, 0); /* oac mask */ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ } @@ -1004,12 +1013,47 @@ static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); } + +static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type, + uint32_t me_id, uint32_t pipe_id, uint32_t queue_id, + uint32_t xcc_id, uint32_t vmid) +{ + struct amdgpu_device *adev = kiq_ring->adev; + unsigned i; + + /* enter save mode */ + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0); + + if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1); + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + } else { + dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type); + } + + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + /* exit safe mode */ + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); +} + static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { .kiq_set_resources = gfx_v9_0_kiq_set_resources, .kiq_map_queues = gfx_v9_0_kiq_map_queues, .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, .kiq_query_status = gfx_v9_0_kiq_query_status, .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, + .kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, @@ -2129,7 +2173,7 @@ static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); adev->gfx.ip_dump_core = NULL; } else { @@ -2142,7 +2186,7 @@ static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) adev->gfx.mec.num_queue_per_pipe; ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); - if (ptr == NULL) { + if (!ptr) { DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); adev->gfx.ip_dump_compute_queues = NULL; } else { @@ -2174,6 +2218,12 @@ static int gfx_v9_0_sw_init(void *handle) break; } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + default: + adev->gfx.enable_cleaner_shader = false; + break; + } + adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; @@ -2182,6 +2232,13 @@ static int gfx_v9_0_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); @@ -2329,6 +2386,10 @@ static int gfx_v9_0_sw_init(void *handle) gfx_v9_0_alloc_ip_dump(adev); + r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + if (r) + return r; + return 0; } @@ -2364,6 +2425,8 @@ static int gfx_v9_0_sw_fini(void *handle) } gfx_v9_0_free_microcode(adev); + amdgpu_gfx_sysfs_isolation_shader_fini(adev); + kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); @@ -2634,7 +2697,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); - if(adev->gfx.num_gfx_rings) + if (adev->gfx.num_gfx_rings) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); @@ -3735,7 +3798,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) return 0; } -static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) +static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; @@ -3747,8 +3810,8 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) */ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; - if (!tmp_mqd->cp_hqd_pq_control || - (!amdgpu_in_reset(adev) && !adev->in_suspend)) { + if (!restore && (!tmp_mqd->cp_hqd_pq_control || + (!amdgpu_in_reset(adev) && !adev->in_suspend))) { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -3812,7 +3875,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v9_0_kcq_init_queue(ring); + r = gfx_v9_0_kcq_init_queue(ring, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -3908,6 +3971,9 @@ static int gfx_v9_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, + adev->gfx.cleaner_shader_ptr); + if (!amdgpu_sriov_vf(adev)) gfx_v9_0_init_golden_registers(adev); @@ -3937,6 +4003,7 @@ static int gfx_v9_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); /* DF freeze and kcq disable will fail */ if (!amdgpu_ras_intr_triggered()) @@ -4747,6 +4814,10 @@ static int gfx_v9_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + r = gfx_v9_0_ecc_late_init(handle); if (r) return r; @@ -5858,7 +5929,9 @@ static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); WREG32_SOC15(GC, 0, mmSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, @@ -5929,17 +6002,95 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, } } +static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev, + int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, enum amdgpu_interrupt_state state) { + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } + break; + default: + break; + } + + return 0; +} + +static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 cp_int_cntl_reg, cp_int_cntl; + int i, j; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + /* MECs start at 1 */ + cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); + + if (cp_int_cntl_reg) { + cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); + } + } + } break; default: break; @@ -6121,6 +6272,15 @@ static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream\n"); + gfx_v9_0_fault(adev, entry); + return 0; +} + static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -7001,6 +7161,153 @@ static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } } +static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + +static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + u32 tmp; + int r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, 5)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + gfx_v9_0_ring_emit_wreg(kiq_ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + if (amdgpu_ring_alloc(ring, 7 + 7 + 5)) + return -ENOMEM; + gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr, + ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); + gfx_v9_0_ring_emit_reg_wait(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff); + gfx_v9_0_ring_emit_wreg(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0); + + return amdgpu_ring_test_ring(ring); +} + +static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + int i, r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, + 0, 0); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + /* make sure dequeue is complete*/ + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + if (r) { + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + return r; + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)){ + dev_err(adev->dev, "fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v9_0_kcq_init_queue(ring, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); + return r; + } + spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + r = amdgpu_ring_test_ring(kiq_ring); + if (r) { + DRM_ERROR("fail to remap queue\n"); + return r; + } + return amdgpu_ring_test_ring(ring); +} + static void gfx_v9_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -7083,6 +7390,13 @@ static void gfx_v9_ip_dump(void *handle) } +static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) +{ + /* Emit the cleaner shader */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ +} + static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, @@ -7132,7 +7446,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 5 + /* HDP_INVL */ 8 + 8 + /* FENCE x2 */ 2 + /* SWITCH_BUFFER */ - 7, /* gfx_v9_0_emit_mem_sync */ + 7 + /* gfx_v9_0_emit_mem_sync */ + 2, /* gfx_v9_0_ring_emit_cleaner_shader */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ .emit_ib = gfx_v9_0_ring_emit_ib_gfx, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -7141,7 +7456,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, .test_ring = gfx_v9_0_ring_test_ring, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v9_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, @@ -7153,6 +7468,10 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, + .reset = gfx_v9_0_reset_kgq, + .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { @@ -7185,7 +7504,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { 5 + /* HDP_INVL */ 8 + 8 + /* FENCE x2 */ 2 + /* SWITCH_BUFFER */ - 7, /* gfx_v9_0_emit_mem_sync */ + 7 + /* gfx_v9_0_emit_mem_sync */ + 2, /* gfx_v9_0_ring_emit_cleaner_shader */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ .emit_ib = gfx_v9_0_ring_emit_ib_gfx, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -7195,7 +7515,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, .test_ring = gfx_v9_0_ring_test_ring, .test_ib = gfx_v9_0_ring_test_ib, - .insert_nop = amdgpu_sw_ring_insert_nop, + .insert_nop = gfx_v9_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, @@ -7209,6 +7529,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .patch_cntl = gfx_v9_0_ring_patch_cntl, .patch_de = gfx_v9_0_ring_patch_de_meta, .patch_ce = gfx_v9_0_ring_patch_ce_meta, + .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -7229,7 +7552,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_0_emit_mem_sync */ 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ - 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ + 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ + 2, /* gfx_v9_0_ring_emit_cleaner_shader */ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, .emit_fence = gfx_v9_0_ring_emit_fence, @@ -7239,13 +7563,18 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, .test_ring = gfx_v9_0_ring_test_ring, .test_ib = gfx_v9_0_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v9_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, .emit_wave_limit = gfx_v9_0_emit_wave_limit, + .reset = gfx_v9_0_reset_kcq, + .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { @@ -7303,6 +7632,11 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { .process = gfx_v9_0_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = { + .set = gfx_v9_0_set_bad_op_fault_state, + .process = gfx_v9_0_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { .set = gfx_v9_0_set_priv_inst_fault_state, .process = gfx_v9_0_priv_inst_irq, @@ -7322,6 +7656,9 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h new file mode 100644 index 000000000000..36c0292b5110 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +/* Define the cleaner shader gfx_9_0 */ +static const u32 __maybe_unused gfx_9_0_cleaner_shader_hex[] = { + /* Add the cleaner shader code here */ +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 20ea6cb01edf..2067f26d3a9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -37,6 +37,7 @@ #include "gc/gc_9_4_3_sh_mask.h" #include "gfx_v9_4_3.h" +#include "gfx_v9_4_3_cleaner_shader.h" #include "amdgpu_xcp.h" #include "amdgpu_aca.h" @@ -63,6 +64,98 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); #define NORMALIZE_XCC_REG_OFFSET(offset) \ (offset & 0xFFFF) +static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = { + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQC_DCACHE_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQC_ICACHE_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQ_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regTCP_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regVM_L2_PROTECTION_FAULT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC2_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_COMMAND), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_MESSAGE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_ARGUMENT_1), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_ARGUMENT_2), + SOC15_REG_ENTRY_STR(GC, 0, regSMU_RLC_RESPONSE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME2_HEADER_DUMP), + /* SE status registers */ + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3) +}; + +static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9_4_3[] = { + /* compute queue registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ACTIVE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GFX_STATUS), +}; + struct amdgpu_gfx_ras gfx_v9_4_3_ras; static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); @@ -71,10 +164,18 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev); static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev); static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); +static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { + struct amdgpu_device *adev = kiq_ring->adev; + u64 shader_mc_addr; + + /* Cleaner shader MC address */ + shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | @@ -84,8 +185,8 @@ static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ - amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */ amdgpu_ring_write(kiq_ring, 0); /* oac mask */ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ } @@ -182,12 +283,46 @@ static void gfx_v9_4_3_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); } +static void gfx_v9_4_3_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type, + uint32_t me_id, uint32_t pipe_id, uint32_t queue_id, + uint32_t xcc_id, uint32_t vmid) +{ + struct amdgpu_device *adev = kiq_ring->adev; + unsigned i; + + /* enter save mode */ + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, xcc_id); + + if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_COMPUTE_QUEUE_RESET, 0x1); + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + } else { + dev_err(adev->dev, "reset queue_type(%d) not supported\n\n", queue_type); + } + + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + /* exit safe mode */ + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); +} + static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = { .kiq_set_resources = gfx_v9_4_3_kiq_set_resources, .kiq_map_queues = gfx_v9_4_3_kiq_map_queues, .kiq_unmap_queues = gfx_v9_4_3_kiq_unmap_queues, .kiq_query_status = gfx_v9_4_3_kiq_query_status, .kiq_invalidate_tlbs = gfx_v9_4_3_kiq_invalidate_tlbs, + .kiq_reset_hw_queue = gfx_v9_4_3_kiq_reset_hw_queue, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, @@ -885,11 +1020,59 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, hw_prio, NULL); } +static void gfx_v9_4_3_alloc_ip_dump(struct amdgpu_device *adev) +{ + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); + uint32_t *ptr, num_xcc, inst; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + ptr = kcalloc(reg_count * num_xcc, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); + adev->gfx.ip_dump_core = NULL; + } else { + adev->gfx.ip_dump_core = ptr; + } + + /* Allocate memory for compute queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); + inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst * num_xcc, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); + adev->gfx.ip_dump_compute_queues = NULL; + } else { + adev->gfx.ip_dump_compute_queues = ptr; + } +} + static int gfx_v9_4_3_sw_init(void *handle) { int i, j, k, r, ring_id, xcc_id, num_xcc; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): + adev->gfx.cleaner_shader_ptr = gfx_9_4_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_3_cleaner_shader_hex); + if (adev->gfx.mec_fw_version >= 153) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + default: + adev->gfx.enable_cleaner_shader = false; + break; + } + adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; @@ -901,6 +1084,13 @@ static int gfx_v9_4_3_sw_init(void *handle) if (r) return r; + /* Bad opcode Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR, + &adev->gfx.bad_op_irq); + if (r) + return r; + /* Privileged reg */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); @@ -976,10 +1166,19 @@ static int gfx_v9_4_3_sw_init(void *handle) return r; - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev)) { r = amdgpu_gfx_sysfs_init(adev); + if (r) + return r; + } - return r; + gfx_v9_4_3_alloc_ip_dump(adev); + + r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + if (r) + return r; + + return 0; } static int gfx_v9_4_3_sw_fini(void *handle) @@ -997,11 +1196,17 @@ static int gfx_v9_4_3_sw_fini(void *handle) amdgpu_gfx_kiq_fini(adev, i); } + amdgpu_gfx_cleaner_shader_sw_fini(adev); + gfx_v9_4_3_mec_fini(adev); amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); gfx_v9_4_3_free_microcode(adev); if (!amdgpu_sriov_vf(adev)) amdgpu_gfx_sysfs_fini(adev); + amdgpu_gfx_sysfs_isolation_shader_fini(adev); + + kfree(adev->gfx.ip_dump_core); + kfree(adev->gfx.ip_dump_compute_queues); return 0; } @@ -1910,7 +2115,7 @@ static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) return 0; } -static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) +static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id, bool restore) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; @@ -1922,8 +2127,8 @@ static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) */ tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; - if (!tmp_mqd->cp_hqd_pq_control || - (!amdgpu_in_reset(adev) && !adev->in_suspend)) { + if (!restore && (!tmp_mqd->cp_hqd_pq_control || + (!amdgpu_in_reset(adev) && !adev->in_suspend))) { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -2008,7 +2213,7 @@ static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id); + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -2139,6 +2344,9 @@ static int gfx_v9_4_3_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, + adev->gfx.cleaner_shader_ptr); + if (!amdgpu_sriov_vf(adev)) gfx_v9_4_3_init_golden_registers(adev); @@ -2162,6 +2370,7 @@ static int gfx_v9_4_3_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { @@ -2327,6 +2536,10 @@ static int gfx_v9_4_3_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); + if (r) + return r; + if (adev->gfx.ras && adev->gfx.ras->enable_watchdog_timer) adev->gfx.ras->enable_watchdog_timer(adev); @@ -2833,6 +3046,21 @@ static void gfx_v9_4_3_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask); } +static void gfx_v9_4_3_ring_soft_recovery(struct amdgpu_ring *ring, + unsigned vmid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t value = 0; + + value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); + value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); + value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); + value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + amdgpu_gfx_rlc_enter_safe_mode(adev, ring->xcc_id); + WREG32_SOC15(GC, GET_INST(GC, ring->xcc_id), regSQ_CMD, value); + amdgpu_gfx_rlc_exit_safe_mode(adev, ring->xcc_id); +} + static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( struct amdgpu_device *adev, int me, int pipe, enum amdgpu_interrupt_state state, int xcc_id) @@ -2886,21 +3114,103 @@ static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( } } +static u32 gfx_v9_4_3_get_cpc_int_cntl(struct amdgpu_device *adev, + int xcc_id, int me, int pipe) +{ + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + if (me != 1) + return 0; + + switch (pipe) { + case 0: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE0_INT_CNTL); + case 1: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE1_INT_CNTL); + case 2: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE2_INT_CNTL); + case 3: + return SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE3_INT_CNTL); + default: + return 0; + } +} + static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, enum amdgpu_interrupt_state state) { - int i, num_xcc; + u32 mec_int_cntl_reg, mec_int_cntl; + int i, j, k, num_xcc; num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - for (i = 0; i < num_xcc; i++) + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, - PRIV_REG_INT_ENABLE, - state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (j = 0; j < adev->gfx.mec.num_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + /* MECs start at 1 */ + mec_int_cntl_reg = gfx_v9_4_3_get_cpc_int_cntl(adev, i, j + 1, k); + + if (mec_int_cntl_reg) { + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, i); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? + 1 : 0); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, i); + } + } + } + } + break; + default: + break; + } + + return 0; +} + +static int gfx_v9_4_3_set_bad_op_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 mec_int_cntl_reg, mec_int_cntl; + int i, j, k, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < num_xcc; i++) { + WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + for (j = 0; j < adev->gfx.mec.num_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + /* MECs start at 1 */ + mec_int_cntl_reg = gfx_v9_4_3_get_cpc_int_cntl(adev, i, j + 1, k); + + if (mec_int_cntl_reg) { + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, i); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + OPCODE_ERROR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? + 1 : 0); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, i); + } + } + } + } break; default: break; @@ -3061,6 +3371,15 @@ static int gfx_v9_4_3_priv_reg_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v9_4_3_bad_op_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal opcode in command stream\n"); + gfx_v9_4_3_fault(adev, entry); + return 0; +} + static int gfx_v9_4_3_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -3147,6 +3466,91 @@ static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable) } } +static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, + unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[ring->xcc_id]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; + int r, i; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, + 0, 0); + amdgpu_ring_commit(kiq_ring); + + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) + return r; + + /* make sure dequeue is complete*/ + amdgpu_gfx_rlc_enter_safe_mode(adev, ring->xcc_id); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, ring->xcc_id)); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, ring->xcc_id)); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_rlc_exit_safe_mode(adev, ring->xcc_id); + if (r) { + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + return r; + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)){ + dev_err(adev->dev, "fail to resv mqd_obj\n"); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); + return r; + } + spin_lock_irqsave(&kiq->ring_lock, flags); + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) { + dev_err(adev->dev, "fail to remap queue\n"); + return r; + } + return amdgpu_ring_test_ring(ring); +} + enum amdgpu_gfx_cp_ras_mem_id { AMDGPU_GFX_CP_MEM1 = 1, AMDGPU_GFX_CP_MEM2, @@ -3959,8 +4363,8 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, /* the caller should make sure initialize value of * err_data->ue_count and err_data->ce_count */ - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); } static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, @@ -4062,6 +4466,151 @@ static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev) amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer); } +static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) +{ + int i; + + /* Header itself is a NOP packet */ + if (num_nop == 1) { + amdgpu_ring_write(ring, ring->funcs->nop); + return; + } + + /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); + + /* Header is at index 0, followed by num_nops - 1 NOP packet's */ + for (i = 1; i < num_nop; i++) + amdgpu_ring_write(ring, ring->funcs->nop); +} + +static void gfx_v9_4_3_ip_print(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i, j, k; + uint32_t xcc_id, xcc_offset, inst_offset; + uint32_t num_xcc, reg, num_inst; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); + + if (!adev->gfx.ip_dump_core) + return; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + drm_printf(p, "Number of Instances:%d\n", num_xcc); + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count; + drm_printf(p, "\nInstance id:%d\n", xcc_id); + for (i = 0; i < reg_count; i++) + drm_printf(p, "%-50s \t 0x%08x\n", + gc_reg_list_9_4_3[i].reg_name, + adev->gfx.ip_dump_core[xcc_offset + i]); + } + + /* print compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); + drm_printf(p, "\nnum_xcc: %d num_mec: %d num_pipe: %d num_queue: %d\n", + num_xcc, + adev->gfx.mec.num_mec, + adev->gfx.mec.num_pipe_per_mec, + adev->gfx.mec.num_queue_per_pipe); + + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count * num_inst; + inst_offset = 0; + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + drm_printf(p, + "\nxcc:%d mec:%d, pipe:%d, queue:%d\n", + xcc_id, i, j, k); + for (reg = 0; reg < reg_count; reg++) { + drm_printf(p, + "%-50s \t 0x%08x\n", + gc_cp_reg_list_9_4_3[reg].reg_name, + adev->gfx.ip_dump_compute_queues + [xcc_offset + inst_offset + + reg]); + } + inst_offset += reg_count; + } + } + } + } +} + +static void gfx_v9_4_3_ip_dump(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i, j, k; + uint32_t num_xcc, reg, num_inst; + uint32_t xcc_id, xcc_offset, inst_offset; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9_4_3); + + if (!adev->gfx.ip_dump_core) + return; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + amdgpu_gfx_off_ctrl(adev, false); + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count; + for (i = 0; i < reg_count; i++) + adev->gfx.ip_dump_core[xcc_offset + i] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(gc_reg_list_9_4_3[i], + GET_INST(GC, xcc_id))); + } + amdgpu_gfx_off_ctrl(adev, true); + + /* dump compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + xcc_offset = xcc_id * reg_count * num_inst; + inst_offset = 0; + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + /* ME0 is for GFX so start from 1 for CP */ + soc15_grbm_select(adev, 1 + i, j, k, 0, + GET_INST(GC, xcc_id)); + + for (reg = 0; reg < reg_count; reg++) { + adev->gfx.ip_dump_compute_queues + [xcc_offset + + inst_offset + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST( + gc_cp_reg_list_9_4_3[reg], + GET_INST(GC, xcc_id))); + } + inst_offset += reg_count; + } + } + } + } + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_off_ctrl(adev, true); +} + +static void gfx_v9_4_3_ring_emit_cleaner_shader(struct amdgpu_ring *ring) +{ + /* Emit the cleaner shader */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ +} + static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .name = "gfx_v9_4_3", .early_init = gfx_v9_4_3_early_init, @@ -4078,8 +4627,8 @@ static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .set_clockgating_state = gfx_v9_4_3_set_clockgating_state, .set_powergating_state = gfx_v9_4_3_set_powergating_state, .get_clockgating_state = gfx_v9_4_3_get_clockgating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = gfx_v9_4_3_ip_dump, + .print_ip_state = gfx_v9_4_3_ip_print, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { @@ -4101,7 +4650,8 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { 8 + 8 + 8 + /* gfx_v9_4_3_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_4_3_emit_mem_sync */ 5 + /* gfx_v9_4_3_emit_wave_limit for updating regSPI_WCL_PIPE_PERCENT_GFX register */ - 15, /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */ + 15 + /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */ + 2, /* gfx_v9_4_3_ring_emit_cleaner_shader */ .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */ .emit_ib = gfx_v9_4_3_ring_emit_ib_compute, .emit_fence = gfx_v9_4_3_ring_emit_fence, @@ -4111,13 +4661,18 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush, .test_ring = gfx_v9_4_3_ring_test_ring, .test_ib = gfx_v9_4_3_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = gfx_v9_4_3_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v9_4_3_ring_emit_wreg, .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v9_4_3_ring_soft_recovery, .emit_mem_sync = gfx_v9_4_3_emit_mem_sync, .emit_wave_limit = gfx_v9_4_3_emit_wave_limit, + .reset = gfx_v9_4_3_reset_kcq, + .emit_cleaner_shader = gfx_v9_4_3_ring_emit_cleaner_shader, + .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, + .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { @@ -4172,6 +4727,11 @@ static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_reg_irq_funcs = { .process = gfx_v9_4_3_priv_reg_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v9_4_3_bad_op_irq_funcs = { + .set = gfx_v9_4_3_set_bad_op_fault_state, + .process = gfx_v9_4_3_bad_op_irq, +}; + static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_inst_irq_funcs = { .set = gfx_v9_4_3_set_priv_inst_fault_state, .process = gfx_v9_4_3_priv_inst_irq, @@ -4185,6 +4745,9 @@ static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v9_4_3_priv_reg_irq_funcs; + adev->gfx.bad_op_irq.num_types = 1; + adev->gfx.bad_op_irq.funcs = &gfx_v9_4_3_bad_op_irq_funcs; + adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v9_4_3_priv_inst_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm new file mode 100644 index 000000000000..d5325ef80ab0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.asm @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader. +//To turn this shader program on for complitaion change this to main and lower shader main to main_1 + +// MI300 : Clear SGPRs, VGPRs and LDS +// Uses two kernels launched separately: +// 1. Clean VGPRs, LDS, and lower SGPRs +// Launches one workgroup per CU, each workgroup with 4x wave64 per SIMD in the CU +// Waves are "wave64" and have 128 VGPRs each, which uses all 512 VGPRs per SIMD +// Waves in the workgroup share the 64KB of LDS +// Each wave clears SGPRs 0 - 95. Because there are 4 waves/SIMD, this is physical SGPRs 0-383 +// Each wave clears 128 VGPRs, so all 512 in the SIMD +// The first wave of the workgroup clears its 64KB of LDS +// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup +// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared. +// 2. Clean remaining SGPRs +// Launches a workgroup with 24 waves per workgroup, yielding 6 waves per SIMD in each CU +// Waves are allocating 96 SGPRs +// CP sets up SPI_RESOURCE_RESERVE_* registers to prevent these waves from allocating SGPRs 0-223. +// As such, these 6 waves per SIMD are allocated physical SGPRs 224-799 +// Barriers do not work for >16 waves per workgroup, so we cannot start with S_BARRIER +// Instead, the shader starts with an S_SETHALT 1. Once all waves are launched CP will send unhalt command +// The shader then clears all SGPRs allocated to it, cleaning out physical SGPRs 224-799 + +shader main + asic(MI300) + type(CS) + wave_size(64) +// Note: original source code from SQ team + +// (theorhetical fastest = ~512clks vgpr + 1536 lds + ~128 sgpr = 2176 clks) + + s_cmp_eq_u32 s0, 1 // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_3 + s_cbranch_scc0 label_0023 // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s3 == 1) + S_BARRIER + + s_movk_i32 m0, 0x0000 + s_mov_b32 s2, 0x00000078 // Loop 128/8=16 times (loop unrolled for performance) + // + // CLEAR VGPRs + // + s_set_gpr_idx_on s2, 0x8 // enable Dest VGPR indexing +label_0005: + v_mov_b32 v0, 0 + v_mov_b32 v1, 0 + v_mov_b32 v2, 0 + v_mov_b32 v3, 0 + v_mov_b32 v4, 0 + v_mov_b32 v5, 0 + v_mov_b32 v6, 0 + v_mov_b32 v7, 0 + s_sub_u32 s2, s2, 8 + s_set_gpr_idx_idx s2 + s_cbranch_scc0 label_0005 + s_set_gpr_idx_off + + // + // + + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_cbranch_scc0 label_clean_sgpr_1 // Clean LDS if its first wave of ThreadGroup/WorkGroup + // CLEAR LDS + // + s_mov_b32 exec_lo, 0xffffffff + s_mov_b32 exec_hi, 0xffffffff + v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63) + v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63) + v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte) + s_mov_b32 s2, 0x00000003f // 64 loop iteraions + s_mov_b32 m0, 0xffffffff + // Clear all of LDS space + // Each FirstWave of WorkGroup clears 64kbyte block + +label_001F: + ds_write2_b64 v1, v[2:3], v[2:3] offset1:32 + ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96 + v_add_co_u32 v1, vcc, 0x00000400, v1 + s_sub_u32 s2, s2, 1 + s_cbranch_scc0 label_001F + // + // CLEAR SGPRs + // +label_clean_sgpr_1: + s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance) + s_nop 0 +label_sgpr_loop: + s_movreld_b32 s0, 0 + s_movreld_b32 s1, 0 + s_movreld_b32 s2, 0 + s_movreld_b32 s3, 0 + s_sub_u32 m0, m0, 4 + s_cbranch_scc0 label_sgpr_loop + + //clear vcc, flat scratch + s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR + s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR + s_mov_b64 vcc, 0 //clear vcc + s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1 + s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3 + s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5 + s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7 + s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9 + s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11 + s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13 + s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15 +s_endpgm + +label_0023: + + s_sethalt 1 + + s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance) + s_nop 0 +label_sgpr_loop1: + + s_movreld_b32 s0, 0 + s_movreld_b32 s1, 0 + s_movreld_b32 s2, 0 + s_movreld_b32 s3, 0 + s_sub_u32 m0, m0, 4 + s_cbranch_scc0 label_sgpr_loop1 + + //clear vcc, flat scratch + s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR + s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR + s_mov_b64 vcc, 0xee //clear vcc + +s_endpgm +end + diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h new file mode 100644 index 000000000000..69aa567c6c1d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3_cleaner_shader.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* Define the cleaner shader gfx_9_4_3 */ +static const u32 gfx_9_4_3_cleaner_shader_hex[] = { + 0xbf068100, 0xbf84003b, + 0xbf8a0000, 0xb07c0000, + 0xbe8200ff, 0x00000078, + 0xbf110802, 0x7e000280, + 0x7e020280, 0x7e040280, + 0x7e060280, 0x7e080280, + 0x7e0a0280, 0x7e0c0280, + 0x7e0e0280, 0x80828802, + 0xbe803202, 0xbf84fff5, + 0xbf9c0000, 0xbe8200ff, + 0x80000000, 0x86020102, + 0xbf840011, 0xbefe00c1, + 0xbeff00c1, 0xd28c0001, + 0x0001007f, 0xd28d0001, + 0x0002027e, 0x10020288, + 0xbe8200bf, 0xbefc00c1, + 0xd89c2000, 0x00020201, + 0xd89c6040, 0x00040401, + 0x320202ff, 0x00000400, + 0x80828102, 0xbf84fff8, + 0xbefc00ff, 0x0000005c, + 0xbf800000, 0xbe802c80, + 0xbe812c80, 0xbe822c80, + 0xbe832c80, 0x80fc847c, + 0xbf84fffa, 0xbee60080, + 0xbee70080, 0xbeea0180, + 0xbeec0180, 0xbeee0180, + 0xbef00180, 0xbef20180, + 0xbef40180, 0xbef60180, + 0xbef80180, 0xbefa0180, + 0xbf810000, 0xbf8d0001, + 0xbefc00ff, 0x0000005c, + 0xbf800000, 0xbe802c80, + 0xbe812c80, 0xbe822c80, + 0xbe832c80, 0x80fc847c, + 0xbf84fffa, 0xbee60080, + 0xbee70080, 0xbeea01ff, + 0x000000ee, 0xbf810000, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index d200310d1731..0e3ddea7b8e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -443,23 +443,6 @@ static void gfxhub_v1_0_init(struct amdgpu_device *adev) mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } -static bool gfxhub_v1_0_query_utcl2_poison_status(struct amdgpu_device *adev, - int xcc_id) -{ - u32 status = 0; - struct amdgpu_vmhub *hub; - - if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) - return false; - - hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - status = RREG32(hub->vm_l2_pro_fault_status); - /* reset page fault status */ - WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); - - return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); -} - const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset, .setup_vm_pt_regs = gfxhub_v1_0_setup_vm_pt_regs, @@ -468,5 +451,4 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default, .init = gfxhub_v1_0_init, .get_xgmi_info = gfxhub_v1_1_get_xgmi_info, - .query_utcl2_poison_status = gfxhub_v1_0_query_utcl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 72109abe7c86..ed8e130c7d19 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -622,22 +622,6 @@ static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev) return 0; } -static bool gfxhub_v1_2_query_utcl2_poison_status(struct amdgpu_device *adev, - int xcc_id) -{ - u32 fed, status; - - status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVM_L2_PROTECTION_FAULT_STATUS); - fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); - if (!amdgpu_sriov_vf(adev)) { - /* clear page fault status and address */ - WREG32_P(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), - regVM_L2_PROTECTION_FAULT_CNTL), 1, ~1); - } - - return fed; -} - const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset, .setup_vm_pt_regs = gfxhub_v1_2_setup_vm_pt_regs, @@ -646,7 +630,6 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default, .init = gfxhub_v1_2_init, .get_xgmi_info = gfxhub_v1_2_get_xgmi_info, - .query_utcl2_poison_status = gfxhub_v1_2_query_utcl2_poison_status, }; static int gfxhub_v1_2_xcp_resume(void *handle, uint32_t inst_mask) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index f0ceab3ce5bf..9784a2892185 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -132,7 +132,8 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, write_fault)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, + entry->timestamp, write_fault)) return 1; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index b88a6fa173b3..2797fd84432b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -231,7 +231,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid, GET_INST(GC, 0)); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index fd3ac483760e..edcb5351f8cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -299,7 +299,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; @@ -542,6 +542,23 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) return 0; } +static unsigned int gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) +{ + unsigned int max_tex_channel_caches, alignment; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 1)) + return 0; + + max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches; + if (is_power_of_2(max_tex_channel_caches)) + alignment = (unsigned int)(max_tex_channel_caches / SZ_4); + else + alignment = roundup_pow_of_two(max_tex_channel_caches); + + return (unsigned int)(alignment * max_tex_channel_caches * SZ_1K); +} + static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, @@ -551,6 +568,7 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .get_vm_pde = gmc_v12_0_get_vm_pde, .get_vm_pte = gmc_v12_0_get_vm_pte, .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, }; static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b73136d390cc..c76ac0dfe572 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -595,7 +595,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, cam_index = entry->src_data[2] & 0x3ff; ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, - addr, write_fault); + addr, entry->timestamp, write_fault); WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); if (ret) return 1; @@ -618,7 +618,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, * tables */ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, - addr, write_fault)) + addr, entry->timestamp, write_fault)) return 1; } } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 077c6d920e27..e019249883fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -41,7 +41,7 @@ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index a9ea23fa0def..ed7facacf2fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -32,7 +32,7 @@ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index ab06c2b4b20b..33736d361dd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -35,7 +35,7 @@ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 8d7d0813e331..1c99bb09e2a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -32,7 +32,7 @@ static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c index aac107898bae..964c29ef25dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c @@ -42,23 +42,23 @@ static const unsigned int isp_4_1_0_int_srcid[MAX_ISP410_INT_SRC] = { static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp) { struct amdgpu_device *adev = isp->adev; + int idx, int_idx, num_res, r; u64 isp_base; - int int_idx; - int r; if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) return -EINVAL; isp_base = adev->rmmio_base; - isp->isp_cell = kcalloc(1, sizeof(struct mfd_cell), GFP_KERNEL); + isp->isp_cell = kcalloc(2, sizeof(struct mfd_cell), GFP_KERNEL); if (!isp->isp_cell) { r = -ENOMEM; DRM_ERROR("%s: isp mfd cell alloc failed\n", __func__); goto failure; } - isp->isp_res = kcalloc(MAX_ISP410_INT_SRC + 1, sizeof(struct resource), + num_res = MAX_ISP410_MEM_RES + MAX_ISP410_SENSOR_RES + MAX_ISP410_INT_SRC; + isp->isp_res = kcalloc(num_res, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_res) { r = -ENOMEM; @@ -83,22 +83,53 @@ static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp) isp->isp_res[0].start = isp_base; isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END; - for (int_idx = 0; int_idx < MAX_ISP410_INT_SRC; int_idx++) { - isp->isp_res[int_idx + 1].name = "isp_4_1_0_irq"; - isp->isp_res[int_idx + 1].flags = IORESOURCE_IRQ; - isp->isp_res[int_idx + 1].start = + isp->isp_res[1].name = "isp_4_1_phy0_reg"; + isp->isp_res[1].flags = IORESOURCE_MEM; + isp->isp_res[1].start = isp_base + ISP410_PHY0_OFFSET; + isp->isp_res[1].end = isp_base + ISP410_PHY0_OFFSET + ISP410_PHY0_SIZE; + + isp->isp_res[2].name = "isp_gpio_sensor0_reg"; + isp->isp_res[2].flags = IORESOURCE_MEM; + isp->isp_res[2].start = isp_base + ISP410_GPIO_SENSOR0_OFFSET; + isp->isp_res[2].end = isp_base + ISP410_GPIO_SENSOR0_OFFSET + + ISP410_GPIO_SENSOR0_SIZE; + + for (idx = MAX_ISP410_MEM_RES + MAX_ISP410_SENSOR_RES, int_idx = 0; + idx < num_res; idx++, int_idx++) { + isp->isp_res[idx].name = "isp_4_1_0_irq"; + isp->isp_res[idx].flags = IORESOURCE_IRQ; + isp->isp_res[idx].start = amdgpu_irq_create_mapping(adev, isp_4_1_0_int_srcid[int_idx]); - isp->isp_res[int_idx + 1].end = - isp->isp_res[int_idx + 1].start; + isp->isp_res[idx].end = + isp->isp_res[idx].start; } isp->isp_cell[0].name = "amd_isp_capture"; - isp->isp_cell[0].num_resources = MAX_ISP410_INT_SRC + 1; + isp->isp_cell[0].num_resources = num_res; isp->isp_cell[0].resources = &isp->isp_res[0]; isp->isp_cell[0].platform_data = isp->isp_pdata; isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data); - r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 1); + isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), + GFP_KERNEL); + if (!isp->isp_i2c_res) { + r = -ENOMEM; + DRM_ERROR("%s: isp mfd res alloc failed\n", __func__); + goto failure; + } + + isp->isp_i2c_res[0].name = "isp_i2c0_reg"; + isp->isp_i2c_res[0].flags = IORESOURCE_MEM; + isp->isp_i2c_res[0].start = isp_base + ISP410_I2C0_OFFSET; + isp->isp_i2c_res[0].end = isp_base + ISP410_I2C0_OFFSET + ISP410_I2C0_SIZE; + + isp->isp_cell[1].name = "amd_isp_i2c_designware"; + isp->isp_cell[1].num_resources = 1; + isp->isp_cell[1].resources = &isp->isp_i2c_res[0]; + isp->isp_cell[1].platform_data = isp->isp_pdata; + isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data); + + r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2); if (r) { DRM_ERROR("%s: add mfd hotplug device failed\n", __func__); goto failure; @@ -111,6 +142,7 @@ failure: kfree(isp->isp_pdata); kfree(isp->isp_res); kfree(isp->isp_cell); + kfree(isp->isp_i2c_res); return r; } @@ -122,6 +154,7 @@ static int isp_v4_1_0_hw_fini(struct amdgpu_isp *isp) kfree(isp->isp_res); kfree(isp->isp_cell); kfree(isp->isp_pdata); + kfree(isp->isp_i2c_res); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h index 315f2822410c..7db24c0f1080 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h @@ -32,8 +32,19 @@ #include "ivsrcid/isp/irqsrcs_isp_4_1.h" +#define MAX_ISP410_MEM_RES 2 +#define MAX_ISP410_SENSOR_RES 1 #define MAX_ISP410_INT_SRC 8 +#define ISP410_PHY0_OFFSET 0x66700 +#define ISP410_PHY0_SIZE 0xD30 + +#define ISP410_I2C0_OFFSET 0x66400 +#define ISP410_I2C0_SIZE 0x100 + +#define ISP410_GPIO_SENSOR0_OFFSET 0x6613C +#define ISP410_GPIO_SENSOR0_SIZE 0x4 + void isp_v4_1_0_set_isp_funcs(struct amdgpu_isp *isp); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index 4e17fa03f7b5..b56f27295468 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -42,23 +42,24 @@ static const unsigned int isp_4_1_1_int_srcid[MAX_ISP411_INT_SRC] = { static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) { struct amdgpu_device *adev = isp->adev; + int idx, int_idx, num_res, r; u64 isp_base; - int int_idx; - int r; if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) return -EINVAL; isp_base = adev->rmmio_base; - isp->isp_cell = kcalloc(1, sizeof(struct mfd_cell), GFP_KERNEL); + isp->isp_cell = kcalloc(2, sizeof(struct mfd_cell), GFP_KERNEL); if (!isp->isp_cell) { r = -ENOMEM; DRM_ERROR("%s: isp mfd cell alloc failed\n", __func__); goto failure; } - isp->isp_res = kcalloc(MAX_ISP411_INT_SRC + 1, sizeof(struct resource), + num_res = MAX_ISP411_MEM_RES + MAX_ISP411_SENSOR_RES + MAX_ISP411_INT_SRC; + + isp->isp_res = kcalloc(num_res, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_res) { r = -ENOMEM; @@ -83,22 +84,52 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) isp->isp_res[0].start = isp_base; isp->isp_res[0].end = isp_base + ISP_REGS_OFFSET_END; - for (int_idx = 0; int_idx < MAX_ISP411_INT_SRC; int_idx++) { - isp->isp_res[int_idx + 1].name = "isp_4_1_1_irq"; - isp->isp_res[int_idx + 1].flags = IORESOURCE_IRQ; - isp->isp_res[int_idx + 1].start = + isp->isp_res[1].name = "isp_4_1_1_phy0_reg"; + isp->isp_res[1].flags = IORESOURCE_MEM; + isp->isp_res[1].start = isp_base + ISP411_PHY0_OFFSET; + isp->isp_res[1].end = isp_base + ISP411_PHY0_OFFSET + ISP411_PHY0_SIZE; + + isp->isp_res[2].name = "isp_4_1_1_sensor0_reg"; + isp->isp_res[2].flags = IORESOURCE_MEM; + isp->isp_res[2].start = isp_base + ISP411_GPIO_SENSOR0_OFFSET; + isp->isp_res[2].end = isp_base + ISP411_GPIO_SENSOR0_OFFSET + + ISP411_GPIO_SENSOR0_SIZE; + + for (idx = MAX_ISP411_MEM_RES + MAX_ISP411_SENSOR_RES, int_idx = 0; + idx < num_res; idx++, int_idx++) { + isp->isp_res[idx].name = "isp_4_1_1_irq"; + isp->isp_res[idx].flags = IORESOURCE_IRQ; + isp->isp_res[idx].start = amdgpu_irq_create_mapping(adev, isp_4_1_1_int_srcid[int_idx]); - isp->isp_res[int_idx + 1].end = - isp->isp_res[int_idx + 1].start; + isp->isp_res[idx].end = + isp->isp_res[idx].start; } isp->isp_cell[0].name = "amd_isp_capture"; - isp->isp_cell[0].num_resources = MAX_ISP411_INT_SRC + 1; + isp->isp_cell[0].num_resources = num_res; isp->isp_cell[0].resources = &isp->isp_res[0]; isp->isp_cell[0].platform_data = isp->isp_pdata; isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data); - r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 1); + isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL); + if (!isp->isp_i2c_res) { + r = -ENOMEM; + DRM_ERROR("%s: isp mfd res alloc failed\n", __func__); + goto failure; + } + + isp->isp_i2c_res[0].name = "isp_i2c0_reg"; + isp->isp_i2c_res[0].flags = IORESOURCE_MEM; + isp->isp_i2c_res[0].start = isp_base + ISP411_I2C0_OFFSET; + isp->isp_i2c_res[0].end = isp_base + ISP411_I2C0_OFFSET + ISP411_I2C0_SIZE; + + isp->isp_cell[1].name = "amd_isp_i2c_designware"; + isp->isp_cell[1].num_resources = 1; + isp->isp_cell[1].resources = &isp->isp_i2c_res[0]; + isp->isp_cell[1].platform_data = isp->isp_pdata; + isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data); + + r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2); if (r) { DRM_ERROR("%s: add mfd hotplug device failed\n", __func__); goto failure; @@ -111,6 +142,7 @@ failure: kfree(isp->isp_pdata); kfree(isp->isp_res); kfree(isp->isp_cell); + kfree(isp->isp_i2c_res); return r; } @@ -122,6 +154,7 @@ static int isp_v4_1_1_hw_fini(struct amdgpu_isp *isp) kfree(isp->isp_res); kfree(isp->isp_cell); kfree(isp->isp_pdata); + kfree(isp->isp_i2c_res); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h index dfb9522c9d6a..40887ddeb08c 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h @@ -32,8 +32,19 @@ #include "ivsrcid/isp/irqsrcs_isp_4_1.h" +#define MAX_ISP411_MEM_RES 2 +#define MAX_ISP411_SENSOR_RES 1 #define MAX_ISP411_INT_SRC 8 +#define ISP411_PHY0_OFFSET 0x66700 +#define ISP411_PHY0_SIZE 0xD30 + +#define ISP411_I2C0_OFFSET 0x66400 +#define ISP411_I2C0_SIZE 0x100 + +#define ISP411_GPIO_SENSOR0_OFFSET 0x6613C +#define ISP411_GPIO_SENSOR0_SIZE 0x4 + void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 99adf3625657..98aa3ccd0d20 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -538,11 +538,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index ad524ddc9760..6ae5a784e187 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "jpeg_v4_0_3.h" @@ -782,11 +783,15 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -1084,6 +1089,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -1248,3 +1254,56 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) { adev->jpeg.ras = &jpeg_v4_0_3_ras; } + +/** + * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + uint32_t i, reg, res, cond, type; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res) /* only support 0 at the moment */ + return -EINVAL; + + switch (type) { + case PACKETJ_TYPE0: + if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE3: + if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] == CP_PACKETJ_NOP) + continue; + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + default: + dev_err(adev->dev, "Unknown packet type %d !\n", type); + return -EINVAL; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index 747a3e5f6856..71c54b294e15 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -46,6 +46,9 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 +#define JPEG_REG_RANGE_START 0x4000 +#define JPEG_REG_RANGE_END 0x41c2 + extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, @@ -62,5 +65,7 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask); - +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); #endif /* __JPEG_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index d694a276498a..f4daff90c770 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -646,6 +646,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index f9343642ae7e..8edcd85a1261 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -162,13 +162,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[0]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -191,11 +191,18 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(&mes->ring_lock[0], flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -208,14 +215,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); op_str = mes_v11_0_get_op_string(x_pkt); misc_op_str = mes_v11_0_get_misc_op_string(x_pkt); @@ -229,7 +235,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) @@ -252,8 +258,12 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -350,6 +360,26 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; + /*mes_reset_queue_pkt.reset_queue_only = 1;*/ + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { @@ -411,13 +441,41 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes, struct mes_suspend_gang_input *input) { - return 0; + union MESAPI__SUSPEND mes_suspend_gang_pkt; + + memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt)); + + mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND; + mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs; + mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr; + mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr; + mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt), + offsetof(union MESAPI__SUSPEND, api_status)); } static int mes_v11_0_resume_gang(struct amdgpu_mes *mes, struct mes_resume_gang_input *input) { - return 0; + union MESAPI__RESUME mes_resume_gang_pkt; + + memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt)); + + mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME; + mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs; + mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt), + offsetof(union MESAPI__RESUME, api_status)); } static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes) @@ -512,9 +570,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0]; mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; + mes->query_status_fence_gpu_addr[0]; for (i = 0; i < MAX_COMPUTE_PIPES; i++) mes_set_hw_res_pkt.compute_hqd_mask[i] = @@ -585,6 +643,38 @@ static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } +static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes, + struct mes_reset_legacy_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + + if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + mes_reset_queue_pkt.reset_legacy_gfx = 1; + mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; + mes_reset_queue_pkt.queue_id_lp = input->queue_id; + mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; + mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; + mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; + mes_reset_queue_pkt.vmid_id_lp = input->vmid; + } else { + mes_reset_queue_pkt.reset_queue_only = 1; + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + } + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__RESET, api_status)); +} + static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .add_hw_queue = mes_v11_0_add_hw_queue, .remove_hw_queue = mes_v11_0_remove_hw_queue, @@ -593,6 +683,8 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .suspend_gang = mes_v11_0_suspend_gang, .resume_gang = mes_v11_0_resume_gang, .misc_op = mes_v11_0_misc_op, + .reset_legacy_queue = mes_v11_0_reset_legacy_queue, + .reset_hw_queue = mes_v11_0_reset_hw_queue, }; static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev, @@ -1015,7 +1107,7 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); return amdgpu_ring_test_helper(kiq_ring); } @@ -1029,7 +1121,7 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1071,7 +1163,7 @@ static int mes_v11_0_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; ring->funcs = &mes_v11_0_ring_funcs; @@ -1124,7 +1216,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1200,9 +1292,6 @@ static int mes_v11_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); @@ -1216,12 +1305,12 @@ static int mes_v11_0_sw_fini(void *handle) &adev->gfx.kiq[0].ring.mqd_gpu_addr, &adev->gfx.kiq[0].ring.mqd_ptr); - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, + &adev->mes.ring[0].mqd_gpu_addr, + &adev->mes.ring[0].mqd_ptr); amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + amdgpu_ring_fini(&adev->mes.ring[0]); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1332,9 +1421,9 @@ failure: static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v11_0_kiq_dequeue(&adev->mes.ring); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + mes_v11_0_kiq_dequeue(&adev->mes.ring[0]); + adev->mes.ring[0].sched.ready = false; } if (amdgpu_sriov_vf(adev)) { @@ -1352,7 +1441,7 @@ static int mes_v11_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; if (!adev->enable_mes_kiq) { @@ -1397,7 +1486,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 0713bc3eb263..47a73f6ae4da 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -142,19 +142,20 @@ static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt) } static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size, - int api_status_off) + int pipe, void *pkt, int size, + int api_status_off) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[pipe]; + spinlock_t *ring_lock = &mes->ring_lock[pipe]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -177,11 +178,18 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(ring_lock, flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -194,39 +202,39 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(ring_lock, flags); op_str = mes_v12_0_get_op_string(x_pkt); misc_op_str = mes_v12_0_get_misc_op_string(x_pkt); if (misc_op_str) - dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, - misc_op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n", + pipe, op_str); else - dev_dbg(adev->dev, "MES msg=%d was emitted\n", - x_pkt->header.opcode); + dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n", + pipe, x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", - op_str, misc_op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s\n", - op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n", + pipe, op_str); else - dev_err(adev->dev, "MES failed to respond to msg=%d\n", - x_pkt->header.opcode); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n", + pipe, x_pkt->header.opcode); while (halt_if_hws_hang) schedule(); @@ -238,8 +246,12 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(ring_lock, flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -254,6 +266,8 @@ static int convert_to_mes_queue_type(int queue_type) return MES_QUEUE_TYPE_COMPUTE; else if (queue_type == AMDGPU_RING_TYPE_SDMA) return MES_QUEUE_TYPE_SDMA; + else if (queue_type == AMDGPU_RING_TYPE_MES) + return MES_QUEUE_TYPE_SCHQ; else BUG(); return -1; @@ -311,6 +325,7 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gds_size = input->queue_size; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -330,14 +345,42 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + int pipe; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr; + /*mes_reset_queue_pkt.reset_queue_only = 1;*/ + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); +} + static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { union MESAPI__ADD_QUEUE mes_add_queue_pkt; + int pipe; memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); @@ -354,7 +397,12 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); mes_add_queue_pkt.map_legacy_kq = 1; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -363,6 +411,7 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, struct mes_unmap_legacy_queue_input *input) { union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + int pipe; memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); @@ -387,7 +436,12 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -404,7 +458,7 @@ static int mes_v12_0_resume_gang(struct amdgpu_mes *mes, return 0; } -static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) +static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; @@ -414,7 +468,7 @@ static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_status_pkt, sizeof(mes_status_pkt), offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); } @@ -423,6 +477,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, struct mes_misc_op_input *input) { union MESAPI__MISC misc_pkt; + int pipe; memset(&misc_pkt, 0, sizeof(misc_pkt)); @@ -475,12 +530,17 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, return -EINVAL; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &misc_pkt, sizeof(misc_pkt), offsetof(union MESAPI__MISC, api_status)); } -static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) { union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; @@ -491,12 +551,12 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } -static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) { int i; struct amdgpu_device *adev = mes->adev; @@ -508,27 +568,33 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; - mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; - mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; - mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; - mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; - - for (i = 0; i < MAX_COMPUTE_PIPES; i++) - mes_set_hw_res_pkt.compute_hqd_mask[i] = - mes->compute_hqd_mask[i]; - - for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; - - for (i = 0; i < MAX_SDMA_PIPES; i++) - mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; + if (pipe == AMDGPU_MES_SCHED_PIPE) { + mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; + mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; + mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; + mes_set_hw_res_pkt.paging_vmid = 0; + + for (i = 0; i < MAX_COMPUTE_PIPES; i++) + mes_set_hw_res_pkt.compute_hqd_mask[i] = + mes->compute_hqd_mask[i]; + + for (i = 0; i < MAX_GFX_PIPES; i++) + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; + + for (i = 0; i < MAX_SDMA_PIPES; i++) + mes_set_hw_res_pkt.sdma_hqd_mask[i] = + mes->sdma_hqd_mask[i]; + + for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) + mes_set_hw_res_pkt.aggregated_doorbells[i] = + mes->aggregated_doorbells[i]; + } - for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) - mes_set_hw_res_pkt.aggregated_doorbells[i] = - mes->aggregated_doorbells[i]; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = + mes->sch_ctx_gpu_addr[pipe]; + mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = + mes->query_status_fence_gpu_addr[pipe]; for (i = 0; i < 5; i++) { mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; @@ -556,7 +622,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } @@ -636,6 +702,44 @@ static void mes_v12_0_enable_unmapped_doorbell_handling( WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data); } +static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, + struct mes_reset_legacy_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + int pipe; + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + + if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) { + mes_reset_queue_pkt.reset_legacy_gfx = 1; + mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; + mes_reset_queue_pkt.queue_id_lp = input->queue_id; + mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; + mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; + mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; + mes_reset_queue_pkt.vmid_id_lp = input->vmid; + } else { + mes_reset_queue_pkt.reset_queue_only = 1; + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + } + + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__RESET, api_status)); +} + static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .add_hw_queue = mes_v12_0_add_hw_queue, .remove_hw_queue = mes_v12_0_remove_hw_queue, @@ -644,6 +748,8 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = { .suspend_gang = mes_v12_0_suspend_gang, .resume_gang = mes_v12_0_resume_gang, .misc_op = mes_v12_0_misc_op, + .reset_legacy_queue = mes_v12_0_reset_legacy_queue, + .reset_hw_queue = mes_v12_0_reset_hw_queue, }; static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev, @@ -734,16 +840,11 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) if (enable) { data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - soc21_grbm_select(adev, 3, pipe, 0, 0); ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; @@ -757,8 +858,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) /* unhalt MES and activate pipe0 */ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); if (amdgpu_emu_mode) @@ -774,8 +874,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) data = REG_SET_FIELD(data, CP_MES_CNTL, MES_INVALIDATE_ICACHE, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); } @@ -790,10 +889,6 @@ static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev) mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - /* me=3, queue=0 */ soc21_grbm_select(adev, 3, pipe, 0, 0); @@ -1085,7 +1180,7 @@ static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); r = amdgpu_ring_test_ring(kiq_ring); if (r) { @@ -1101,14 +1196,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, struct amdgpu_ring *ring; int r; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; else - BUG(); + ring = &adev->mes.ring[pipe]; - if ((pipe == AMDGPU_MES_SCHED_PIPE) && + if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && (amdgpu_in_reset(adev) || adev->in_suspend)) { *(ring->wptr_cpu_addr) = 0; *(ring->rptr_cpu_addr) = 0; @@ -1120,13 +1213,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return r; if (pipe == AMDGPU_MES_SCHED_PIPE) { - if (adev->enable_uni_mes) { - mes_v12_0_queue_init_register(ring); - } else { + if (adev->enable_uni_mes) + r = amdgpu_mes_map_legacy_queue(adev, ring); + else r = mes_v12_0_kiq_enable_queue(adev); - if (r) - return r; - } + if (r) + return r; } else { mes_v12_0_queue_init_register(ring); } @@ -1146,25 +1238,29 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return 0; } -static int mes_v12_0_ring_init(struct amdgpu_device *adev) +static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[pipe]; ring->funcs = &mes_v12_0_ring_funcs; ring->me = 3; - ring->pipe = 0; + ring->pipe = pipe; ring->queue = 0; ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; - ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE]; + ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe]; ring->no_scheduler = true; sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue); + if (pipe == AMDGPU_MES_SCHED_PIPE) + ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; + else + ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1; + return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); } @@ -1178,7 +1274,7 @@ static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev) ring = &adev->gfx.kiq[0].ring; ring->me = 3; - ring->pipe = adev->enable_uni_mes ? 0 : 1; + ring->pipe = 1; ring->queue = 0; ring->adev = NULL; @@ -1200,12 +1296,10 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev, int r, mqd_size = sizeof(struct v12_compute_mqd); struct amdgpu_ring *ring; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; else - BUG(); + ring = &adev->mes.ring[pipe]; if (ring->mqd_obj) return 0; @@ -1246,9 +1340,6 @@ static int mes_v12_0_sw_init(void *handle) return r; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; - r = mes_v12_0_allocate_eop_buf(adev, pipe); if (r) return r; @@ -1256,18 +1347,15 @@ static int mes_v12_0_sw_init(void *handle) r = mes_v12_0_mqd_sw_init(adev, pipe); if (r) return r; - } - if (adev->enable_mes_kiq) { - r = mes_v12_0_kiq_ring_init(adev); + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) + r = mes_v12_0_kiq_ring_init(adev); + else + r = mes_v12_0_ring_init(adev, pipe); if (r) return r; } - r = mes_v12_0_ring_init(adev); - if (r) - return r; - return 0; } @@ -1276,9 +1364,6 @@ static int mes_v12_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); @@ -1286,18 +1371,21 @@ static int mes_v12_0_sw_fini(void *handle) &adev->mes.eop_gpu_addr[pipe], NULL); amdgpu_ucode_release(&adev->mes.fw[pipe]); - } - amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, - &adev->gfx.kiq[0].ring.mqd_gpu_addr, - &adev->gfx.kiq[0].ring.mqd_ptr); - - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) { + amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj, + &adev->mes.ring[pipe].mqd_gpu_addr, + &adev->mes.ring[pipe].mqd_ptr); + amdgpu_ring_fini(&adev->mes.ring[pipe]); + } + } - amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + if (!adev->enable_uni_mes) { + amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, + &adev->gfx.kiq[0].ring.mqd_gpu_addr, + &adev->gfx.kiq[0].ring.mqd_ptr); + amdgpu_ring_fini(&adev->gfx.kiq[0].ring); + } if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1341,7 +1429,7 @@ static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring) @@ -1362,10 +1450,10 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) { int r = 0; - mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); - if (adev->enable_uni_mes) - return mes_v12_0_hw_init(adev); + mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]); + else + mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { @@ -1392,6 +1480,14 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) if (r) goto failure; + if (adev->enable_uni_mes) { + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE); + if (r) + goto failure; + + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE); + } + r = mes_v12_0_hw_init(adev); if (r) goto failure; @@ -1405,9 +1501,15 @@ failure: static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v12_0_kiq_dequeue_sched(adev); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + if (adev->enable_uni_mes) + amdgpu_mes_unmap_legacy_queue(adev, + &adev->mes.ring[AMDGPU_MES_SCHED_PIPE], + RESET_QUEUES, 0, 0); + else + mes_v12_0_kiq_dequeue_sched(adev); + + adev->mes.ring[0].sched.ready = false; } mes_v12_0_enable(adev, false); @@ -1420,10 +1522,10 @@ static int mes_v12_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; - if (!adev->enable_mes_kiq || adev->enable_uni_mes) { + if (!adev->enable_mes_kiq) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { r = mes_v12_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, true); @@ -1443,23 +1545,23 @@ static int mes_v12_0_hw_init(void *handle) mes_v12_0_enable(adev, true); } + /* Enable the MES to handle doorbell ring on unmapped queue */ + mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); + r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; - r = mes_v12_0_set_hw_resources(&adev->mes); + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; if (adev->enable_uni_mes) - mes_v12_0_set_hw_resources_1(&adev->mes); + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); - /* Enable the MES to handle doorbell ring on unmapped queue */ - mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); - - r = mes_v12_0_query_sched_status(&adev->mes); + r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) { DRM_ERROR("MES is busy\n"); goto failure; @@ -1472,7 +1574,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; @@ -1515,17 +1617,7 @@ static int mes_v12_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe, r; - if (adev->enable_uni_mes) { - r = amdgpu_mes_init_microcode(adev, AMDGPU_MES_SCHED_PIPE); - if (!r) - return 0; - - adev->enable_uni_mes = false; - } - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; r = amdgpu_mes_init_microcode(adev, pipe); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 621761a17ac7..b01bb759d0f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -559,22 +559,6 @@ static void mmhub_v1_8_get_clockgating(struct amdgpu_device *adev, u64 *flags) } -static bool mmhub_v1_8_query_utcl2_poison_status(struct amdgpu_device *adev, - int hub_inst) -{ - u32 fed, status; - - status = RREG32_SOC15(MMHUB, hub_inst, regVM_L2_PROTECTION_FAULT_STATUS); - fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); - if (!amdgpu_sriov_vf(adev)) { - /* clear page fault status and address */ - WREG32_P(SOC15_REG_OFFSET(MMHUB, hub_inst, - regVM_L2_PROTECTION_FAULT_CNTL), 1, ~1); - } - - return fed; -} - const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .get_fb_location = mmhub_v1_8_get_fb_location, .init = mmhub_v1_8_init, @@ -584,7 +568,6 @@ const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .setup_vm_pt_regs = mmhub_v1_8_setup_vm_pt_regs, .set_clockgating = mmhub_v1_8_set_clockgating, .get_clockgating = mmhub_v1_8_get_clockgating, - .query_utcl2_poison_status = mmhub_v1_8_query_utcl2_poison_status, }; static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = { @@ -670,8 +653,8 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, &ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); } static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index 5bbaa2b2caab..0fbc3be81f14 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -80,7 +80,8 @@ static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid, /* invalidate using legacy mode on vmid*/ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, PER_VMID_INVALIDATE_REQ, 1 << vmid); - req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + /* Only use legacy inv on mmhub side */ + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index caf616a2c8a6..1d099ffb3a5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -25,7 +25,7 @@ #define __MXGPU_NV_H__ #define NV_MAILBOX_POLL_ACK_TIMEDOUT 500 -#define NV_MAILBOX_POLL_MSG_TIMEDOUT 6000 +#define NV_MAILBOX_POLL_MSG_TIMEDOUT 15000 #define NV_MAILBOX_POLL_FLR_TIMEDOUT 10000 #define NV_MAILBOX_POLL_MSG_REP_MAX 11 diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 772604feb6ac..23ef4eb36b40 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -72,6 +72,53 @@ MODULE_FIRMWARE("amdgpu/renoir_sdma.bin"); MODULE_FIRMWARE("amdgpu/green_sardine_sdma.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_sdma.bin"); +static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL) +}; + #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L @@ -1750,6 +1797,8 @@ static int sdma_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); + uint32_t *ptr; /* SDMA trap event */ for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1870,6 +1919,13 @@ static int sdma_v4_0_sw_init(void *handle) return -EINVAL; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1890,6 +1946,8 @@ static int sdma_v4_0_sw_fini(void *handle) else amdgpu_sdma_destroy_inst_ctx(adev, false); + kfree(adev->sdma.ip_dump); + return 0; } @@ -2292,6 +2350,48 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v4_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_4_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + +static void sdma_v4_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v4_0_get_reg_offset(adev, i, + sdma_reg_list_4_0[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v4_0_ip_funcs = { .name = "sdma_v4_0", .early_init = sdma_v4_0_early_init, @@ -2308,6 +2408,8 @@ const struct amd_ip_funcs sdma_v4_0_ip_funcs = { .set_clockgating_state = sdma_v4_0_set_clockgating_state, .set_powergating_state = sdma_v4_0_set_powergating_state, .get_clockgating_state = sdma_v4_0_get_clockgating_state, + .dump_ip_state = sdma_v4_0_dump_ip_state, + .print_ip_state = sdma_v4_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 2c55bfd935bb..c77889040760 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -46,6 +46,53 @@ MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin"); MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin"); +static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = { + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA_VM_CNTL) +}; + #define mmSMNAID_AID0_MCA_SMU 0x03b30400 #define WREG32_SDMA(instance, offset, value) \ @@ -1291,6 +1338,8 @@ static int sdma_v4_4_2_sw_init(void *handle) int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 aid_id; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2); + uint32_t *ptr; /* SDMA trap event */ for (i = 0; i < adev->sdma.num_inst_per_aid; i++) { @@ -1386,6 +1435,13 @@ static int sdma_v4_4_2_sw_init(void *handle) return -EINVAL; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1406,6 +1462,8 @@ static int sdma_v4_4_2_sw_fini(void *handle) else amdgpu_sdma_destroy_inst_ctx(adev, false); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1799,6 +1857,48 @@ static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v4_4_2_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_4_4_2[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + +static void sdma_v4_4_2_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v4_4_2_get_reg_offset(adev, i, + sdma_reg_list_4_4_2[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = { .name = "sdma_v4_4_2", .early_init = sdma_v4_4_2_early_init, @@ -1815,6 +1915,8 @@ const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = { .set_clockgating_state = sdma_v4_4_2_set_clockgating_state, .set_powergating_state = sdma_v4_4_2_set_powergating_state, .get_clockgating_state = sdma_v4_4_2_get_clockgating_state, + .dump_ip_state = sdma_v4_4_2_dump_ip_state, + .print_ip_state = sdma_v4_4_2_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { @@ -2141,7 +2243,7 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, &ue_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); } static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index b7d33d78bce0..3e48ea38385d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -59,6 +59,55 @@ MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin"); #define SDMA0_HYP_DEC_REG_END 0x5893 #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_INT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2) +}; + static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -1341,6 +1390,8 @@ static int sdma_v5_0_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0); + uint32_t *ptr; /* SDMA trap event */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, @@ -1378,6 +1429,13 @@ static int sdma_v5_0_sw_init(void *handle) return r; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1391,6 +1449,8 @@ static int sdma_v5_0_sw_fini(void *handle) amdgpu_sdma_destroy_inst_ctx(adev, false); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1718,7 +1778,49 @@ static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } -const struct amd_ip_funcs sdma_v5_0_ip_funcs = { +static void sdma_v5_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_5_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + +static void sdma_v5_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v5_0_get_reg_offset(adev, i, + sdma_reg_list_5_0[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + +static const struct amd_ip_funcs sdma_v5_0_ip_funcs = { .name = "sdma_v5_0", .early_init = sdma_v5_0_early_init, .late_init = NULL, @@ -1734,6 +1836,8 @@ const struct amd_ip_funcs sdma_v5_0_ip_funcs = { .set_clockgating_state = sdma_v5_0_set_clockgating_state, .set_powergating_state = sdma_v5_0_set_powergating_state, .get_clockgating_state = sdma_v5_0_get_clockgating_state, + .dump_ip_state = sdma_v5_0_dump_ip_state, + .print_ip_state = sdma_v5_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h index d4e3c2e696f6..2ab71f21755a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h @@ -24,7 +24,6 @@ #ifndef __SDMA_V5_0_H__ #define __SDMA_V5_0_H__ -extern const struct amd_ip_funcs sdma_v5_0_ip_funcs; extern const struct amdgpu_ip_block_version sdma_v5_0_ip_block; #endif /* __SDMA_V5_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index af1e90159ce3..bc9b240a3488 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -60,6 +60,55 @@ MODULE_FIRMWARE("amdgpu/sdma_5_2_7.bin"); #define SDMA0_HYP_DEC_REG_END 0x5893 #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_5_2[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_GFX_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_PAGE_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_RLC0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_INT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSDMA0_VM_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2) +}; + static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -176,14 +225,16 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - /* SDMA seems to miss doorbells sometimes when powergating kicks in. - * Updating the wptr directly will wake it. This is only safe because - * we disallow gfxoff in begin_use() and then allow it again in end_use(). - */ - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(5, 2, 1)) { + /* SDMA seems to miss doorbells sometimes when powergating kicks in. + * Updating the wptr directly will wake it. This is only safe because + * we disallow gfxoff in begin_use() and then allow it again in end_use(). + */ + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } } else { DRM_DEBUG("Not using doorbell -- " "mmSDMA%i_GFX_RB_WPTR == 0x%08x " @@ -1222,6 +1273,8 @@ static int sdma_v5_2_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2); + uint32_t *ptr; /* SDMA trap event */ for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1253,6 +1306,13 @@ static int sdma_v5_2_sw_init(void *handle) return r; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1266,6 +1326,8 @@ static int sdma_v5_2_sw_fini(void *handle) amdgpu_sdma_destroy_inst_ctx(adev, true); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1674,7 +1736,49 @@ static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring) amdgpu_gfx_off_ctrl(adev, true); } -const struct amd_ip_funcs sdma_v5_2_ip_funcs = { +static void sdma_v5_2_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_5_2[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + +static void sdma_v5_2_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v5_2_get_reg_offset(adev, i, + sdma_reg_list_5_2[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + +static const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .name = "sdma_v5_2", .early_init = sdma_v5_2_early_init, .late_init = NULL, @@ -1690,6 +1794,8 @@ const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .set_clockgating_state = sdma_v5_2_set_clockgating_state, .set_powergating_state = sdma_v5_2_set_powergating_state, .get_clockgating_state = sdma_v5_2_get_clockgating_state, + .dump_ip_state = sdma_v5_2_dump_ip_state, + .print_ip_state = sdma_v5_2_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h index b70414fef2a1..863145b3a77e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.h @@ -24,7 +24,6 @@ #ifndef __SDMA_V5_2_H__ #define __SDMA_V5_2_H__ -extern const struct amd_ip_funcs sdma_v5_2_ip_funcs; extern const struct amdgpu_ip_block_version sdma_v5_2_ip_block; #endif /* __SDMA_V5_2_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index dab4c2db8c9d..208a1fa9d4e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -57,6 +57,63 @@ MODULE_FIRMWARE("amdgpu/sdma_6_1_2.bin"); #define SDMA0_HYP_DEC_REG_END 0x589a #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_6_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS4_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS5_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS6_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UCODE_CHECKSUM), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_INT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_CHICKEN_BITS), +}; + static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -1239,6 +1296,8 @@ static int sdma_v6_0_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0); + uint32_t *ptr; /* SDMA trap event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, @@ -1274,6 +1333,13 @@ static int sdma_v6_0_sw_init(void *handle) return -EINVAL; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1287,6 +1353,8 @@ static int sdma_v6_0_sw_fini(void *handle) amdgpu_sdma_destroy_inst_ctx(adev, true); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1488,6 +1556,48 @@ static void sdma_v6_0_get_clockgating_state(void *handle, u64 *flags) { } +static void sdma_v6_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_6_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + +static void sdma_v6_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v6_0_get_reg_offset(adev, i, + sdma_reg_list_6_0[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v6_0_ip_funcs = { .name = "sdma_v6_0", .early_init = sdma_v6_0_early_init, @@ -1505,6 +1615,8 @@ const struct amd_ip_funcs sdma_v6_0_ip_funcs = { .set_clockgating_state = sdma_v6_0_set_clockgating_state, .set_powergating_state = sdma_v6_0_set_powergating_state, .get_clockgating_state = sdma_v6_0_get_clockgating_state, + .dump_ip_state = sdma_v6_0_dump_ip_state, + .print_ip_state = sdma_v6_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 41b5e45697dc..cfd8e183ad50 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -51,6 +51,64 @@ MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin"); #define SDMA0_HYP_DEC_REG_END 0x589a #define SDMA1_HYP_DEC_REG_OFFSET 0x20 +static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS1_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS2_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS3_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS4_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS5_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_STATUS6_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UCODE_REV), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_RB_RPTR_FETCH), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_RD_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_UTCL1_WR_XNACK1), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE0_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE1_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_RPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_RB_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_IB_SUB_REMAIN), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_QUEUE2_DUMMY_REG), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_INT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_VM_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_CHICKEN_BITS), +}; + static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v7_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -1217,6 +1275,8 @@ static int sdma_v7_0_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0); + uint32_t *ptr; /* SDMA trap event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, @@ -1247,6 +1307,13 @@ static int sdma_v7_0_sw_init(void *handle) return r; } + /* Allocate memory for SDMA IP Dump buffer */ + ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr) + adev->sdma.ip_dump = ptr; + else + DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); + return r; } @@ -1263,6 +1330,8 @@ static int sdma_v7_0_sw_fini(void *handle) if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) sdma_v12_0_free_ucode_buffer(adev); + kfree(adev->sdma.ip_dump); + return 0; } @@ -1466,6 +1535,48 @@ static void sdma_v7_0_get_clockgating_state(void *handle, u64 *flags) { } +static void sdma_v7_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0); + uint32_t instance_offset; + + if (!adev->sdma.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + drm_printf(p, "\nInstance:%d\n", i); + + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_7_0[j].reg_name, + adev->sdma.ip_dump[instance_offset + j]); + } +} + +static void sdma_v7_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t instance_offset; + uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0); + + if (!adev->sdma.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < adev->sdma.num_instances; i++) { + instance_offset = i * reg_count; + for (j = 0; j < reg_count; j++) + adev->sdma.ip_dump[instance_offset + j] = + RREG32(sdma_v7_0_get_reg_offset(adev, i, + sdma_reg_list_7_0[j].reg_offset)); + } + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v7_0_ip_funcs = { .name = "sdma_v7_0", .early_init = sdma_v7_0_early_init, @@ -1483,6 +1594,8 @@ const struct amd_ip_funcs sdma_v7_0_ip_funcs = { .set_clockgating_state = sdma_v7_0_set_clockgating_state, .set_powergating_state = sdma_v7_0_set_powergating_state, .get_clockgating_state = sdma_v7_0_get_clockgating_state, + .dump_ip_state = sdma_v7_0_dump_ip_state, + .print_ip_state = sdma_v7_0_print_ip_state, }; static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = { @@ -1575,8 +1688,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0) | - SDMA_PKT_COPY_LINEAR_HEADER_CPV((copy_flags & - (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)) ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_CPV(1); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ @@ -1590,6 +1702,8 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) | ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) | SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1); + else + ib->ptr[ib->length_dw++] = 0; } /** @@ -1616,7 +1730,7 @@ static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib, static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = { .copy_max_bytes = 0x400000, - .copy_num_dw = 7, + .copy_num_dw = 8, .emit_copy_buffer = sdma_v7_0_emit_copy_buffer, .fill_max_bytes = 0x400000, .fill_num_dw = 5, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 282584a48be0..ef7c603b50ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -93,6 +93,10 @@ struct soc15_ras_field_entry { #define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) +/* Over ride the instance id */ +#define SOC15_REG_ENTRY_OFFSET_INST(entry, inst) \ + (adev->reg_offset[entry.hwip][inst][entry.seg] + entry.reg_offset) + #define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \ { ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 2357ff39323f..b9cbeb389edc 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -76,6 +76,12 @@ ((cond & 0xF) << 24) | \ ((type & 0xF) << 28)) +#define CP_PACKETJ_NOP 0x60000000 +#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF) +#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F) +#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF) +#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF) + /* Packet 3 types */ #define PACKET3_NOP 0x10 #define PACKET3_SET_BASE 0x11 @@ -407,6 +413,10 @@ # define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) +#define PACKET3_RUN_CLEANER_SHADER 0xD2 +/* 1. header + * 2. RESERVED [31:0] + */ #define VCE_CMD_NO_OP 0x00000000 #define VCE_CMD_END 0x00000001 diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index 7d641d0dadba..b0c3678cfb31 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -406,6 +406,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_JPEG | @@ -424,6 +425,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 9dbb13adb661..1a8ea834efa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -157,9 +157,9 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev, umc_v12_0_query_error_count_per_type(adev, umc_reg_offset, &de_count, umc_v12_0_is_deferred_error); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); - amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, de_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, de_count); return 0; } @@ -225,26 +225,16 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, } } -static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, - struct ta_ras_query_address_input *addr_in, - uint64_t *pfns, int len) +static void umc_v12_0_dump_addr_info(struct amdgpu_device *adev, + struct ta_ras_query_address_output *addr_out, + uint64_t err_addr) { uint32_t col, row, row_xor, bank, channel_index; - uint64_t soc_pa, retired_page, column, err_addr; - struct ta_ras_query_address_output addr_out; - uint32_t pos = 0; - - err_addr = addr_in->ma.err_addr; - addr_in->addr_type = TA_RAS_MCA_TO_PA; - if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) { - dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", - err_addr); - return 0; - } + uint64_t soc_pa, retired_page, column; - soc_pa = addr_out.pa.pa; - bank = addr_out.pa.bank; - channel_index = addr_out.pa.channel_idx; + soc_pa = addr_out->pa.pa; + bank = addr_out->pa.bank; + channel_index = addr_out->pa.channel_idx; col = (err_addr >> 1) & 0x1fULL; row = (err_addr >> 10) & 0x3fffULL; @@ -258,11 +248,6 @@ static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); - - if (pos >= len) - return 0; - pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - /* include column bit 0 and 1 */ col &= 0x3; col |= (column << 2); @@ -272,19 +257,74 @@ static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, /* shift R13 bit */ retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + retired_page, row_xor, col, bank, channel_index); + } +} + +static int umc_v12_0_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, + uint64_t pa_addr, uint64_t *pfns, int len) +{ + uint64_t soc_pa, retired_page, column; + uint32_t pos = 0; + + soc_pa = pa_addr; + /* clear [C3 C2] in soc physical address */ + soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); + /* clear [C4] in soc physical address */ + soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); + + /* loop for all possibilities of [C4 C3 C2] */ + for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { + retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); + retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); + + if (pos >= len) + return 0; + pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + + /* shift R13 bit */ + retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); if (pos >= len) return 0; pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row_xor, col, bank, channel_index); } return pos; } +static int umc_v12_0_convert_mca_to_addr(struct amdgpu_device *adev, + uint64_t err_addr, uint32_t ch, uint32_t umc, + uint32_t node, uint32_t socket, + uint64_t *addr, bool dump_addr) +{ + struct ta_ras_query_address_input addr_in; + struct ta_ras_query_address_output addr_out; + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = ch; + addr_in.ma.umc_inst = umc; + addr_in.ma.node_inst = node; + addr_in.ma.socket_id = socket; + addr_in.addr_type = TA_RAS_MCA_TO_PA; + if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out)) { + dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", + err_addr); + return -EINVAL; + } + + if (dump_addr) + umc_v12_0_dump_addr_info(adev, &addr_out, err_addr); + + *addr = addr_out.pa.pa; + + return 0; +} + static int umc_v12_0_query_error_address(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data) @@ -483,12 +523,10 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); uint16_t hwid, mcatype; - struct ta_ras_query_address_input addr_in; uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; - uint64_t err_addr, hash_val = 0; + uint64_t err_addr, pa_addr = 0; struct ras_ecc_err *ecc_err; - int count; - int ret; + int count, ret, i; hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); @@ -514,46 +552,25 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, MCA_IPID_2_UMC_CH(ipid), err_addr); - memset(page_pfn, 0, sizeof(page_pfn)); - - memset(&addr_in, 0, sizeof(addr_in)); - addr_in.ma.err_addr = err_addr; - addr_in.ma.ch_inst = MCA_IPID_2_UMC_CH(ipid); - addr_in.ma.umc_inst = MCA_IPID_2_UMC_INST(ipid); - addr_in.ma.node_inst = MCA_IPID_2_DIE_ID(ipid); - addr_in.ma.socket_id = MCA_IPID_2_SOCKET_ID(ipid); - - count = umc_v12_0_convert_err_addr(adev, - &addr_in, page_pfn, ARRAY_SIZE(page_pfn)); - if (count <= 0) { - dev_warn(adev->dev, "Fail to convert error address! count:%d\n", count); - return 0; - } - - ret = amdgpu_umc_build_pages_hash(adev, - page_pfn, count, &hash_val); - if (ret) { - dev_err(adev->dev, "Fail to build error pages hash\n"); + ret = umc_v12_0_convert_mca_to_addr(adev, + err_addr, MCA_IPID_2_UMC_CH(ipid), + MCA_IPID_2_UMC_INST(ipid), MCA_IPID_2_DIE_ID(ipid), + MCA_IPID_2_SOCKET_ID(ipid), &pa_addr, true); + if (ret) return ret; - } ecc_err = kzalloc(sizeof(*ecc_err), GFP_KERNEL); if (!ecc_err) return -ENOMEM; - ecc_err->err_pages.pfn = kcalloc(count, sizeof(*ecc_err->err_pages.pfn), GFP_KERNEL); - if (!ecc_err->err_pages.pfn) { - kfree(ecc_err); - return -ENOMEM; - } - - memcpy(ecc_err->err_pages.pfn, page_pfn, count * sizeof(*ecc_err->err_pages.pfn)); - ecc_err->err_pages.count = count; - - ecc_err->hash_index = hash_val; ecc_err->status = status; ecc_err->ipid = ipid; ecc_err->addr = addr; + ecc_err->pa_pfn = UMC_V12_ADDR_MASK_BAD_COLS(pa_addr) >> AMDGPU_GPU_PAGE_SHIFT; + + /* If converted pa_pfn is 0, use pa C4 pfn. */ + if (!ecc_err->pa_pfn) + ecc_err->pa_pfn = BIT_ULL(UMC_V12_0_PA_C4_BIT) >> AMDGPU_GPU_PAGE_SHIFT; ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err); if (ret) { @@ -562,13 +579,25 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, else dev_err(adev->dev, "Fail to log ecc error! ret:%d\n", ret); - kfree(ecc_err->err_pages.pfn); kfree(ecc_err); return ret; } con->umc_ecc_log.de_queried_count++; + memset(page_pfn, 0, sizeof(page_pfn)); + count = umc_v12_0_lookup_bad_pages_in_a_row(adev, + pa_addr, + page_pfn, ARRAY_SIZE(page_pfn)); + if (count <= 0) { + dev_warn(adev->dev, "Fail to convert error address! count:%d\n", count); + return 0; + } + + /* Reserve memory */ + for (i = 0; i < count; i++) + amdgpu_ras_reserve_page(adev, page_pfn[i]); + /* The problem case is as follows: * 1. GPU A triggers a gpu ras reset, and GPU A drives * GPU B to also perform a gpu ras reset. @@ -593,16 +622,21 @@ static int umc_v12_0_fill_error_record(struct amdgpu_device *adev, struct ras_ecc_err *ecc_err, void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - uint32_t i = 0; - int ret = 0; + uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; + int ret, i, count; if (!err_data || !ecc_err) return -EINVAL; - for (i = 0; i < ecc_err->err_pages.count; i++) { + memset(page_pfn, 0, sizeof(page_pfn)); + count = umc_v12_0_lookup_bad_pages_in_a_row(adev, + ecc_err->pa_pfn << AMDGPU_GPU_PAGE_SHIFT, + page_pfn, ARRAY_SIZE(page_pfn)); + + for (i = 0; i < count; i++) { ret = amdgpu_umc_fill_error_record(err_data, ecc_err->addr, - ecc_err->err_pages.pfn[i] << AMDGPU_GPU_PAGE_SHIFT, + page_pfn[i] << AMDGPU_GPU_PAGE_SHIFT, MCA_IPID_2_UMC_CH(ecc_err->ipid), MCA_IPID_2_UMC_INST(ecc_err->ipid)); if (ret) @@ -636,7 +670,8 @@ static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev, dev_err(adev->dev, "Fail to fill umc error record, ret:%d\n", ret); break; } - radix_tree_tag_clear(ecc_tree, entries[i]->hash_index, UMC_ECC_NEW_DETECTED_TAG); + radix_tree_tag_clear(ecc_tree, + entries[i]->pa_pfn, UMC_ECC_NEW_DETECTED_TAG); } mutex_unlock(&con->umc_ecc_log.lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index b4974793850b..be5598d76c1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -81,6 +81,11 @@ (((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \ (REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03)) +#define UMC_V12_ADDR_MASK_BAD_COLS(addr) \ + ((addr) & ~((0x3ULL << UMC_V12_0_PA_C2_BIT) | \ + (0x1ULL << UMC_V12_0_PA_C4_BIT) | \ + (0x1ULL << UMC_V12_0_PA_R13_BIT))) + bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 32517c364cf7..4bfba2931b08 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -950,7 +950,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = { .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, - .parse_cs = amdgpu_vce_ring_parse_cs_vm, + .patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm, .emit_frame_size = 6 + /* vce_v3_0_emit_vm_flush */ 4 + /* vce_v3_0_emit_pipeline_sync */ diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 06d787385ad4..0748bf44c880 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -1102,7 +1102,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .get_rptr = vce_v4_0_ring_get_rptr, .get_wptr = vce_v4_0_ring_get_wptr, .set_wptr = vce_v4_0_ring_set_wptr, - .parse_cs = amdgpu_vce_ring_parse_cs_vm, + .patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index a280b9fecb77..ecdfbfefd66a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -45,6 +45,42 @@ #define mmUVD_REG_XX_MASK_1_0 0x05ac #define mmUVD_REG_XX_MASK_1_0_BASE_IDX 1 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_1_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); @@ -90,6 +126,8 @@ static int vcn_v1_0_sw_init(void *handle) { struct amdgpu_ring *ring; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* VCN DEC TRAP */ @@ -161,6 +199,14 @@ static int vcn_v1_0_sw_init(void *handle) r = jpeg_v1_0_sw_init(handle); + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return r; } @@ -184,6 +230,8 @@ static int vcn_v1_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1877,6 +1925,66 @@ void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring) mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround); } +static void vcn_v1_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_1_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v1_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_1_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .name = "vcn_v1_0", .early_init = vcn_v1_0_early_init, @@ -1895,8 +2003,8 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */, .set_clockgating_state = vcn_v1_0_set_clockgating_state, .set_powergating_state = vcn_v1_0_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v1_0_dump_ip_state, + .print_ip_state = vcn_v1_0_print_ip_state, }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index d3d096909a7f..bfd067e2d2f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -53,6 +53,42 @@ #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); @@ -96,6 +132,8 @@ static int vcn_v2_0_sw_init(void *handle) { struct amdgpu_ring *ring; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; volatile struct amdgpu_fw_shared *fw_shared; @@ -184,6 +222,15 @@ static int vcn_v2_0_sw_init(void *handle) if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(adev->vcn.inst); + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -213,6 +260,8 @@ static int vcn_v2_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1985,6 +2034,66 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) return vcn_v2_0_start_mmsch(adev, &adev->virt.mm_table); } +static void vcn_v2_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_2_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v2_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_2_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .name = "vcn_v2_0", .early_init = vcn_v2_0_early_init, @@ -2003,8 +2112,8 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_0_set_clockgating_state, .set_powergating_state = vcn_v2_0_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v2_0_dump_ip_state, + .print_ip_state = vcn_v2_0_print_ip_state, }; static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 96f60c303161..04e9e806e318 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -55,6 +55,43 @@ #define VCN25_MAX_HW_INSTANCES_ARCTURUS 2 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_5[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); @@ -122,6 +159,8 @@ static int vcn_v2_5_sw_init(void *handle) { struct amdgpu_ring *ring; int i, j, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (j = 0; j < adev->vcn.num_vcn_inst; j++) { @@ -241,6 +280,15 @@ static int vcn_v2_5_sw_init(void *handle) if (r) return r; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -277,6 +325,8 @@ static int vcn_v2_5_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1876,6 +1926,66 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v2_5_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_2_5[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v2_5_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_2_5[j], i)); + } +} + static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .name = "vcn_v2_5", .early_init = vcn_v2_5_early_init, @@ -1894,8 +2004,8 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v2_5_dump_ip_state, + .print_ip_state = vcn_v2_5_print_ip_state, }; static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { @@ -1916,8 +2026,8 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v2_5_dump_ip_state, + .print_ip_state = vcn_v2_5_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v2_5_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 24f947751c46..65dd68b32280 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -60,6 +60,42 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_3_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -126,6 +162,8 @@ static int vcn_v3_0_sw_init(void *handle) struct amdgpu_ring *ring; int i, j, r; int vcn_doorbell_index = 0; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + uint32_t *ptr; struct amdgpu_device *adev = (struct amdgpu_device *)handle; r = amdgpu_vcn_sw_init(adev); @@ -246,6 +284,15 @@ static int vcn_v3_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -284,6 +331,7 @@ static int vcn_v3_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); return r; } @@ -2203,6 +2251,67 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v3_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + uint32_t inst_off; + bool is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_3_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v3_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, mmUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_3_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .name = "vcn_v3_0", .early_init = vcn_v3_0_early_init, @@ -2221,8 +2330,8 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v3_0_dump_ip_state, + .print_ip_state = vcn_v3_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v3_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 776c539bfdda..26c6f10a8c8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -52,6 +52,42 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -137,6 +173,8 @@ static int vcn_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -200,6 +238,15 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -239,6 +286,8 @@ static int vcn_v4_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -2109,6 +2158,67 @@ static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v4_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v4_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0[j], + i)); + } +} + static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .name = "vcn_v4_0", .early_init = vcn_v4_0_early_init, @@ -2127,8 +2237,8 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_set_clockgating_state, .set_powergating_state = vcn_v4_0_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v4_0_dump_ip_state, + .print_ip_state = vcn_v4_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 9bae95538b62..0fda70336300 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,42 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + #define NORMALIZE_VCN_REG_OFFSET(offset) \ (offset & 0x1FFFF) @@ -92,6 +128,8 @@ static int vcn_v4_0_3_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; int i, r, vcn_inst; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -159,6 +197,15 @@ static int vcn_v4_0_3_sw_init(void *handle) } } + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } + return 0; } @@ -194,6 +241,8 @@ static int vcn_v4_0_3_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1684,6 +1733,68 @@ static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs; } +static void vcn_v4_0_3_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0_3[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v4_0_3_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off, inst_id; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_id = GET_INST(VCN, i); + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, inst_id, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0_3[j], + inst_id)); + } +} + static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .name = "vcn_v4_0_3", .early_init = vcn_v4_0_3_early_init, @@ -1702,8 +1813,8 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_3_set_clockgating_state, .set_powergating_state = vcn_v4_0_3_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v4_0_3_dump_ip_state, + .print_ip_state = vcn_v4_0_3_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 8d75061f9f38..b1fd226b7efb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -52,6 +52,42 @@ #define RDECODE_MSG_CREATE 0x00000000 #define RDECODE_MESSAGE_CREATE 0x00000001 +static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_5[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_CONFIG), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_PGFSM_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -97,6 +133,8 @@ static int vcn_v4_0_5_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -168,6 +206,14 @@ static int vcn_v4_0_5_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return 0; } @@ -207,6 +253,8 @@ static int vcn_v4_0_5_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1733,6 +1781,67 @@ static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v4_0_5_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_4_0_5[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v4_0_5_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_4_0_5[j], + i)); + } +} + static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .name = "vcn_v4_0_5", .early_init = vcn_v4_0_5_early_init, @@ -1751,8 +1860,8 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_5_set_clockgating_state, .set_powergating_state = vcn_v4_0_5_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v4_0_5_dump_ip_state, + .print_ip_state = vcn_v4_0_5_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 68c97fcd539b..c305386358b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -37,6 +37,40 @@ #include <drm/drm_drv.h> +static const struct amdgpu_hwip_reg_entry vcn_reg_list_5_0[] = { + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_STATUS), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_CONTEXT_ID2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA0), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_DATA1), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_GPCOM_VCPU_CMD), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_HI4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_BASE_LO4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_RPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_WPTR4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE2), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE3), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_RB_SIZE4), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_CTL), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_DATA), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_LMA_MASK), + SOC15_REG_ENTRY_STR(VCN, 0, regUVD_DPG_PAUSE) +}; + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -83,6 +117,8 @@ static int vcn_v5_0_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -137,6 +173,14 @@ static int vcn_v5_0_0_sw_init(void *handle) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } return 0; } @@ -173,6 +217,8 @@ static int vcn_v5_0_0_sw_fini(void *handle) r = amdgpu_vcn_sw_fini(adev); + kfree(adev->vcn.ip_dump); + return r; } @@ -1297,6 +1343,66 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) } } +static void vcn_v5_0_print_ip_state(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + uint32_t inst_off, is_powered; + + if (!adev->vcn.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i); + continue; + } + + inst_off = i * reg_count; + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) { + drm_printf(p, "\nActive Instance:VCN%d\n", i); + for (j = 0; j < reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", vcn_reg_list_5_0[j].reg_name, + adev->vcn.ip_dump[inst_off + j]); + } else { + drm_printf(p, "\nInactive Instance:VCN%d\n", i); + } + } +} + +static void vcn_v5_0_dump_ip_state(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, j; + bool is_powered; + uint32_t inst_off; + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + + if (!adev->vcn.ip_dump) + return; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + inst_off = i * reg_count; + /* mmUVD_POWER_STATUS is always readable and is first element of the array */ + adev->vcn.ip_dump[inst_off] = RREG32_SOC15(VCN, i, regUVD_POWER_STATUS); + is_powered = (adev->vcn.ip_dump[inst_off] & + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK) != 1; + + if (is_powered) + for (j = 1; j < reg_count; j++) + adev->vcn.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(vcn_reg_list_5_0[j], i)); + } +} + static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .name = "vcn_v5_0_0", .early_init = vcn_v5_0_0_early_init, @@ -1315,8 +1421,8 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, - .dump_ip_state = NULL, - .print_ip_state = NULL, + .dump_ip_state = vcn_v5_0_dump_ip_state, + .print_ip_state = vcn_v5_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index 80ce42aacc0c..b61f6b838ec2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -246,6 +246,7 @@ * 1 - Stream * 2 - Bypass */ +#define EOP_EXEC (1 << 28) /* For Trailing Fence */ #define DATA_SEL(x) ((x) << 29) /* 0 - discard * 1 - send low 32bit data diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 32e5db509560..00350eccd571 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -247,14 +247,15 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->priority = args->queue_priority; q_properties->queue_address = args->ring_base_address; q_properties->queue_size = args->ring_size; - q_properties->read_ptr = (uint32_t *) args->read_pointer_address; - q_properties->write_ptr = (uint32_t *) args->write_pointer_address; + q_properties->read_ptr = (void __user *)args->read_pointer_address; + q_properties->write_ptr = (void __user *)args->write_pointer_address; q_properties->eop_ring_buffer_address = args->eop_buffer_address; q_properties->eop_ring_buffer_size = args->eop_buffer_size; q_properties->ctx_save_restore_area_address = args->ctx_save_restore_address; q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size; q_properties->ctl_stack_size = args->ctl_stack_size; + q_properties->sdma_engine_id = args->sdma_engine_id; if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE || args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) q_properties->type = KFD_QUEUE_TYPE_COMPUTE; @@ -262,6 +263,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->type = KFD_QUEUE_TYPE_SDMA; else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI) q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI; + else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID) + q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID; else return -ENOTSUPP; @@ -306,7 +309,6 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, struct kfd_process_device *pdd; struct queue_properties q_properties; uint32_t doorbell_offset_in_process = 0; - struct amdgpu_bo *wptr_bo = NULL; memset(&q_properties, 0, sizeof(struct queue_properties)); @@ -334,6 +336,18 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, goto err_bind_process; } + if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { + int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) + + kfd_get_num_xgmi_sdma_engines(dev) - 1; + + if (q_properties.sdma_engine_id > max_sdma_eng_id) { + err = -EINVAL; + pr_err("sdma_engine_id %i exceeds maximum id of %i\n", + q_properties.sdma_engine_id, max_sdma_eng_id); + goto err_sdma_engine_id; + } + } + if (!pdd->qpd.proc_doorbells) { err = kfd_alloc_process_doorbells(dev->kfd, pdd); if (err) { @@ -342,53 +356,17 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, } } - /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work - * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) - */ - if (dev->kfd->shared_resources.enable_mes && - ((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) - >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) { - struct amdgpu_bo_va_mapping *wptr_mapping; - struct amdgpu_vm *wptr_vm; - - wptr_vm = drm_priv_to_vm(pdd->drm_priv); - err = amdgpu_bo_reserve(wptr_vm->root.bo, false); - if (err) - goto err_wptr_map_gart; - - wptr_mapping = amdgpu_vm_bo_lookup_mapping( - wptr_vm, args->write_pointer_address >> PAGE_SHIFT); - amdgpu_bo_unreserve(wptr_vm->root.bo); - if (!wptr_mapping) { - pr_err("Failed to lookup wptr bo\n"); - err = -EINVAL; - goto err_wptr_map_gart; - } - - wptr_bo = wptr_mapping->bo_va->base.bo; - if (wptr_bo->tbo.base.size > PAGE_SIZE) { - pr_err("Requested GART mapping for wptr bo larger than one page\n"); - err = -EINVAL; - goto err_wptr_map_gart; - } - if (dev->adev != amdgpu_ttm_adev(wptr_bo->tbo.bdev)) { - pr_err("Queue memory allocated to wrong device\n"); - err = -EINVAL; - goto err_wptr_map_gart; - } - - err = amdgpu_amdkfd_map_gtt_bo_to_gart(wptr_bo); - if (err) { - pr_err("Failed to map wptr bo to GART\n"); - goto err_wptr_map_gart; - } + err = kfd_queue_acquire_buffers(pdd, &q_properties); + if (err) { + pr_debug("failed to acquire user queue buffers\n"); + goto err_acquire_queue_buf; } pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n", p->pasid, dev->id); - err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo, + err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL, NULL, &doorbell_offset_in_process); if (err != 0) goto err_create_queue; @@ -422,9 +400,10 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, return 0; err_create_queue: - if (wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo); -err_wptr_map_gart: + kfd_queue_unref_bo_vas(pdd, &q_properties); + kfd_queue_release_buffers(pdd, &q_properties); +err_acquire_queue_buf: +err_sdma_engine_id: err_bind_process: err_pdd: mutex_unlock(&p->mutex); @@ -1422,8 +1401,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv); if (err) { - pr_err("Failed to unmap from gpu %d/%d\n", - i, args->n_devices); + pr_debug("Failed to unmap from gpu %d/%d\n", i, args->n_devices); goto unmap_memory_from_gpu_failed; } args->n_success = i+1; @@ -1963,7 +1941,7 @@ static int criu_checkpoint_bos(struct kfd_process *p, bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo); for (i = 0; i < p->n_pdds; i++) { - if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->dev->adev, kgd_mem)) + if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->drm_priv, kgd_mem)) bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 34a282540c7e..312dfa84f29f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -365,47 +365,47 @@ static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_i *watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID; - spin_lock(&pdd->dev->kfd->watch_points_lock); + spin_lock(&pdd->dev->watch_points_lock); for (i = 0; i < MAX_WATCH_ADDRESSES; i++) { /* device watchpoint in use so skip */ - if ((pdd->dev->kfd->alloc_watch_ids >> i) & 0x1) + if ((pdd->dev->alloc_watch_ids >> i) & 0x1) continue; pdd->alloc_watch_ids |= 0x1 << i; - pdd->dev->kfd->alloc_watch_ids |= 0x1 << i; + pdd->dev->alloc_watch_ids |= 0x1 << i; *watch_id = i; - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); return 0; } - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); return -ENOMEM; } static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id) { - spin_lock(&pdd->dev->kfd->watch_points_lock); + spin_lock(&pdd->dev->watch_points_lock); /* process owns device watch point so safe to clear */ if ((pdd->alloc_watch_ids >> watch_id) & 0x1) { pdd->alloc_watch_ids &= ~(0x1 << watch_id); - pdd->dev->kfd->alloc_watch_ids &= ~(0x1 << watch_id); + pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id); } - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); } static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id) { bool owns_watch_id = false; - spin_lock(&pdd->dev->kfd->watch_points_lock); + spin_lock(&pdd->dev->watch_points_lock); owns_watch_id = watch_id < MAX_WATCH_ADDRESSES && ((pdd->alloc_watch_ids >> watch_id) & 0x1); - spin_unlock(&pdd->dev->kfd->watch_points_lock); + spin_unlock(&pdd->dev->watch_points_lock); return owns_watch_id; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index f4d20adaa068..fad1c8f2bc83 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -884,13 +884,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, dev_err(kfd_device, "Error initializing KFD node\n"); goto node_init_error; } + + spin_lock_init(&node->watch_points_lock); + kfd->nodes[i] = node; } svm_range_set_max_pages(kfd->adev); - spin_lock_init(&kfd->watch_points_lock); - kfd->init_complete = true; dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor, kfd->adev->pdev->device); @@ -907,7 +908,7 @@ node_alloc_error: kfd_doorbell_error: kfd_gtt_sa_fini(kfd); kfd_gtt_sa_init_error: - amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem); alloc_gtt_mem_failure: dev_err(kfd_device, "device %x:%x NOT added due to errors\n", @@ -925,7 +926,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfd_doorbell_fini(kfd); ida_destroy(&kfd->doorbell_ida); kfd_gtt_sa_fini(kfd); - amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem); } kfree(kfd); @@ -1445,6 +1446,45 @@ void kgd2kfd_unlock_kfd(void) mutex_unlock(&kfd_processes_mutex); } +int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + struct kfd_node *node; + int ret; + + if (!kfd->init_complete) + return 0; + + if (node_id >= kfd->num_nodes) { + dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", + node_id, kfd->num_nodes - 1); + return -EINVAL; + } + node = kfd->nodes[node_id]; + + ret = node->dqm->ops.unhalt(node->dqm); + if (ret) + dev_err(kfd_device, "Error in starting scheduler\n"); + + return ret; +} + +int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) +{ + struct kfd_node *node; + + if (!kfd->init_complete) + return 0; + + if (node_id >= kfd->num_nodes) { + dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", + node_id, kfd->num_nodes - 1); + return -EINVAL; + } + + node = kfd->nodes[node_id]; + return node->dqm->ops.halt(node->dqm); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 4f48507418d2..577d121cc6d1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -153,6 +153,20 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, static void kfd_hws_hang(struct device_queue_manager *dqm) { + struct device_process_node *cur; + struct qcm_process_device *qpd; + struct queue *q; + + /* Mark all device queues as reset. */ + list_for_each_entry(cur, &dqm->queues, list) { + qpd = cur->qpd; + list_for_each_entry(q, &qpd->queues_list, list) { + struct kfd_process_device *pdd = qpd_to_pdd(qpd); + + pdd->has_reset_queue = true; + } + } + /* * Issue a GPU reset if HWS is unresponsive */ @@ -208,10 +222,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, queue_input.mqd_addr = q->gart_mqd_addr; queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; - if (q->wptr_bo) { - wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); - queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->wptr_bo) + wptr_addr_off; - } + wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); + queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->properties.wptr_bo) + wptr_addr_off; queue_input.is_kfd_process = 1; queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL); @@ -307,6 +319,46 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm) return retval; } +static int suspend_all_queues_mes(struct device_queue_manager *dqm) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; + int r = 0; + + if (!down_read_trylock(&adev->reset_domain->sem)) + return -EIO; + + r = amdgpu_mes_suspend(adev); + up_read(&adev->reset_domain->sem); + + if (r) { + dev_err(adev->dev, "failed to suspend gangs from MES\n"); + dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); + kfd_hws_hang(dqm); + } + + return r; +} + +static int resume_all_queues_mes(struct device_queue_manager *dqm) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; + int r = 0; + + if (!down_read_trylock(&adev->reset_domain->sem)) + return -EIO; + + r = amdgpu_mes_resume(adev); + up_read(&adev->reset_domain->sem); + + if (r) { + dev_err(adev->dev, "failed to resume gangs from MES\n"); + dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); + kfd_hws_hang(dqm); + } + + return r; +} + static void increment_queue_count(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) @@ -880,6 +932,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, else if (prev_active) retval = remove_queue_mes(dqm, q, &pdd->qpd); + /* queue is reset so inaccessable */ + if (pdd->has_reset_queue) { + retval = -EACCES; + goto out_unlock; + } + if (retval) { dev_err(dev, "unmap queue failed\n"); goto out_unlock; @@ -1534,6 +1592,41 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); q->properties.sdma_queue_id = q->sdma_id / kfd_get_num_xgmi_sdma_engines(dqm->dev); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { + int i, num_queues, num_engines, eng_offset = 0, start_engine; + bool free_bit_found = false, is_xgmi = false; + + if (q->properties.sdma_engine_id < kfd_get_num_sdma_engines(dqm->dev)) { + num_queues = get_num_sdma_queues(dqm); + num_engines = kfd_get_num_sdma_engines(dqm->dev); + q->properties.type = KFD_QUEUE_TYPE_SDMA; + } else { + num_queues = get_num_xgmi_sdma_queues(dqm); + num_engines = kfd_get_num_xgmi_sdma_engines(dqm->dev); + eng_offset = kfd_get_num_sdma_engines(dqm->dev); + q->properties.type = KFD_QUEUE_TYPE_SDMA_XGMI; + is_xgmi = true; + } + + /* Scan available bit based on target engine ID. */ + start_engine = q->properties.sdma_engine_id - eng_offset; + for (i = start_engine; i < num_queues; i += num_engines) { + + if (!test_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap)) + continue; + + clear_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap); + q->sdma_id = i; + q->properties.sdma_queue_id = q->sdma_id / num_engines; + free_bit_found = true; + break; + } + + if (!free_bit_found) { + dev_err(dev, "No more SDMA queue to allocate for target ID %i\n", + q->properties.sdma_engine_id); + return -ENOMEM; + } } pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); @@ -1626,10 +1719,64 @@ static int initialize_cpsch(struct device_queue_manager *dqm) return 0; } +/* halt_cpsch: + * Unmap queues so the schedule doesn't continue remaining jobs in the queue. + * Then set dqm->sched_halt so queues don't map to runlist until unhalt_cpsch + * is called. + */ +static int halt_cpsch(struct device_queue_manager *dqm) +{ + int ret = 0; + + dqm_lock(dqm); + if (!dqm->sched_running) { + dqm_unlock(dqm); + return 0; + } + + WARN_ONCE(dqm->sched_halt, "Scheduling is already on halt\n"); + + if (!dqm->is_hws_hang) { + if (!dqm->dev->kfd->shared_resources.enable_mes) + ret = unmap_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, + USE_DEFAULT_GRACE_PERIOD, false); + else + ret = remove_all_queues_mes(dqm); + } + dqm->sched_halt = true; + dqm_unlock(dqm); + + return ret; +} + +/* unhalt_cpsch + * Unset dqm->sched_halt and map queues back to runlist + */ +static int unhalt_cpsch(struct device_queue_manager *dqm) +{ + int ret = 0; + + dqm_lock(dqm); + if (!dqm->sched_running || !dqm->sched_halt) { + WARN_ONCE(!dqm->sched_halt, "Scheduling is not on halt.\n"); + dqm_unlock(dqm); + return 0; + } + dqm->sched_halt = false; + if (!dqm->dev->kfd->shared_resources.enable_mes) + ret = execute_queues_cpsch(dqm, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, + 0, USE_DEFAULT_GRACE_PERIOD); + dqm_unlock(dqm); + + return ret; +} + static int start_cpsch(struct device_queue_manager *dqm) { struct device *dev = dqm->dev->adev->dev; - int retval; + int retval, num_hw_queue_slots; retval = 0; @@ -1682,9 +1829,24 @@ static int start_cpsch(struct device_queue_manager *dqm) &dqm->wait_times); } + /* setup per-queue reset detection buffer */ + num_hw_queue_slots = dqm->dev->kfd->shared_resources.num_queue_per_pipe * + dqm->dev->kfd->shared_resources.num_pipe_per_mec * + NUM_XCC(dqm->dev->xcc_mask); + + dqm->detect_hang_info_size = num_hw_queue_slots * sizeof(struct dqm_detect_hang_info); + dqm->detect_hang_info = kzalloc(dqm->detect_hang_info_size, GFP_KERNEL); + + if (!dqm->detect_hang_info) { + retval = -ENOMEM; + goto fail_detect_hang_buffer; + } + dqm_unlock(dqm); return 0; +fail_detect_hang_buffer: + kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); fail_allocate_vidmem: fail_set_sched_resources: if (!dqm->dev->kfd->shared_resources.enable_mes) @@ -1715,6 +1877,8 @@ static int stop_cpsch(struct device_queue_manager *dqm) kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); if (!dqm->dev->kfd->shared_resources.enable_mes) pm_uninit(&dqm->packet_mgr); + kfree(dqm->detect_hang_info); + dqm->detect_hang_info = NULL; dqm_unlock(dqm); return 0; @@ -1786,7 +1950,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, } if (q->properties.type == KFD_QUEUE_TYPE_SDMA || - q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI || + q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { dqm_lock(dqm); retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); dqm_unlock(dqm); @@ -1913,7 +2078,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) struct device *dev = dqm->dev->adev->dev; int retval; - if (!dqm->sched_running) + if (!dqm->sched_running || dqm->sched_halt) return 0; if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) return 0; @@ -1931,6 +2096,135 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) return retval; } +static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd = qpd_to_pdd(qpd); + + dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid 0x%0x is reset\n", + q->properties.queue_id, q->process->pasid); + + pdd->has_reset_queue = true; + if (q->properties.is_active) { + q->properties.is_active = false; + decrement_queue_count(dqm, qpd, q); + } +} + +static int detect_queue_hang(struct device_queue_manager *dqm) +{ + int i; + + /* detect should be used only in dqm locked queue reset */ + if (WARN_ON(dqm->detect_hang_count > 0)) + return 0; + + memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size); + + for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { + uint32_t mec, pipe, queue; + int xcc_id; + + mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) + / dqm->dev->kfd->shared_resources.num_pipe_per_mec; + + if (mec || !test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) + continue; + + amdgpu_queue_mask_bit_to_mec_queue(dqm->dev->adev, i, &mec, &pipe, &queue); + + for_each_inst(xcc_id, dqm->dev->xcc_mask) { + uint64_t queue_addr = dqm->dev->kfd2kgd->hqd_get_pq_addr( + dqm->dev->adev, pipe, queue, xcc_id); + struct dqm_detect_hang_info hang_info; + + if (!queue_addr) + continue; + + hang_info.pipe_id = pipe; + hang_info.queue_id = queue; + hang_info.xcc_id = xcc_id; + hang_info.queue_address = queue_addr; + + dqm->detect_hang_info[dqm->detect_hang_count] = hang_info; + dqm->detect_hang_count++; + } + } + + return dqm->detect_hang_count; +} + +static struct queue *find_queue_by_address(struct device_queue_manager *dqm, uint64_t queue_address) +{ + struct device_process_node *cur; + struct qcm_process_device *qpd; + struct queue *q; + + list_for_each_entry(cur, &dqm->queues, list) { + qpd = cur->qpd; + list_for_each_entry(q, &qpd->queues_list, list) { + if (queue_address == q->properties.queue_address) + return q; + } + } + + return NULL; +} + +/* only for compute queue */ +static int reset_queues_on_hws_hang(struct device_queue_manager *dqm) +{ + int r = 0, reset_count = 0, i; + + if (!dqm->detect_hang_info || dqm->is_hws_hang) + return -EIO; + + /* assume dqm locked. */ + if (!detect_queue_hang(dqm)) + return -ENOTRECOVERABLE; + + for (i = 0; i < dqm->detect_hang_count; i++) { + struct dqm_detect_hang_info hang_info = dqm->detect_hang_info[i]; + struct queue *q = find_queue_by_address(dqm, hang_info.queue_address); + struct kfd_process_device *pdd; + uint64_t queue_addr = 0; + + if (!q) { + r = -ENOTRECOVERABLE; + goto reset_fail; + } + + pdd = kfd_get_process_device_data(dqm->dev, q->process); + if (!pdd) { + r = -ENOTRECOVERABLE; + goto reset_fail; + } + + queue_addr = dqm->dev->kfd2kgd->hqd_reset(dqm->dev->adev, + hang_info.pipe_id, hang_info.queue_id, hang_info.xcc_id, + KFD_UNMAP_LATENCY_MS); + + /* either reset failed or we reset an unexpected queue. */ + if (queue_addr != q->properties.queue_address) { + r = -ENOTRECOVERABLE; + goto reset_fail; + } + + set_queue_as_reset(dqm, q, &pdd->qpd); + reset_count++; + } + + if (reset_count == dqm->detect_hang_count) + kfd_signal_reset_event(dqm->dev); + else + r = -ENOTRECOVERABLE; + +reset_fail: + dqm->detect_hang_count = 0; + + return r; +} + /* dqm->lock mutex has to be locked before calling this function */ static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, @@ -1981,11 +2275,14 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, */ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) { - while (halt_if_hws_hang) - schedule(); - kfd_hws_hang(dqm); - retval = -ETIME; - goto out; + if (reset_queues_on_hws_hang(dqm)) { + while (halt_if_hws_hang) + schedule(); + dqm->is_hws_hang = true; + kfd_hws_hang(dqm); + retval = -ETIME; + goto out; + } } /* We need to reset the grace period value for this device */ @@ -2004,8 +2301,7 @@ out: } /* only for compute queue */ -static int reset_queues_cpsch(struct device_queue_manager *dqm, - uint16_t pasid) +static int reset_queues_cpsch(struct device_queue_manager *dqm, uint16_t pasid) { int retval; @@ -2525,6 +2821,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev) dqm->ops.initialize = initialize_cpsch; dqm->ops.start = start_cpsch; dqm->ops.stop = stop_cpsch; + dqm->ops.halt = halt_cpsch; + dqm->ops.unhalt = unhalt_cpsch; dqm->ops.destroy_queue = destroy_queue_cpsch; dqm->ops.update_queue = update_queue; dqm->ops.register_process = register_process; @@ -2621,7 +2919,7 @@ static void deallocate_hiq_sdma_mqd(struct kfd_node *dev, { WARN(!mqd, "No hiq sdma mqd trunk to free"); - amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(dev->adev, &mqd->gtt_mem); } void device_queue_manager_uninit(struct device_queue_manager *dqm) @@ -2633,6 +2931,95 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) kfree(dqm); } +int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id) +{ + struct kfd_process_device *pdd; + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + struct device_queue_manager *dqm = knode->dqm; + struct device *dev = dqm->dev->adev->dev; + struct qcm_process_device *qpd; + struct queue *q = NULL; + int ret = 0; + + if (!p) + return -EINVAL; + + dqm_lock(dqm); + + pdd = kfd_get_process_device_data(dqm->dev, p); + if (pdd) { + qpd = &pdd->qpd; + + list_for_each_entry(q, &qpd->queues_list, list) { + if (q->doorbell_id == doorbell_id && q->properties.is_active) { + ret = suspend_all_queues_mes(dqm); + if (ret) { + dev_err(dev, "Suspending all queues failed"); + goto out; + } + + q->properties.is_evicted = true; + q->properties.is_active = false; + decrement_queue_count(dqm, qpd, q); + + ret = remove_queue_mes(dqm, q, qpd); + if (ret) { + dev_err(dev, "Removing bad queue failed"); + goto out; + } + + ret = resume_all_queues_mes(dqm); + if (ret) + dev_err(dev, "Resuming all queues failed"); + + break; + } + } + } + +out: + dqm_unlock(dqm); + return ret; +} + +static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct device *dev = dqm->dev->adev->dev; + int ret = 0; + + /* Check if process is already evicted */ + dqm_lock(dqm); + if (qpd->evicted) { + /* Increment the evicted count to make sure the + * process stays evicted before its terminated. + */ + qpd->evicted++; + dqm_unlock(dqm); + goto out; + } + dqm_unlock(dqm); + + ret = suspend_all_queues_mes(dqm); + if (ret) { + dev_err(dev, "Suspending all queues failed"); + goto out; + } + + ret = dqm->ops.evict_process_queues(dqm, qpd); + if (ret) { + dev_err(dev, "Evicting process queues failed"); + goto out; + } + + ret = resume_all_queues_mes(dqm); + if (ret) + dev_err(dev, "Resuming all queues failed"); + +out: + return ret; +} + int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) { struct kfd_process_device *pdd; @@ -2643,8 +3030,13 @@ int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) return -EINVAL; WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); pdd = kfd_get_process_device_data(dqm->dev, p); - if (pdd) - ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); + if (pdd) { + if (dqm->dev->kfd->shared_resources.enable_mes) + ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd); + else + ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); + } + kfd_unref_process(p); return ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 3b9b8eabaacc..08b40826ad1e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -106,6 +106,12 @@ union GRBM_GFX_INDEX_BITS { * @uninitialize: Destroys all the device queue manager resources allocated in * initialize routine. * + * @halt: This routine unmaps queues from runlist and set halt status to true + * so no more queues will be mapped to runlist until unhalt. + * + * @unhalt: This routine unset halt status to flase and maps queues back to + * runlist. + * * @create_kernel_queue: Creates kernel queue. Used for debug queue. * * @destroy_kernel_queue: Destroys kernel queue. Used for debug queue. @@ -153,6 +159,8 @@ struct device_queue_manager_ops { int (*start)(struct device_queue_manager *dqm); int (*stop)(struct device_queue_manager *dqm); void (*uninitialize)(struct device_queue_manager *dqm); + int (*halt)(struct device_queue_manager *dqm); + int (*unhalt)(struct device_queue_manager *dqm); int (*create_kernel_queue)(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd); @@ -210,6 +218,13 @@ struct device_queue_manager_asic_ops { struct kfd_node *dev); }; +struct dqm_detect_hang_info { + int pipe_id; + int queue_id; + int xcc_id; + uint64_t queue_address; +}; + /** * struct device_queue_manager * @@ -257,6 +272,7 @@ struct device_queue_manager { struct work_struct hw_exception_work; struct kfd_mem_obj hiq_sdma_mqd; bool sched_running; + bool sched_halt; /* used for GFX 9.4.3 only */ uint32_t current_logical_xcc_start; @@ -264,6 +280,11 @@ struct device_queue_manager { uint32_t wait_times; wait_queue_head_t destroy_wait; + + /* for per-queue reset support */ + struct dqm_detect_hang_info *detect_hang_info; + size_t detect_hang_info_size; + int detect_hang_count; }; void device_queue_manager_init_cik( diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 9b33d9d2c9ad..ea3792249209 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -31,6 +31,7 @@ #include <linux/memory.h> #include "kfd_priv.h" #include "kfd_events.h" +#include "kfd_device_queue_manager.h" #include <linux/device.h> /* @@ -1244,12 +1245,33 @@ void kfd_signal_reset_event(struct kfd_node *dev) idx = srcu_read_lock(&kfd_processes_srcu); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { int user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id); + struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p); if (unlikely(user_gpu_id == -EINVAL)) { WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id); continue; } + if (unlikely(!pdd)) { + WARN_ONCE(1, "Could not get device data from pasid:0x%x\n", p->pasid); + continue; + } + + if (dev->dqm->detect_hang_count && !pdd->has_reset_queue) + continue; + + if (dev->dqm->detect_hang_count) { + struct amdgpu_task_info *ti; + + ti = amdgpu_vm_get_task_info_pasid(dev->adev, p->pasid); + if (ti) { + dev_err(dev->adev->dev, + "Queues reset on process %s tid %d thread %s pid %d\n", + ti->process_name, ti->tgid, ti->task_name, ti->pid); + amdgpu_vm_put_task_info(ti); + } + } + rcu_read_lock(); id = KFD_FIRST_NONSIGNAL_EVENT_ID; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c index 8e0d0356e810..bb8cbfc39b90 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c @@ -129,63 +129,6 @@ enum SQ_INTERRUPT_ERROR_TYPE { KFD_DEBUG_CP_BAD_OP_ECODE_MASK) \ >> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT) -static void event_interrupt_poison_consumption(struct kfd_node *dev, - uint16_t pasid, uint16_t client_id) -{ - enum amdgpu_ras_block block = 0; - int old_poison, ret = -EINVAL; - uint32_t reset = 0; - struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); - - if (!p) - return; - - /* all queues of a process will be unmapped in one time */ - old_poison = atomic_cmpxchg(&p->poison, 0, 1); - kfd_unref_process(p); - if (old_poison) - return; - - switch (client_id) { - case SOC15_IH_CLIENTID_SE0SH: - case SOC15_IH_CLIENTID_SE1SH: - case SOC15_IH_CLIENTID_SE2SH: - case SOC15_IH_CLIENTID_SE3SH: - case SOC15_IH_CLIENTID_UTCL2: - ret = kfd_dqm_evict_pasid(dev->dqm, pasid); - block = AMDGPU_RAS_BLOCK__GFX; - if (ret) - reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; - break; - case SOC15_IH_CLIENTID_SDMA0: - case SOC15_IH_CLIENTID_SDMA1: - case SOC15_IH_CLIENTID_SDMA2: - case SOC15_IH_CLIENTID_SDMA3: - case SOC15_IH_CLIENTID_SDMA4: - block = AMDGPU_RAS_BLOCK__SDMA; - reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; - break; - default: - break; - } - - kfd_signal_poison_consumed_event(dev, pasid); - - /* resetting queue passes, do page retirement without gpu reset - * resetting queue fails, fallback to gpu reset solution - */ - if (!ret) - dev_warn(dev->adev->dev, - "RAS poison consumption, unmap queue flow succeeded: client id %d\n", - client_id); - else - dev_warn(dev->adev->dev, - "RAS poison consumption, fall back to gpu reset flow: client id %d\n", - client_id); - - amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset); -} - static bool event_interrupt_isr_v10(struct kfd_node *dev, const uint32_t *ih_ring_entry, uint32_t *patched_ihre, @@ -332,11 +275,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID), sq_intr_err_type); - if (sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && - sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { - event_interrupt_poison_consumption(dev, pasid, source_id); - return; - } break; default: break; @@ -362,9 +300,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_SDMA7) { if (source_id == SOC15_INTSRC_SDMA_TRAP) { kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); - } else if (source_id == SOC15_INTSRC_SDMA_ECC) { - event_interrupt_poison_consumption(dev, pasid, source_id); - return; } } else if (client_id == SOC15_IH_CLIENTID_VMC || client_id == SOC15_IH_CLIENTID_VMC1 || @@ -388,12 +323,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev, if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) hub_inst = node_id / 4; - if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, - hub_inst, vmid_type)) { - event_interrupt_poison_consumption(dev, pasid, client_id); - return; - } - info.vmid = vmid; info.mc_id = client_id; info.page_addr = ih_ring_entry[4] | diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c index f524a55eee11..b3f988b275a8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c @@ -330,11 +330,14 @@ static void event_interrupt_wq_v11(struct kfd_node *dev, if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) kfd_signal_event_interrupt(pasid, context_id0, 32); else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE && - KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) - kfd_set_dbg_ev_from_interrupt(dev, pasid, - KFD_CTXID0_DOORBELL_ID(context_id0), + KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) { + u32 doorbell_id = KFD_CTXID0_DOORBELL_ID(context_id0); + + kfd_set_dbg_ev_from_interrupt(dev, pasid, doorbell_id, KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)), NULL, 0); + kfd_dqm_suspend_bad_queue_mes(dev, pasid, doorbell_id); + } /* SDMA */ else if (source_id == SOC21_INTSRC_SDMA_TRAP) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index a9c3580be8c9..fecdbbab9894 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -431,25 +431,9 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_UTCL2) { struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); - uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); - int hub_inst = 0; struct kfd_hsa_memory_exception_data exception_data; - /* gfxhub */ - if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { - hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, - node_id); - if (hub_inst < 0) - hub_inst = 0; - } - - /* mmhub */ - if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) - hub_inst = node_id / 4; - - if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, - hub_inst, vmid_type)) { + if (source_id == SOC15_INTSRC_VMC_UTCL2_POISON) { event_interrupt_poison_consumption_v9(dev, pasid, client_id); return; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 50a81da43ce1..d9ae854b6908 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -225,7 +225,7 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { if (mqd_mem_obj->gtt_mem) { - amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, mqd_mem_obj->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, &mqd_mem_obj->gtt_mem); kfree(mqd_mem_obj); } else { kfd_gtt_sa_free(mm->dev, mqd_mem_obj); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c index b7a08e7a4423..d163d92a692f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c @@ -187,6 +187,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 66c73825c0a0..84e8ea3a8a0c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -321,8 +321,11 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, static bool check_preemption_failed(struct mqd_manager *mm, void *mqd) { struct v9_mqd *m = (struct v9_mqd *)mqd; + uint32_t doorbell_id = m->queue_doorbell_id0; - return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0); + m->queue_doorbell_id0 = 0; + + return kfd_check_hiq_mqd_doorbell_id(mm->dev, doorbell_id, 0); } static int get_wave_state(struct mqd_manager *mm, void *mqd, @@ -624,6 +627,7 @@ static bool check_preemption_failed_v9_4_3(struct mqd_manager *mm, void *mqd) m = get_mqd(mqd + hiq_mqd_size * inst); ret |= kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, inst); + m->queue_doorbell_id0 = 0; ++inst; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 00776f08351c..1f9f5bfeaf86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -37,11 +37,14 @@ static int pm_map_process_v9(struct packet_manager *pm, struct kfd_node *kfd = pm->dqm->dev; struct kfd_process_device *pdd = container_of(qpd, struct kfd_process_device, qpd); + struct amdgpu_device *adev = kfd->adev; packet = (struct pm4_mes_map_process *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_map_process)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process)); + if (adev->enforce_isolation[kfd->node_id]) + packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; packet->bitfields2.pasid = qpd->pqm->process->pasid; @@ -89,14 +92,18 @@ static int pm_map_process_aldebaran(struct packet_manager *pm, struct pm4_mes_map_process_aldebaran *packet; uint64_t vm_page_table_base_addr = qpd->page_table_base; struct kfd_dev *kfd = pm->dqm->dev->kfd; + struct kfd_node *knode = pm->dqm->dev; struct kfd_process_device *pdd = container_of(qpd, struct kfd_process_device, qpd); int i; + struct amdgpu_device *adev = kfd->adev; packet = (struct pm4_mes_map_process_aldebaran *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran)); packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process_aldebaran)); + if (adev->enforce_isolation[knode->node_id]) + packet->bitfields2.exec_cleaner_shader = 1; packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 10; packet->bitfields2.pasid = qpd->pqm->process->pasid; @@ -144,17 +151,22 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, int concurrent_proc_cnt = 0; struct kfd_node *kfd = pm->dqm->dev; + struct amdgpu_device *adev = kfd->adev; /* Determine the number of processes to map together to HW: * it can not exceed the number of VMIDs available to the * scheduler, and it is determined by the smaller of the number * of processes in the runlist and kfd module parameter * hws_max_conc_proc. + * However, if enforce_isolation is set (toggle LDS/VGPRs/SGPRs + * cleaner between process switch), enable single-process mode + * in HWS. * Note: the arbitration between the number of VMIDs and * hws_max_conc_proc has been done in * kgd2kfd_device_init(). */ - concurrent_proc_cnt = min(pm->dqm->processes_count, + concurrent_proc_cnt = adev->enforce_isolation[kfd->node_id] ? + 1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum); packet = (struct pm4_mes_runlist *)buffer; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h index 8b6b2bd5c148..cd8611401a66 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -145,8 +145,9 @@ struct pm4_mes_map_process { union { struct { - uint32_t pasid:16; - uint32_t reserved1:2; + uint32_t pasid:16; /* 0 - 15 */ + uint32_t reserved1:1; /* 16 */ + uint32_t exec_cleaner_shader:1; /* 17 */ uint32_t debug_vmid:4; uint32_t new_debug:1; uint32_t reserved2:1; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h index 38f5cb6a222a..e0ed62c4ade0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h @@ -37,7 +37,7 @@ struct pm4_mes_map_process_aldebaran { struct { uint32_t pasid:16; /* 0 - 15 */ uint32_t single_memops:1; /* 16 */ - uint32_t reserved1:1; /* 17 */ + uint32_t exec_cleaner_shader:1; /* 17 */ uint32_t debug_vmid:4; /* 18 - 21 */ uint32_t new_debug:1; /* 22 */ uint32_t tmz:1; /* 23 */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 2b3ec92981e8..9ae9abc6eb43 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -310,6 +310,10 @@ struct kfd_node { struct kfd_local_mem_info local_mem_info; struct kfd_dev *kfd; + + /* Track per device allocated watch points */ + uint32_t alloc_watch_ids; + spinlock_t watch_points_lock; }; struct kfd_dev { @@ -362,10 +366,6 @@ struct kfd_dev { struct kfd_node *nodes[MAX_KFD_NODES]; unsigned int num_nodes; - /* Track per device allocated watch points */ - uint32_t alloc_watch_ids; - spinlock_t watch_points_lock; - /* Kernel doorbells for KFD device */ struct amdgpu_bo *doorbells; @@ -414,13 +414,16 @@ enum kfd_unmap_queues_filter { * @KFD_QUEUE_TYPE_DIQ: DIQ queue type. * * @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface. + * + * @KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: SDMA user mode queue with target SDMA engine ID. */ enum kfd_queue_type { KFD_QUEUE_TYPE_COMPUTE, KFD_QUEUE_TYPE_SDMA, KFD_QUEUE_TYPE_HIQ, KFD_QUEUE_TYPE_DIQ, - KFD_QUEUE_TYPE_SDMA_XGMI + KFD_QUEUE_TYPE_SDMA_XGMI, + KFD_QUEUE_TYPE_SDMA_BY_ENG_ID }; enum kfd_queue_format { @@ -494,8 +497,8 @@ struct queue_properties { uint64_t queue_size; uint32_t priority; uint32_t queue_percent; - uint32_t *read_ptr; - uint32_t *write_ptr; + void __user *read_ptr; + void __user *write_ptr; void __iomem *doorbell_ptr; uint32_t doorbell_off; bool is_interop; @@ -522,6 +525,12 @@ struct queue_properties { uint64_t tba_addr; uint64_t tma_addr; uint64_t exception_status; + + struct amdgpu_bo *wptr_bo; + struct amdgpu_bo *rptr_bo; + struct amdgpu_bo *ring_bo; + struct amdgpu_bo *eop_buf_bo; + struct amdgpu_bo *cwsr_bo; }; #define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 && \ @@ -604,7 +613,7 @@ struct queue { uint64_t gang_ctx_gpu_addr; void *gang_ctx_cpu_ptr; - struct amdgpu_bo *wptr_bo; + struct amdgpu_bo *wptr_bo_gart; }; enum KFD_MQD_TYPE { @@ -837,6 +846,9 @@ struct kfd_process_device { void *proc_ctx_bo; uint64_t proc_ctx_gpu_addr; void *proc_ctx_cpu_ptr; + + /* Tracks queue reset status */ + bool has_reset_queue; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) @@ -854,6 +866,8 @@ struct svm_range_list { struct delayed_work restore_work; DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE); struct task_struct *faulting_task; + /* check point ts decides if page fault recovery need be dropped */ + uint64_t checkpoint_ts[MAX_GPU_INSTANCE]; }; /* Process data */ @@ -1284,6 +1298,15 @@ int init_queue(struct queue **q, const struct queue_properties *properties); void uninit_queue(struct queue *q); void print_queue_properties(struct queue_properties *q); void print_queue(struct queue *q); +int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, + u64 expected_size); +void kfd_queue_buffer_put(struct amdgpu_bo **bo); +int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); +int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties); +void kfd_queue_unref_bo_va(struct amdgpu_vm *vm, struct amdgpu_bo **bo); +int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd, + struct queue_properties *properties); +void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev); struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, struct kfd_node *dev); @@ -1303,6 +1326,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_node *dev, enum kfd_queue_type type); void kernel_queue_uninit(struct kernel_queue *kq); int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid); +int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id); /* Process Queue Manager */ struct process_queue_node { @@ -1320,7 +1344,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct file *f, struct queue_properties *properties, unsigned int *qid, - struct amdgpu_bo *wptr_bo, const struct kfd_criu_queue_priv_data *q_data, const void *restore_mqd, const void *restore_ctl_stack, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 17e42161b015..a902950cc060 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1048,7 +1048,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) if (pdd->dev->kfd->shared_resources.enable_mes) amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev, - pdd->proc_ctx_bo); + &pdd->proc_ctx_bo); /* * before destroying pdd, make sure to report availability * for auto suspend @@ -1851,6 +1851,8 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger) goto fail; } n_evicted++; + + pdd->dev->dqm->is_hws_hang = false; } return r; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 21f5a1fb3bf8..20ea745729ee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -204,19 +204,23 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm, } if (dev->kfd->shared_resources.enable_mes) { - amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo); - if (pqn->q->wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart); } } void pqm_uninit(struct process_queue_manager *pqm) { struct process_queue_node *pqn, *next; + struct kfd_process_device *pdd; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { - if (pqn->q) + if (pqn->q) { + pdd = kfd_get_process_device_data(pqn->q->device, pqm->process); + kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); + kfd_queue_release_buffers(pdd, &pqn->q->properties); pqm_clean_queue_resource(pqm, pqn); + } kfd_procfs_del_queue(pqn->q); uninit_queue(pqn->q); @@ -231,8 +235,7 @@ void pqm_uninit(struct process_queue_manager *pqm) static int init_user_queue(struct process_queue_manager *pqm, struct kfd_node *dev, struct queue **q, struct queue_properties *q_properties, - struct file *f, struct amdgpu_bo *wptr_bo, - unsigned int qid) + struct file *f, unsigned int qid) { int retval; @@ -263,12 +266,32 @@ static int init_user_queue(struct process_queue_manager *pqm, goto cleanup; } memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); - (*q)->wptr_bo = wptr_bo; + + /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work + * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) + */ + if (((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) + >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) { + if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) { + pr_err("Queue memory allocated to wrong device\n"); + retval = -EINVAL; + goto free_gang_ctx_bo; + } + + retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo, + &(*q)->wptr_bo_gart); + if (retval) { + pr_err("Failed to map wptr bo to GART\n"); + goto free_gang_ctx_bo; + } + } } pr_debug("PQM After init queue"); return 0; +free_gang_ctx_bo: + amdgpu_amdkfd_free_gtt_mem(dev->adev, (*q)->gang_ctx_bo); cleanup: uninit_queue(*q); *q = NULL; @@ -280,7 +303,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct file *f, struct queue_properties *properties, unsigned int *qid, - struct amdgpu_bo *wptr_bo, const struct kfd_criu_queue_priv_data *q_data, const void *restore_mqd, const void *restore_ctl_stack, @@ -345,13 +367,14 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA_XGMI: + case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: /* SDMA queues are always allocated statically no matter * which scheduler mode is used. We also do not need to * check whether a SDMA queue can be allocated here, because * allocate_sdma_queue() in create_queue() has the * corresponding check logic. */ - retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); + retval = init_user_queue(pqm, dev, &q, properties, f, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -372,7 +395,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; } - retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); + retval = init_user_queue(pqm, dev, &q, properties, f, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -490,6 +513,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) } if (pqn->q) { + retval = kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); + if (retval) + goto err_destroy_queue; + kfd_procfs_del_queue(pqn->q); dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); @@ -500,7 +527,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (retval != -ETIME) goto err_destroy_queue; } - + kfd_queue_release_buffers(pdd, &pqn->q->properties); pqm_clean_queue_resource(pqm, pqn); uninit_queue(pqn->q); } @@ -524,11 +551,42 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm, struct process_queue_node *pqn; pqn = get_queue_by_qid(pqm, qid); - if (!pqn) { + if (!pqn || !pqn->q) { pr_debug("No queue %d exists for update operation\n", qid); return -EFAULT; } + /* + * Update with NULL ring address is used to disable the queue + */ + if (p->queue_address && p->queue_size) { + struct kfd_process_device *pdd; + struct amdgpu_vm *vm; + struct queue *q = pqn->q; + int err; + + pdd = kfd_get_process_device_data(q->device, q->process); + if (!pdd) + return -ENODEV; + vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(vm->root.bo, false); + if (err) + return err; + + if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo, + p->queue_size)) { + pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n", + p->queue_address, p->queue_size); + return -EFAULT; + } + + kfd_queue_unref_bo_va(vm, &pqn->q->properties.ring_bo); + kfd_queue_buffer_put(&pqn->q->properties.ring_bo); + amdgpu_bo_unreserve(vm->root.bo); + + pqn->q->properties.ring_bo = p->ring_bo; + } + pqn->q->properties.queue_address = p->queue_address; pqn->q->properties.queue_size = p->queue_size; pqn->q->properties.queue_percent = p->queue_percent; @@ -971,7 +1029,7 @@ int kfd_criu_restore_queue(struct kfd_process *p, print_queue_properties(&qp); - ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack, + ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack, NULL); if (ret) { pr_err("Failed to create new queue err:%d\n", ret); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 0f6992b1895c..ad29634f8b44 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -24,6 +24,8 @@ #include <linux/slab.h> #include "kfd_priv.h" +#include "kfd_topology.h" +#include "kfd_svm.h" void print_queue_properties(struct queue_properties *q) { @@ -82,3 +84,374 @@ void uninit_queue(struct queue *q) { kfree(q); } + +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) + +static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size) +{ + struct kfd_process *p = pdd->process; + struct list_head update_list; + struct svm_range *prange; + int ret = -EINVAL; + + INIT_LIST_HEAD(&update_list); + addr >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + + mutex_lock(&p->svms.lock); + + /* + * range may split to multiple svm pranges aligned to granularity boundaery. + */ + while (size) { + uint32_t gpuid, gpuidx; + int r; + + prange = svm_range_from_addr(&p->svms, addr, NULL); + if (!prange) + break; + + if (!prange->mapped_to_gpu) + break; + + r = kfd_process_gpuid_from_node(p, pdd->dev, &gpuid, &gpuidx); + if (r < 0) + break; + if (!test_bit(gpuidx, prange->bitmap_access) && + !test_bit(gpuidx, prange->bitmap_aip)) + break; + + if (!(prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) + break; + + list_add(&prange->update_list, &update_list); + + if (prange->last - prange->start + 1 >= size) { + size = 0; + break; + } + + size -= prange->last - prange->start + 1; + addr += prange->last - prange->start + 1; + } + if (size) { + pr_debug("[0x%llx 0x%llx] not registered\n", addr, addr + size - 1); + goto out_unlock; + } + + list_for_each_entry(prange, &update_list, update_list) + atomic_inc(&prange->queue_refcount); + ret = 0; + +out_unlock: + mutex_unlock(&p->svms.lock); + return ret; +} + +static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size) +{ + struct kfd_process *p = pdd->process; + struct svm_range *prange, *pchild; + struct interval_tree_node *node; + unsigned long last; + + addr >>= PAGE_SHIFT; + last = addr + (size >> PAGE_SHIFT) - 1; + + mutex_lock(&p->svms.lock); + + node = interval_tree_iter_first(&p->svms.objects, addr, last); + while (node) { + struct interval_tree_node *next_node; + unsigned long next_start; + + prange = container_of(node, struct svm_range, it_node); + next_node = interval_tree_iter_next(node, addr, last); + next_start = min(node->last, last) + 1; + + if (atomic_add_unless(&prange->queue_refcount, -1, 0)) { + list_for_each_entry(pchild, &prange->child_list, child_list) + atomic_add_unless(&pchild->queue_refcount, -1, 0); + } + + node = next_node; + addr = next_start; + } + + mutex_unlock(&p->svms.lock); +} +#else + +static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size) +{ + return -EINVAL; +} + +static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size) +{ +} + +#endif + +int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo, + u64 expected_size) +{ + struct amdgpu_bo_va_mapping *mapping; + u64 user_addr; + u64 size; + + user_addr = (u64)addr >> AMDGPU_GPU_PAGE_SHIFT; + size = expected_size >> AMDGPU_GPU_PAGE_SHIFT; + + mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr); + if (!mapping) + goto out_err; + + if (user_addr != mapping->start || + (size != 0 && user_addr + size - 1 != mapping->last)) { + pr_debug("expected size 0x%llx not equal to mapping addr 0x%llx size 0x%llx\n", + expected_size, mapping->start << AMDGPU_GPU_PAGE_SHIFT, + (mapping->last - mapping->start + 1) << AMDGPU_GPU_PAGE_SHIFT); + goto out_err; + } + + *pbo = amdgpu_bo_ref(mapping->bo_va->base.bo); + mapping->bo_va->queue_refcount++; + return 0; + +out_err: + *pbo = NULL; + return -EINVAL; +} + +/* FIXME: remove this function, just call amdgpu_bo_unref directly */ +void kfd_queue_buffer_put(struct amdgpu_bo **bo) +{ + amdgpu_bo_unref(bo); +} + +int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) +{ + struct kfd_topology_device *topo_dev; + struct amdgpu_vm *vm; + u32 total_cwsr_size; + int err; + + topo_dev = kfd_topology_device_by_id(pdd->dev->id); + if (!topo_dev) + return -EINVAL; + + vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(vm->root.bo, false); + if (err) + return err; + + err = kfd_queue_buffer_get(vm, properties->write_ptr, &properties->wptr_bo, PAGE_SIZE); + if (err) + goto out_err_unreserve; + + err = kfd_queue_buffer_get(vm, properties->read_ptr, &properties->rptr_bo, PAGE_SIZE); + if (err) + goto out_err_unreserve; + + err = kfd_queue_buffer_get(vm, (void *)properties->queue_address, + &properties->ring_bo, properties->queue_size); + if (err) + goto out_err_unreserve; + + /* only compute queue requires EOP buffer and CWSR area */ + if (properties->type != KFD_QUEUE_TYPE_COMPUTE) + goto out_unreserve; + + /* EOP buffer is not required for all ASICs */ + if (properties->eop_ring_buffer_address) { + if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) { + pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n", + properties->eop_buf_bo->tbo.base.size, + topo_dev->node_props.eop_buffer_size); + err = -EINVAL; + goto out_err_unreserve; + } + err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address, + &properties->eop_buf_bo, + properties->eop_ring_buffer_size); + if (err) + goto out_err_unreserve; + } + + if (properties->ctl_stack_size != topo_dev->node_props.ctl_stack_size) { + pr_debug("queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n", + properties->ctl_stack_size, + topo_dev->node_props.ctl_stack_size); + err = -EINVAL; + goto out_err_unreserve; + } + + if (properties->ctx_save_restore_area_size != topo_dev->node_props.cwsr_size) { + pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n", + properties->ctx_save_restore_area_size, + topo_dev->node_props.cwsr_size); + err = -EINVAL; + goto out_err_unreserve; + } + + total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) + * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); + + err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address, + &properties->cwsr_bo, total_cwsr_size); + if (!err) + goto out_unreserve; + + amdgpu_bo_unreserve(vm->root.bo); + + err = kfd_queue_buffer_svm_get(pdd, properties->ctx_save_restore_area_address, + total_cwsr_size); + if (err) + goto out_err_release; + + return 0; + +out_unreserve: + amdgpu_bo_unreserve(vm->root.bo); + return 0; + +out_err_unreserve: + amdgpu_bo_unreserve(vm->root.bo); +out_err_release: + /* FIXME: make a _locked version of this that can be called before + * dropping the VM reservation. + */ + kfd_queue_unref_bo_vas(pdd, properties); + kfd_queue_release_buffers(pdd, properties); + return err; +} + +int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) +{ + struct kfd_topology_device *topo_dev; + u32 total_cwsr_size; + + kfd_queue_buffer_put(&properties->wptr_bo); + kfd_queue_buffer_put(&properties->rptr_bo); + kfd_queue_buffer_put(&properties->ring_bo); + kfd_queue_buffer_put(&properties->eop_buf_bo); + kfd_queue_buffer_put(&properties->cwsr_bo); + + topo_dev = kfd_topology_device_by_id(pdd->dev->id); + if (!topo_dev) + return -EINVAL; + total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) + * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); + + kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, total_cwsr_size); + return 0; +} + +void kfd_queue_unref_bo_va(struct amdgpu_vm *vm, struct amdgpu_bo **bo) +{ + if (*bo) { + struct amdgpu_bo_va *bo_va; + + bo_va = amdgpu_vm_bo_find(vm, *bo); + if (bo_va && bo_va->queue_refcount) + bo_va->queue_refcount--; + } +} + +int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd, + struct queue_properties *properties) +{ + struct amdgpu_vm *vm; + int err; + + vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(vm->root.bo, false); + if (err) + return err; + + kfd_queue_unref_bo_va(vm, &properties->wptr_bo); + kfd_queue_unref_bo_va(vm, &properties->rptr_bo); + kfd_queue_unref_bo_va(vm, &properties->ring_bo); + kfd_queue_unref_bo_va(vm, &properties->eop_buf_bo); + kfd_queue_unref_bo_va(vm, &properties->cwsr_bo); + + amdgpu_bo_unreserve(vm->root.bo); + return 0; +} + +#define SGPR_SIZE_PER_CU 0x4000 +#define LDS_SIZE_PER_CU 0x10000 +#define HWREG_SIZE_PER_CU 0x1000 +#define DEBUGGER_BYTES_ALIGN 64 +#define DEBUGGER_BYTES_PER_WAVE 32 + +static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) +{ + u32 vgpr_size = 0x40000; + + if ((gfxv / 100 * 100) == 90400 || /* GFX_VERSION_AQUA_VANJARAM */ + gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */ + gfxv == 90008) /* GFX_VERSION_ARCTURUS */ + vgpr_size = 0x80000; + else if (gfxv == 110000 || /* GFX_VERSION_PLUM_BONITO */ + gfxv == 110001 || /* GFX_VERSION_WHEAT_NAS */ + gfxv == 120000 || /* GFX_VERSION_GFX1200 */ + gfxv == 120001) /* GFX_VERSION_GFX1201 */ + vgpr_size = 0x60000; + + return vgpr_size; +} + +#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv) \ + (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\ + LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU) + +#define CNTL_STACK_BYTES_PER_WAVE(gfxv) \ + ((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/ + +#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40 + +void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) +{ + struct kfd_node_properties *props = &dev->node_props; + u32 gfxv = props->gfx_target_version; + u32 ctl_stack_size; + u32 wg_data_size; + u32 wave_num; + u32 cu_num; + + if (gfxv < 80001) /* GFX_VERSION_CARRIZO */ + return; + + cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask); + wave_num = (gfxv < 100100) ? /* GFX_VERSION_NAVI10 */ + min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512) + : cu_num * 32; + + wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv), PAGE_SIZE); + ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8; + ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size, + PAGE_SIZE); + + if ((gfxv / 10000 * 10000) == 100000) { + /* HW design limits control stack size to 0x7000. + * This is insufficient for theoretical PM4 cases + * but sufficient for AQL, limited by SPI events. + */ + ctl_stack_size = min(ctl_stack_size, 0x7000); + } + + props->ctl_stack_size = ctl_stack_size; + props->debug_memory_size = ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN); + props->cwsr_size = ctl_stack_size + wg_data_size; + + if (gfxv == 80002) /* GFX_VERSION_TONGA */ + props->eop_buffer_size = 0x8000; + else if ((gfxv / 100 * 100) == 90400) /* GFX_VERSION_AQUA_VANJARAM */ + props->eop_buffer_size = 4096; + else if (gfxv >= 80000) + props->eop_buffer_size = 4096; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index bd9c2921e0dc..ce2a5d9f90d3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1051,6 +1051,7 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old, new->mapped_to_gpu = old->mapped_to_gpu; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); + atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount)); return 0; } @@ -1992,6 +1993,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old) new->vram_pages = old->vram_pages; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); + atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount)); return new; } @@ -2260,16 +2262,10 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms) { struct kfd_process_device *pdd; struct kfd_process *p; - int drain; uint32_t i; p = container_of(svms, struct kfd_process, svms); -restart: - drain = atomic_read(&svms->drain_pagefaults); - if (!drain) - return; - for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) { pdd = p->pdds[i]; if (!pdd) @@ -2289,8 +2285,6 @@ restart: pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); } - if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain) - goto restart; } static void svm_range_deferred_list_work(struct work_struct *work) @@ -2312,17 +2306,8 @@ static void svm_range_deferred_list_work(struct work_struct *work) prange->start, prange->last, prange->work_item.op); mm = prange->work_item.mm; -retry: - mmap_write_lock(mm); - /* Checking for the need to drain retry faults must be inside - * mmap write lock to serialize with munmap notifiers. - */ - if (unlikely(atomic_read(&svms->drain_pagefaults))) { - mmap_write_unlock(mm); - svm_range_drain_retry_fault(svms); - goto retry; - } + mmap_write_lock(mm); /* Remove from deferred_list must be inside mmap write lock, for * two race cases: @@ -2443,6 +2428,17 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, struct kfd_process *p; unsigned long s, l; bool unmap_parent; + uint32_t i; + + if (atomic_read(&prange->queue_refcount)) { + int r; + + pr_warn("Freeing queue vital buffer 0x%lx, queue evicted\n", + prange->start << PAGE_SHIFT); + r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM); + if (r) + pr_debug("failed %d to quiesce KFD queues\n", r); + } p = kfd_lookup_process_by_mm(mm); if (!p) @@ -2452,11 +2448,35 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last, start, last); - /* Make sure pending page faults are drained in the deferred worker - * before the range is freed to avoid straggler interrupts on - * unmapped memory causing "phantom faults". + /* calculate time stamps that are used to decide which page faults need be + * dropped or handled before unmap pages from gpu vm */ - atomic_inc(&svms->drain_pagefaults); + for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) { + struct kfd_process_device *pdd; + struct amdgpu_device *adev; + struct amdgpu_ih_ring *ih; + uint32_t checkpoint_wptr; + + pdd = p->pdds[i]; + if (!pdd) + continue; + + adev = pdd->dev->adev; + + /* Check and drain ih1 ring if cam not available */ + ih = &adev->irq.ih1; + checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih); + if (ih->rptr != checkpoint_wptr) { + svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1); + continue; + } + + /* check if dev->irq.ih_soft is not empty */ + ih = &adev->irq.ih_soft; + checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih); + if (ih->rptr != checkpoint_wptr) + svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1); + } unmap_parent = start <= prange->start && last >= prange->last; @@ -2897,7 +2917,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault) int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint32_t vmid, uint32_t node_id, - uint64_t addr, bool write_fault) + uint64_t addr, uint64_t ts, bool write_fault) { unsigned long start, last, size; struct mm_struct *mm = NULL; @@ -2907,7 +2927,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, ktime_t timestamp = ktime_get_boottime(); struct kfd_node *node; int32_t best_loc; - int32_t gpuidx = MAX_GPU_INSTANCE; + int32_t gpuid, gpuidx = MAX_GPU_INSTANCE; bool write_locked = false; struct vm_area_struct *vma; bool migration = false; @@ -2928,11 +2948,38 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr); if (atomic_read(&svms->drain_pagefaults)) { - pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + pr_debug("page fault handling disabled, drop fault 0x%llx\n", addr); r = 0; goto out; } + node = kfd_node_by_irq_ids(adev, node_id, vmid); + if (!node) { + pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id, + vmid); + r = -EFAULT; + goto out; + } + + if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) { + pr_debug("failed to get gpuid/gpuidex for node_id: %d\n", node_id); + r = -EFAULT; + goto out; + } + + /* check if this page fault time stamp is before svms->checkpoint_ts */ + if (svms->checkpoint_ts[gpuidx] != 0) { + if (amdgpu_ih_ts_after(ts, svms->checkpoint_ts[gpuidx])) { + pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + r = 0; + goto out; + } else + /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts + * to zero to avoid following ts wrap around give wrong comparing + */ + svms->checkpoint_ts[gpuidx] = 0; + } + if (!p->xnack_enabled) { pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); r = -EFAULT; @@ -2949,13 +2996,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } - node = kfd_node_by_irq_ids(adev, node_id, vmid); - if (!node) { - pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id, - vmid); - r = -EFAULT; - goto out; - } mmap_read_lock(mm); retry_write_locked: mutex_lock(&svms->lock); @@ -3170,8 +3210,9 @@ void svm_range_list_fini(struct kfd_process *p) /* * Ensure no retry fault comes in afterwards, as page fault handler will * not find kfd process and take mm lock to recover fault. + * stop kfd page fault handing, then wait pending page faults got drained */ - atomic_inc(&p->svms.drain_pagefaults); + atomic_set(&p->svms.drain_pagefaults, 1); svm_range_drain_retry_fault(&p->svms); list_for_each_entry_safe(prange, next, &p->svms.list, list) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 70c1776611c4..bddd24f04669 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -137,6 +137,7 @@ struct svm_range { DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE); DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE); bool mapped_to_gpu; + atomic_t queue_refcount; }; static inline void svm_range_lock(struct svm_range *prange) @@ -173,7 +174,7 @@ int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, bool clear); void svm_range_vram_node_free(struct svm_range *prange); int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, - uint32_t vmid, uint32_t node_id, uint64_t addr, + uint32_t vmid, uint32_t node_id, uint64_t addr, uint64_t ts, bool write_fault); int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence); void svm_range_add_list_work(struct svm_range_list *svms, @@ -224,7 +225,7 @@ static inline void svm_range_list_fini(struct kfd_process *p) static inline int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint32_t client_id, uint32_t node_id, - uint64_t addr, bool write_fault) + uint64_t addr, uint64_t ts, bool write_fault) { return -EFAULT; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 6f89b06f89d3..3871591c9aec 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -292,6 +292,8 @@ static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr, iolink->max_bandwidth); sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size", iolink->rec_transfer_size); + sysfs_show_32bit_prop(buffer, offs, "recommended_sdma_engine_id_mask", + iolink->rec_sdma_eng_id_mask); sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags); return offs; @@ -1265,6 +1267,54 @@ static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev, } } +#define REC_SDMA_NUM_GPU 8 +static const int rec_sdma_eng_map[REC_SDMA_NUM_GPU][REC_SDMA_NUM_GPU] = { + { -1, 14, 12, 2, 4, 8, 10, 6 }, + { 14, -1, 2, 10, 8, 4, 6, 12 }, + { 10, 2, -1, 12, 14, 6, 4, 8 }, + { 2, 12, 10, -1, 6, 14, 8, 4 }, + { 4, 8, 14, 6, -1, 10, 12, 2 }, + { 8, 4, 6, 14, 12, -1, 2, 10 }, + { 10, 6, 4, 8, 12, 2, -1, 14 }, + { 6, 12, 8, 4, 2, 10, 14, -1 }}; + +static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev, + struct kfd_iolink_properties *outbound_link, + struct kfd_iolink_properties *inbound_link) +{ + struct kfd_node *gpu = outbound_link->gpu; + struct amdgpu_device *adev = gpu->adev; + int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; + bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && + adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 && + kfd_get_num_xgmi_sdma_engines(gpu) >= 14 && + (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8); + + if (support_rec_eng) { + int src_socket_id = adev->gmc.xgmi.physical_node_id; + int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id; + + outbound_link->rec_sdma_eng_id_mask = + 1 << rec_sdma_eng_map[src_socket_id][dst_socket_id]; + inbound_link->rec_sdma_eng_id_mask = + 1 << rec_sdma_eng_map[dst_socket_id][src_socket_id]; + } else { + int num_sdma_eng = kfd_get_num_sdma_engines(gpu); + int i, eng_offset = 0; + + if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && + kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) { + eng_offset = num_sdma_eng; + num_sdma_eng = kfd_get_num_xgmi_sdma_engines(gpu); + } + + for (i = 0; i < num_sdma_eng; i++) { + outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); + inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); + } + } +} + static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) { struct kfd_iolink_properties *link, *inbound_link; @@ -1303,6 +1353,7 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link); kfd_set_iolink_non_coherent(peer_dev, link, inbound_link); + kfd_set_recommended_sdma_engines(peer_dev, link, inbound_link); } } @@ -2027,7 +2078,7 @@ int kfd_topology_add_device(struct kfd_node *gpu) HSA_CAP_ASIC_REVISION_MASK); dev->node_props.location_id = pci_dev_id(gpu->adev->pdev); - if (KFD_GC_VERSION(dev->gpu->kfd) == IP_VERSION(9, 4, 3)) + if (gpu->kfd->num_nodes > 1) dev->node_props.location_id |= dev->gpu->node_id; dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus); @@ -2120,6 +2171,8 @@ int kfd_topology_add_device(struct kfd_node *gpu) dev->gpu->adev->gmc.xgmi.connected_to_cpu) dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS; + kfd_queue_ctx_save_restore_size(dev); + kfd_debug_print_topology(); kfd_notify_gpu_change(gpu_id, 1); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 2d1c9d771bef..155b5c410af1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -74,6 +74,10 @@ struct kfd_node_properties { uint32_t num_sdma_xgmi_engines; uint32_t num_sdma_queues_per_engine; uint32_t num_cp_queues; + uint32_t cwsr_size; + uint32_t ctl_stack_size; + uint32_t eop_buffer_size; + uint32_t debug_memory_size; char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; }; @@ -121,6 +125,7 @@ struct kfd_iolink_properties { uint32_t min_bandwidth; uint32_t max_bandwidth; uint32_t rec_transfer_size; + uint32_t rec_sdma_eng_id_mask; uint32_t flags; struct kfd_node *gpu; struct kobject *kobj; diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h index 10138676f27f..e5c0205f2618 100644 --- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h +++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h @@ -29,6 +29,7 @@ #define SOC15_INTSRC_CP_BAD_OPCODE 183 #define SOC15_INTSRC_SQ_INTERRUPT_MSG 239 #define SOC15_INTSRC_VMC_FAULT 0 +#define SOC15_INTSRC_VMC_UTCL2_POISON 1 #define SOC15_INTSRC_SDMA_TRAP 224 #define SOC15_INTSRC_SDMA_ECC 220 #define SOC21_INTSRC_SDMA_TRAP 49 diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0ed6ce40bec8..5f7b178ba870 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -176,6 +176,7 @@ MODULE_FIRMWARE(FIRMWARE_DCN_401_DMUB); static int amdgpu_dm_init(struct amdgpu_device *adev); static void amdgpu_dm_fini(struct amdgpu_device *adev); static bool is_freesync_video_mode(const struct drm_display_mode *mode, struct amdgpu_dm_connector *aconnector); +static void reset_freesync_config_for_crtc(struct dm_crtc_state *new_crtc_state); static enum drm_mode_subconnector get_subconnector_type(struct dc_link *link) { @@ -877,6 +878,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) "HPD_IRQ", "SET_CONFIGC_REPLY", "DPIA_NOTIFICATION", + "HPD_SENSE_NOTIFY", }; do { @@ -1740,7 +1742,7 @@ static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device * /* Send the chunk */ ret = dm_dmub_send_vbios_gpint_command(adev, send_addrs[i], chunk, 30000); if (ret != DMUB_STATUS_OK) - /* No need to free bb here since it shall be done unconditionally <elsewhere> */ + /* No need to free bb here since it shall be done in dm_sw_fini() */ return NULL; } @@ -2218,7 +2220,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev) return 0; } - r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, fw_name_dmcu); + r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, "%s", fw_name_dmcu); if (r == -ENODEV) { /* DMCU firmware is not necessary, so don't raise a fuss if it's missing */ DRM_DEBUG_KMS("dm: DMCU firmware not found\n"); @@ -2465,8 +2467,17 @@ static int dm_sw_init(void *handle) static int dm_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct dal_allocation *da; + + list_for_each_entry(da, &adev->dm.da_list, list) { + if (adev->dm.bb_from_dmub == (void *) da->cpu_ptr) { + amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr); + list_del(&da->list); + kfree(da); + break; + } + } - kfree(adev->dm.bb_from_dmub); adev->dm.bb_from_dmub = NULL; kfree(adev->dm.dmub_fb_info); @@ -2894,6 +2905,9 @@ static int dm_suspend(void *handle) hpd_rx_irq_work_suspend(dm); + if (adev->dm.dc->caps.ips_support) + dc_allow_idle_optimizations(adev->dm.dc, true); + dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); @@ -3204,8 +3218,11 @@ static int dm_resume(void *handle) drm_connector_list_iter_end(&iter); /* Force mode set in atomic commit */ - for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) + for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) { new_crtc_state->active_changed = true; + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + reset_freesync_config_for_crtc(dm_new_crtc_state); + } /* * atomic_check is expected to create the dc states. We need to release @@ -4837,18 +4854,14 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) /* Determine whether to enable Replay support by default. */ if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) { switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { -/* - * Disabled by default due to https://gitlab.freedesktop.org/drm/amd/-/issues/3344 - * case IP_VERSION(3, 1, 4): - * case IP_VERSION(3, 1, 5): - * case IP_VERSION(3, 1, 6): - * case IP_VERSION(3, 2, 0): - * case IP_VERSION(3, 2, 1): - * case IP_VERSION(3, 5, 0): - * case IP_VERSION(3, 5, 1): - * replay_feature_enabled = true; - * break; - */ + case IP_VERSION(3, 1, 4): + case IP_VERSION(3, 2, 0): + case IP_VERSION(3, 2, 1): + case IP_VERSION(3, 5, 0): + case IP_VERSION(3, 5, 1): + replay_feature_enabled = true; + break; + default: replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK; break; @@ -5139,7 +5152,7 @@ static int dm_init_microcode(struct amdgpu_device *adev) /* ASIC doesn't support DMUB. */ return 0; } - r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, fw_name_dmub); + r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, "%s", fw_name_dmub); return r; } @@ -7190,6 +7203,9 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, int requested_bpc = drm_state ? drm_state->max_requested_bpc : 8; enum dc_status dc_result = DC_OK; + if (!dm_state) + return NULL; + do { stream = create_stream_for_sink(connector, drm_mode, dm_state, old_stream, @@ -8729,7 +8745,8 @@ static void amdgpu_dm_update_cursor(struct drm_plane *plane, adev->dm.dc->caps.color.dpp.gamma_corr) attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1; - attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; + if (afb) + attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; if (crtc_state->stream) { if (!dc_stream_set_cursor_attributes(crtc_state->stream, @@ -9319,7 +9336,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, if (acrtc) old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base); - if (!acrtc->wb_enabled) + if (!acrtc || !acrtc->wb_enabled) continue; dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); @@ -9723,9 +9740,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) DRM_INFO("[HDCP_DM] hdcp_update_display enable_encryption = %x\n", enable_encryption); - hdcp_update_display( - adev->dm.hdcp_workqueue, aconnector->dc_link->link_index, aconnector, - new_con_state->hdcp_content_type, enable_encryption); + if (aconnector->dc_link) + hdcp_update_display( + adev->dm.hdcp_workqueue, aconnector->dc_link->link_index, aconnector, + new_con_state->hdcp_content_type, enable_encryption); } } @@ -11734,25 +11752,6 @@ fail: return ret; } -static bool is_dp_capable_without_timing_msa(struct dc *dc, - struct amdgpu_dm_connector *amdgpu_dm_connector) -{ - u8 dpcd_data; - bool capable = false; - - if (amdgpu_dm_connector->dc_link && - dm_helpers_dp_read_dpcd( - NULL, - amdgpu_dm_connector->dc_link, - DP_DOWN_STREAM_PORT_COUNT, - &dpcd_data, - sizeof(dpcd_data))) { - capable = (dpcd_data & DP_MSA_TIMING_PAR_IGNORED) ? true:false; - } - - return capable; -} - static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, unsigned int offset, unsigned int total_length, @@ -12055,8 +12054,8 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, sink->sink_signal == SIGNAL_TYPE_EDP)) { bool edid_check_required = false; - if (is_dp_capable_without_timing_msa(adev->dm.dc, - amdgpu_dm_connector)) { + if (amdgpu_dm_connector->dc_link && + amdgpu_dm_connector->dc_link->dpcd_caps.allow_invalid_MSA_timing_param) { if (edid->features & DRM_EDID_FEATURE_CONTINUOUS_FREQ) { amdgpu_dm_connector->min_vfreq = connector->display_info.monitor_range.min_vfreq; amdgpu_dm_connector->max_vfreq = connector->display_info.monitor_range.max_vfreq; @@ -12138,7 +12137,8 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, } } - as_type = dm_get_adaptive_sync_support_type(amdgpu_dm_connector->dc_link); + if (amdgpu_dm_connector->dc_link) + as_type = dm_get_adaptive_sync_support_type(amdgpu_dm_connector->dc_link); if (as_type == FREESYNC_TYPE_PCON_IN_WHITELIST) { i = parse_hdmi_amd_vsdb(amdgpu_dm_connector, edid, &vsdb_info); @@ -12162,6 +12162,12 @@ update: if (dm_con_state) dm_con_state->freesync_capable = freesync_capable; + if (connector->state && amdgpu_dm_connector->dc_link && !freesync_capable && + amdgpu_dm_connector->dc_link->replay_settings.config.replay_supported) { + amdgpu_dm_connector->dc_link->replay_settings.config.replay_supported = false; + amdgpu_dm_connector->dc_link->replay_settings.replay_feature_enabled = false; + } + if (connector->vrr_capable_property) drm_connector_set_vrr_capable_property(connector, freesync_capable); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 99014339aaa3..a2cf2c066a76 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -251,9 +251,10 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) else if (dm->active_vblank_irq_count) dm->active_vblank_irq_count--; - dc_allow_idle_optimizations(dm->dc, dm->active_vblank_irq_count == 0); - - DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0); + if (dm->active_vblank_irq_count > 0) { + DRM_DEBUG_KMS("Allow idle optimizations (MALL): false\n"); + dc_allow_idle_optimizations(dm->dc, false); + } /* * Control PSR based on vblank requirements from OS @@ -272,6 +273,11 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) vblank_work->stream->link->replay_settings.replay_feature_enabled); } + if (dm->active_vblank_irq_count == 0) { + DRM_DEBUG_KMS("Allow idle optimizations (MALL): true\n"); + dc_allow_idle_optimizations(dm->dc, true); + } + mutex_unlock(&dm->dc_lock); dc_stream_release(vblank_work->stream); @@ -286,11 +292,14 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); struct amdgpu_display_manager *dm = &adev->dm; struct vblank_control_work *work; + int irq_type; int rc = 0; if (acrtc->otg_inst == -1) goto skip; + irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id); + if (enable) { /* vblank irq on -> Only need vupdate irq in vrr mode */ if (amdgpu_dm_crtc_vrr_active(acrtc_state)) @@ -303,13 +312,52 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) if (rc) return rc; - rc = (enable) - ? amdgpu_irq_get(adev, &adev->crtc_irq, acrtc->crtc_id) - : amdgpu_irq_put(adev, &adev->crtc_irq, acrtc->crtc_id); + /* crtc vblank or vstartup interrupt */ + if (enable) { + rc = amdgpu_irq_get(adev, &adev->crtc_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Get crtc_irq ret=%d\n", rc); + } else { + rc = amdgpu_irq_put(adev, &adev->crtc_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Put crtc_irq ret=%d\n", rc); + } if (rc) return rc; + /* + * hubp surface flip interrupt + * + * We have no guarantee that the frontend index maps to the same + * backend index - some even map to more than one. + * + * TODO: Use a different interrupt or check DC itself for the mapping. + */ + if (enable) { + rc = amdgpu_irq_get(adev, &adev->pageflip_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Get pageflip_irq ret=%d\n", rc); + } else { + rc = amdgpu_irq_put(adev, &adev->pageflip_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Put pageflip_irq ret=%d\n", rc); + } + + if (rc) + return rc; + +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + /* crtc vline0 interrupt, only available on DCN+ */ + if (amdgpu_ip_version(adev, DCE_HWIP, 0) != 0) { + if (enable) { + rc = amdgpu_irq_get(adev, &adev->vline0_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Get vline0_irq ret=%d\n", rc); + } else { + rc = amdgpu_irq_put(adev, &adev->vline0_irq, irq_type); + drm_dbg_vbl(crtc->dev, "Put vline0_irq ret=%d\n", rc); + } + + if (rc) + return rc; + } +#endif skip: if (amdgpu_in_reset(adev)) return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 62cb59f00929..db56b0aa5454 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -3804,9 +3804,12 @@ static int trigger_hpd_mst_set(void *data, u64 val) if (aconnector->dc_link->type == dc_connection_mst_branch && aconnector->mst_mgr.aux) { mutex_lock(&adev->dm.dc_lock); - dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD); + ret = dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD); mutex_unlock(&adev->dm.dc_lock); + if (!ret) + DRM_ERROR("DM_MST: Failed to detect dc link!"); + ret = drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true); if (ret < 0) DRM_ERROR("DM_MST: Failed to set the device into MST mode!"); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index b490ae67b6be..165e010fe69c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -1286,3 +1286,15 @@ enum adaptive_sync_type dm_get_adaptive_sync_support_type(struct dc_link *link) return as_type; } + +bool dm_helpers_is_fullscreen(struct dc_context *ctx, struct dc_stream_state *stream) +{ + // TODO + return false; +} + +bool dm_helpers_is_hdr_on(struct dc_context *ctx, struct dc_stream_state *stream) +{ + // TODO + return false; +}
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 5442da90f508..2e9f6da1acdc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -804,12 +804,25 @@ struct dsc_mst_fairness_params { }; #if defined(CONFIG_DRM_AMD_DC_FP) -static int kbps_to_peak_pbn(int kbps) +static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link) +{ + u8 link_coding_cap; + uint16_t fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B; + + link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(dc_link); + if (link_coding_cap == DP_128b_132b_ENCODING) + fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B; + + return fec_overhead_multiplier_x1000; +} + +static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000) { u64 peak_kbps = kbps; peak_kbps *= 1006; - peak_kbps = div_u64(peak_kbps, 1000); + peak_kbps *= fec_overhead_multiplier_x1000; + peak_kbps = div_u64(peak_kbps, 1000 * 1000); return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); } @@ -910,11 +923,12 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, int link_timeslots_used; int fair_pbn_alloc; int ret = 0; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled) { initial_slack[i] = - kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn; + kbps_to_peak_pbn(params[i].bw_range.max_kbps, fec_overhead_multiplier_x1000) - vars[i + k].pbn; bpp_increased[i] = false; remaining_to_increase += 1; } else { @@ -1010,6 +1024,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, int next_index; int remaining_to_try = 0; int ret; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -1039,7 +1054,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, if (next_index == -1) break; - vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1052,8 +1067,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { - vars[next_index].pbn = kbps_to_peak_pbn( - params[next_index].bw_range.max_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1082,6 +1096,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, int count = 0; int i, k, ret; bool debugfs_overwrite = false; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); memset(params, 0, sizeof(params)); @@ -1146,7 +1161,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try no compression */ for (i = 0; i < count; i++) { vars[i + k].aconnector = params[i].aconnector; - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, @@ -1165,7 +1180,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try max compression */ for (i = 0; i < count; i++) { if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = true; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, @@ -1173,7 +1188,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (ret < 0) return ret; } else { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, @@ -1270,6 +1285,9 @@ static bool is_dsc_need_re_compute( } } + if (new_stream_on_link_num == 0) + return false; + /* check current_state if there stream on link but it is not in * new request state */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index fa84d34b7373..600d6e221011 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -46,6 +46,9 @@ #define SYNAPTICS_CASCADED_HUB_ID 0x5A #define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0) +#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031 +#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000 + enum mst_msg_ready_type { NONE_MSG_RDY_EVENT = 0, DOWN_REP_MSG_RDY_EVENT = 1, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index a83bd0331c3b..a573a6639898 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -28,6 +28,7 @@ #include <drm/drm_blend.h> #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_plane_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_fourcc.h> #include "amdgpu.h" @@ -935,10 +936,14 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, } afb = to_amdgpu_framebuffer(new_state->fb); - obj = new_state->fb->obj[0]; + obj = drm_gem_fb_get_obj(new_state->fb, 0); + if (!obj) { + DRM_ERROR("Failed to get obj from framebuffer\n"); + return -EINVAL; + } + rbo = gem_to_amdgpu_bo(obj); adev = amdgpu_ttm_adev(rbo->tbo.bdev); - r = amdgpu_bo_reserve(rbo, true); if (r) { dev_err(adev->dev, "fail to reserve bo (%d)\n", r); @@ -1372,7 +1377,8 @@ void amdgpu_dm_plane_handle_cursor_update(struct drm_plane *plane, adev->dm.dc->caps.color.dpp.gamma_corr) attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1; - attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; + if (afb) + attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; if (crtc_state->stream) { mutex_lock(&adev->dm.dc_lock); diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index 80069651def3..8992e697759f 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -35,7 +35,6 @@ DC_LIBS += dcn201 DC_LIBS += dcn30 DC_LIBS += dcn301 DC_LIBS += dcn31 -DC_LIBS += dcn314 DC_LIBS += dml DC_LIBS += dml2 endif diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c index 506f82cd5cc6..88d3f9d7dd55 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c @@ -486,3 +486,30 @@ int dc_fixpt_s4d19(struct fixed31_32 arg) else return ux_dy(arg.value, 4, 19); } + +struct fixed31_32 dc_fixpt_from_ux_dy(unsigned int value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct fixed31_32 fixpt_value = dc_fixpt_zero; + struct fixed31_32 fixpt_int_value = dc_fixpt_zero; + long long frac_mask = ((long long)1 << (long long)integer_bits) - 1; + + fixpt_value.value = (long long)value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + frac_mask = frac_mask << fractional_bits; + fixpt_int_value.value = value & frac_mask; + fixpt_int_value.value <<= (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + fixpt_value.value |= fixpt_int_value.value; + return fixpt_value; +} + +struct fixed31_32 dc_fixpt_from_int_dy(unsigned int int_value, + unsigned int frac_value, + unsigned int integer_bits, + unsigned int fractional_bits) +{ + struct fixed31_32 fixpt_value = dc_fixpt_from_int(int_value); + + fixpt_value.value |= (long long)frac_value << (FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits); + return fixpt_value; +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c index 78df96882d6e..f8409453434c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c @@ -195,7 +195,7 @@ void dce11_pplib_apply_display_requirements( * , then change minimum memory clock based on real-time bandwidth * limitation. */ - if ((dc->ctx->asic_id.chip_family == FAMILY_AI) && + if (dc->bw_vbios && (dc->ctx->asic_id.chip_family == FAMILY_AI) && ASICREV_IS_VEGA20_P(dc->ctx->asic_id.hw_internal_rev) && (context->stream_count >= 2)) { pp_display_cfg->min_memory_clock_khz = max(pp_display_cfg->min_memory_clock_khz, (uint32_t) div64_s64( diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 70ee0089a20d..0ce9b40dfc68 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -120,25 +120,40 @@ static int dcn35_get_active_display_cnt_wa( return display_count; } - static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower, bool disable) { struct dc *dc = clk_mgr_base->ctx->dc; int i; + if (dc->ctx->dce_environment == DCE_ENV_DIAG) + return; + for (i = 0; i < dc->res_pool->pipe_count; ++i) { + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; struct pipe_ctx *pipe = safe_to_lower ? &context->res_ctx.pipe_ctx[i] : &dc->current_state->res_ctx.pipe_ctx[i]; - + bool stream_changed_otg_dig_on = false; if (pipe->top_pipe || pipe->prev_odm_pipe) continue; + stream_changed_otg_dig_on = old_pipe->stream && new_pipe->stream && + old_pipe->stream != new_pipe->stream && + old_pipe->stream_res.tg == new_pipe->stream_res.tg && + new_pipe->stream->link_enc && !new_pipe->stream->dpms_off && + new_pipe->stream->link_enc->funcs->is_dig_enabled && + new_pipe->stream->link_enc->funcs->is_dig_enabled( + new_pipe->stream->link_enc) && + new_pipe->stream_res.stream_enc && + new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled && + new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled(new_pipe->stream_res.stream_enc); if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || - !pipe->stream->link_enc)) { + !pipe->stream->link_enc) && !stream_changed_otg_dig_on) { + /* This w/a should not trigger when we have a dig active */ if (disable) { - if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->disable_crtc) - pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg); + if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) + pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); } else { @@ -290,6 +305,9 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz) new_clocks->ref_dtbclk_khz = 600000; + if (dc->debug.min_disp_clk_khz > 0 && new_clocks->dispclk_khz < dc->debug.min_disp_clk_khz) + new_clocks->dispclk_khz = dc->debug.min_disp_clk_khz; + /* * if it is safe to lower, but we are already in the lower state, we don't have to do anything * also if safe to lower is false, we just go in the higher state diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index 45fe17a46890..01ea3a31e54d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -14,6 +14,7 @@ #include "core_types.h" #include "dm_helpers.h" #include "link.h" +#include "dc_state_priv.h" #include "atomfirmware.h" #include "dcn401_smu14_driver_if.h" @@ -29,6 +30,7 @@ #define mmCLK01_CLK0_CLK2_DFS_CNTL 0x16E6F #define mmCLK01_CLK0_CLK3_DFS_CNTL 0x16E72 #define mmCLK01_CLK0_CLK4_DFS_CNTL 0x16E75 +#define mmCLK20_CLK2_CLK2_DFS_CNTL 0x1B051 #define CLK0_CLK_PLL_REQ__FbMult_int_MASK 0x000001ffUL #define CLK0_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000f000UL @@ -302,6 +304,197 @@ void dcn401_init_clocks(struct clk_mgr *clk_mgr_base) dcn401_build_wm_range_table(clk_mgr_base); } +static void dcn401_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, + struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + uint32_t dprefclk_did = 0; + uint32_t dcfclk_did = 0; + uint32_t dtbclk_did = 0; + uint32_t dispclk_did = 0; + uint32_t dppclk_did = 0; + uint32_t fclk_did = 0; + uint32_t target_div = 0; + + /* DFS Slice 0 is used for DISPCLK */ + dispclk_did = REG_READ(CLK0_CLK0_DFS_CNTL); + /* DFS Slice 1 is used for DPPCLK */ + dppclk_did = REG_READ(CLK0_CLK1_DFS_CNTL); + /* DFS Slice 2 is used for DPREFCLK */ + dprefclk_did = REG_READ(CLK0_CLK2_DFS_CNTL); + /* DFS Slice 3 is used for DCFCLK */ + dcfclk_did = REG_READ(CLK0_CLK3_DFS_CNTL); + /* DFS Slice 4 is used for DTBCLK */ + dtbclk_did = REG_READ(CLK0_CLK4_DFS_CNTL); + /* DFS Slice _ is used for FCLK */ + fclk_did = REG_READ(CLK2_CLK2_DFS_CNTL); + + /* Convert DISPCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dispclk_did); + //Get dispclk in khz + regs_and_bypass->dispclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DISPCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dppclk_did); + //Get dppclk in khz + regs_and_bypass->dppclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DPREFCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dprefclk_did); + //Get dprefclk in khz + regs_and_bypass->dprefclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DCFCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dcfclk_did); + //Get dcfclk in khz + regs_and_bypass->dcfclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DTBCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(dtbclk_did); + //Get dtbclk in khz + regs_and_bypass->dtbclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; + + /* Convert DTBCLK DFS Slice DID to divider*/ + target_div = dentist_get_divider_from_did(fclk_did); + //Get fclk in khz + regs_and_bypass->fclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->base.dentist_vco_freq_khz) / target_div; +} + +static bool dcn401_check_native_scaling(struct pipe_ctx *pipe) +{ + bool is_native_scaling = false; + int width = pipe->plane_state->src_rect.width; + int height = pipe->plane_state->src_rect.height; + + if (pipe->stream->timing.h_addressable == width && + pipe->stream->timing.v_addressable == height && + pipe->plane_state->dst_rect.width == width && + pipe->plane_state->dst_rect.height == height) + is_native_scaling = true; + + return is_native_scaling; +} + +static void dcn401_auto_dpm_test_log( + struct dc_clocks *new_clocks, + struct clk_mgr_internal *clk_mgr, + struct dc_state *context) +{ + unsigned int mall_ss_size_bytes; + int dramclk_khz_override, fclk_khz_override, num_fclk_levels; + + struct pipe_ctx *pipe_ctx_list[MAX_PIPES]; + int active_pipe_count = 0; + + for (int i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) { + pipe_ctx_list[active_pipe_count] = pipe_ctx; + active_pipe_count++; + } + } + + msleep(5); + + mall_ss_size_bytes = context->bw_ctx.bw.dcn.mall_ss_size_bytes; + + struct clk_log_info log_info = {0}; + struct clk_state_registers_and_bypass clk_register_dump; + + dcn401_dump_clk_registers(&clk_register_dump, &clk_mgr->base, &log_info); + + // Overrides for these clocks in case there is no p_state change support + dramclk_khz_override = new_clocks->dramclk_khz; + fclk_khz_override = new_clocks->fclk_khz; + + num_fclk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_fclk_levels - 1; + + if (!new_clocks->p_state_change_support) + dramclk_khz_override = clk_mgr->base.bw_params->max_memclk_mhz * 1000; + + if (!new_clocks->fclk_p_state_change_support) + fclk_khz_override = clk_mgr->base.bw_params->clk_table.entries[num_fclk_levels].fclk_mhz * 1000; + + + //////////////////////////////////////////////////////////////////////////// + // IMPORTANT: When adding more clocks to these logs, do NOT put a newline + // anywhere other than at the very end of the string. + // + // Formatting example (make sure to have " - " between each entry): + // + // AutoDPMTest: clk1:%d - clk2:%d - clk3:%d - clk4:%d\n" + //////////////////////////////////////////////////////////////////////////// + if (active_pipe_count > 0 && + new_clocks->dramclk_khz > 0 && + new_clocks->fclk_khz > 0 && + new_clocks->dcfclk_khz > 0 && + new_clocks->dppclk_khz > 0) { + + uint32_t pix_clk_list[MAX_PIPES] = {0}; + int p_state_list[MAX_PIPES] = {0}; + int disp_src_width_list[MAX_PIPES] = {0}; + int disp_src_height_list[MAX_PIPES] = {0}; + uint64_t disp_src_refresh_list[MAX_PIPES] = {0}; + bool is_scaled_list[MAX_PIPES] = {0}; + + for (int i = 0; i < active_pipe_count; i++) { + struct pipe_ctx *curr_pipe_ctx = pipe_ctx_list[i]; + uint64_t refresh_rate; + + pix_clk_list[i] = curr_pipe_ctx->stream->timing.pix_clk_100hz; + p_state_list[i] = curr_pipe_ctx->p_state_type; + + refresh_rate = (curr_pipe_ctx->stream->timing.pix_clk_100hz * (uint64_t)100 + + curr_pipe_ctx->stream->timing.v_total + * (uint64_t) curr_pipe_ctx->stream->timing.h_total - (uint64_t)1); + refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.v_total); + refresh_rate = div_u64(refresh_rate, curr_pipe_ctx->stream->timing.h_total); + disp_src_refresh_list[i] = refresh_rate; + + if (curr_pipe_ctx->plane_state) { + is_scaled_list[i] = !(dcn401_check_native_scaling(curr_pipe_ctx)); + disp_src_width_list[i] = curr_pipe_ctx->plane_state->src_rect.width; + disp_src_height_list[i] = curr_pipe_ctx->plane_state->src_rect.height; + } + } + + DC_LOG_AUTO_DPM_TEST("AutoDPMTest: dramclk:%d - fclk:%d - " + "dcfclk:%d - dppclk:%d - dispclk_hw:%d - " + "dppclk_hw:%d - dprefclk_hw:%d - dcfclk_hw:%d - " + "dtbclk_hw:%d - fclk_hw:%d - pix_clk_0:%d - pix_clk_1:%d - " + "pix_clk_2:%d - pix_clk_3:%d - mall_ss_size:%d - p_state_type_0:%d - " + "p_state_type_1:%d - p_state_type_2:%d - p_state_type_3:%d - " + "pix_width_0:%d - pix_height_0:%d - refresh_rate_0:%lld - is_scaled_0:%d - " + "pix_width_1:%d - pix_height_1:%d - refresh_rate_1:%lld - is_scaled_1:%d - " + "pix_width_2:%d - pix_height_2:%d - refresh_rate_2:%lld - is_scaled_2:%d - " + "pix_width_3:%d - pix_height_3:%d - refresh_rate_3:%lld - is_scaled_3:%d - LOG_END\n", + dramclk_khz_override, + fclk_khz_override, + new_clocks->dcfclk_khz, + new_clocks->dppclk_khz, + clk_register_dump.dispclk, + clk_register_dump.dppclk, + clk_register_dump.dprefclk, + clk_register_dump.dcfclk, + clk_register_dump.dtbclk, + clk_register_dump.fclk, + pix_clk_list[0], pix_clk_list[1], pix_clk_list[3], pix_clk_list[2], + mall_ss_size_bytes, + p_state_list[0], p_state_list[1], p_state_list[2], p_state_list[3], + disp_src_width_list[0], disp_src_height_list[0], disp_src_refresh_list[0], is_scaled_list[0], + disp_src_width_list[1], disp_src_height_list[1], disp_src_refresh_list[1], is_scaled_list[1], + disp_src_width_list[2], disp_src_height_list[2], disp_src_refresh_list[2], is_scaled_list[2], + disp_src_width_list[3], disp_src_height_list[3], disp_src_refresh_list[3], is_scaled_list[3]); + } +} + static void dcn401_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr, struct dc_state *context, int ref_dtbclk_khz) @@ -324,10 +517,12 @@ static void dcn401_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr if (!use_hpo_encoder) continue; - otg_master->clock_source->funcs->program_pix_clk( + if (otg_master->stream_res.pix_clk_params.controller_id > CONTROLLER_ID_UNDEFINED) + otg_master->clock_source->funcs->program_pix_clk( otg_master->clock_source, &otg_master->stream_res.pix_clk_params, - dccg->ctx->dc->link_srv->dp_get_encoding_format(&otg_master->link_config.dp_link_settings), + dccg->ctx->dc->link_srv->dp_get_encoding_format( + &otg_master->link_config.dp_link_settings), &otg_master->pll_settings); } } @@ -738,12 +933,12 @@ static void dcn401_execute_block_sequence(struct clk_mgr *clk_mgr_base, unsigned static unsigned int dcn401_build_update_bandwidth_clocks_sequence( struct clk_mgr *clk_mgr_base, struct dc_state *context, + struct dc_clocks *new_clocks, bool safe_to_lower) { struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dcn401_clk_mgr *clk_mgr401 = TO_DCN401_CLK_MGR(clk_mgr_internal); struct dc *dc = clk_mgr_base->ctx->dc; - struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dcn401_clk_mgr_block_sequence *block_sequence = clk_mgr401->block_sequence; bool enter_display_off = false; bool update_active_fclk = false; @@ -1025,13 +1220,13 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( static unsigned int dcn401_build_update_display_clocks_sequence( struct clk_mgr *clk_mgr_base, struct dc_state *context, + struct dc_clocks *new_clocks, bool safe_to_lower) { struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dcn401_clk_mgr *clk_mgr401 = TO_DCN401_CLK_MGR(clk_mgr_internal); struct dc *dc = clk_mgr_base->ctx->dc; struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; - struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dcn401_clk_mgr_block_sequence *block_sequence = clk_mgr401->block_sequence; bool force_reset = false; bool update_dispclk = false; @@ -1182,6 +1377,7 @@ static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base, /* build bandwidth related clocks update sequence */ num_steps = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base, context, + &context->bw_ctx.bw.dcn.clk, safe_to_lower); /* execute sequence */ @@ -1190,10 +1386,15 @@ static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base, /* build display related clocks update sequence */ num_steps = dcn401_build_update_display_clocks_sequence(clk_mgr_base, context, + &context->bw_ctx.bw.dcn.clk, safe_to_lower); /* execute sequence */ dcn401_execute_block_sequence(clk_mgr_base, num_steps); + + if (dc->config.enable_auto_dpm_test_logs) + dcn401_auto_dpm_test_log(&context->bw_ctx.bw.dcn.clk, TO_CLK_MGR_INTERNAL(clk_mgr_base), context); + } @@ -1218,59 +1419,6 @@ static uint32_t dcn401_get_vco_frequency_from_reg(struct clk_mgr_internal *clk_m return dc_fixpt_floor(pll_req); } -static void dcn401_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, - struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info) -{ - struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); - uint32_t dprefclk_did = 0; - uint32_t dcfclk_did = 0; - uint32_t dtbclk_did = 0; - uint32_t dispclk_did = 0; - uint32_t dppclk_did = 0; - uint32_t target_div = 0; - - /* DFS Slice 0 is used for DISPCLK */ - dispclk_did = REG_READ(CLK0_CLK0_DFS_CNTL); - /* DFS Slice 1 is used for DPPCLK */ - dppclk_did = REG_READ(CLK0_CLK1_DFS_CNTL); - /* DFS Slice 2 is used for DPREFCLK */ - dprefclk_did = REG_READ(CLK0_CLK2_DFS_CNTL); - /* DFS Slice 3 is used for DCFCLK */ - dcfclk_did = REG_READ(CLK0_CLK3_DFS_CNTL); - /* DFS Slice 4 is used for DTBCLK */ - dtbclk_did = REG_READ(CLK0_CLK4_DFS_CNTL); - - /* Convert DISPCLK DFS Slice DID to divider*/ - target_div = dentist_get_divider_from_did(dispclk_did); - //Get dispclk in khz - regs_and_bypass->dispclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; - - /* Convert DISPCLK DFS Slice DID to divider*/ - target_div = dentist_get_divider_from_did(dppclk_did); - //Get dppclk in khz - regs_and_bypass->dppclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; - - /* Convert DPREFCLK DFS Slice DID to divider*/ - target_div = dentist_get_divider_from_did(dprefclk_did); - //Get dprefclk in khz - regs_and_bypass->dprefclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; - - /* Convert DCFCLK DFS Slice DID to divider*/ - target_div = dentist_get_divider_from_did(dcfclk_did); - //Get dcfclk in khz - regs_and_bypass->dcfclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; - - /* Convert DTBCLK DFS Slice DID to divider*/ - target_div = dentist_get_divider_from_did(dtbclk_did); - //Get dtbclk in khz - regs_and_bypass->dtbclk = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->base.dentist_vco_freq_khz) / target_div; -} - static void dcn401_clock_read_ss_info(struct clk_mgr_internal *clk_mgr) { struct dc_bios *bp = clk_mgr->base.ctx->dc_bios; @@ -1330,33 +1478,34 @@ static void dcn401_notify_wm_ranges(struct clk_mgr *clk_mgr_base) static void dcn401_set_hard_min_memclk(struct clk_mgr *clk_mgr_base, bool current_mode) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + const struct dc *dc = clk_mgr->base.ctx->dc; + struct dc_state *context = dc->current_state; + struct dc_clocks new_clocks; + int num_steps; if (!clk_mgr->smu_present || !dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) return; + /* build clock update */ + memcpy(&new_clocks, &clk_mgr_base->clks, sizeof(struct dc_clocks)); + if (current_mode) { - if (clk_mgr_base->clks.p_state_change_support) - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)); - else - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->bw_params->max_memclk_mhz); + new_clocks.dramclk_khz = context->bw_ctx.bw.dcn.clk.dramclk_khz; + new_clocks.idle_dramclk_khz = context->bw_ctx.bw.dcn.clk.idle_dramclk_khz; + new_clocks.p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support; } else { - dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz); + new_clocks.dramclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz * 1000; + new_clocks.idle_dramclk_khz = new_clocks.dramclk_khz; + new_clocks.p_state_change_support = true; } -} - -/* Set max memclk to highest DPM value */ -static void dcn401_set_hard_max_memclk(struct clk_mgr *clk_mgr_base) -{ - struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); - if (!clk_mgr->smu_present || !dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK)) - return; + num_steps = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base, + context, + &new_clocks, + true); - dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->bw_params->max_memclk_mhz); + /* execute sequence */ + dcn401_execute_block_sequence(clk_mgr_base, num_steps); } /* Get current memclk states, update bounding box */ @@ -1487,7 +1636,6 @@ static struct clk_mgr_funcs dcn401_funcs = { .init_clocks = dcn401_init_clocks, .notify_wm_ranges = dcn401_notify_wm_ranges, .set_hard_min_memclk = dcn401_set_hard_min_memclk, - .set_hard_max_memclk = dcn401_set_hard_max_memclk, .get_memclk_states_from_smu = dcn401_get_memclk_states_from_smu, .are_clock_states_equal = dcn401_are_clock_states_equal, .enable_pme_wa = dcn401_enable_pme_wa, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 85a2ef82afa5..c8dabb081b3d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1254,7 +1254,8 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context); if (pipe->stream && pipe->plane_state) { - set_p_state_switch_method(dc, context, pipe); + if (!dc->debug.using_dml2) + set_p_state_switch_method(dc, context, pipe); dc_update_visual_confirm_color(dc, context, pipe); } @@ -1351,80 +1352,6 @@ static void disable_vbios_mode_if_required( } } -/** - * wait_for_blank_complete - wait for all active OPPs to finish pending blank - * pattern updates - * - * @dc: [in] dc reference - * @context: [in] hardware context in use - */ -static void wait_for_blank_complete(struct dc *dc, - struct dc_state *context) -{ - struct pipe_ctx *opp_head; - struct dce_hwseq *hws = dc->hwseq; - int i; - - if (!hws->funcs.wait_for_blank_complete) - return; - - for (i = 0; i < MAX_PIPES; i++) { - opp_head = &context->res_ctx.pipe_ctx[i]; - - if (!resource_is_pipe_type(opp_head, OPP_HEAD) || - dc_state_get_pipe_subvp_type(context, opp_head) == SUBVP_PHANTOM) - continue; - - hws->funcs.wait_for_blank_complete(opp_head->stream_res.opp); - } -} - -static void wait_for_odm_update_pending_complete(struct dc *dc, struct dc_state *context) -{ - struct pipe_ctx *otg_master; - struct timing_generator *tg; - int i; - - for (i = 0; i < MAX_PIPES; i++) { - otg_master = &context->res_ctx.pipe_ctx[i]; - if (!resource_is_pipe_type(otg_master, OTG_MASTER) || - dc_state_get_pipe_subvp_type(context, otg_master) == SUBVP_PHANTOM) - continue; - tg = otg_master->stream_res.tg; - if (tg->funcs->wait_odm_doublebuffer_pending_clear) - tg->funcs->wait_odm_doublebuffer_pending_clear(tg); - } - - /* ODM update may require to reprogram blank pattern for each OPP */ - wait_for_blank_complete(dc, context); -} - -static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) -{ - int i; - PERF_TRACE(); - for (i = 0; i < MAX_PIPES; i++) { - int count = 0; - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) - continue; - - /* Timeout 100 ms */ - while (count < 100000) { - /* Must set to false to start with, due to OR in update function */ - pipe->plane_state->status.is_flip_pending = false; - dc->hwss.update_pending_status(pipe); - if (!pipe->plane_state->status.is_flip_pending) - break; - udelay(1); - count++; - } - ASSERT(!pipe->plane_state->status.is_flip_pending); - } - PERF_TRACE(); -} - /* Public functions */ struct dc *dc_create(const struct dc_init_data *init_params) @@ -1822,10 +1749,18 @@ bool dc_validate_boot_timing(const struct dc *dc, tg->funcs->get_optc_source(tg, &numOdmPipes, &id_src[0], &id_src[1]); - if (numOdmPipes == 2) + if (numOdmPipes == 2) { pix_clk_100hz *= 2; - if (numOdmPipes == 4) + } else if (numOdmPipes == 4) { pix_clk_100hz *= 4; + } else if (se && se->funcs->get_pixels_per_cycle) { + uint32_t pixels_per_cycle = se->funcs->get_pixels_per_cycle(se); + + if (pixels_per_cycle != 1 && !dc->debug.enable_dp_dig_pixel_rate_div_policy) + return false; + + pix_clk_100hz *= pixels_per_cycle; + } // Note: In rare cases, HW pixclk may differ from crtc's pixclk // slightly due to rounding issues in 10 kHz units. @@ -2100,12 +2035,12 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c if (context->stream_count > get_seamless_boot_stream_count(context) || context->stream_count == 0) { /* Must wait for no flips to be pending before doing optimize bw */ - wait_for_no_pipes_pending(dc, context); + hwss_wait_for_no_pipes_pending(dc, context); /* * optimized dispclk depends on ODM setup. Need to wait for ODM * update pending complete before optimizing bandwidth. */ - wait_for_odm_update_pending_complete(dc, context); + hwss_wait_for_odm_update_pending_complete(dc, context); /* pplib is notified if disp_num changed */ dc->hwss.optimize_bandwidth(dc, context); /* Need to do otg sync again as otg could be out of sync due to otg @@ -2716,6 +2651,10 @@ static enum surface_update_type check_update_surfaces_for_stream( overall_type = UPDATE_TYPE_FULL; } + if (stream_update && stream_update->hw_cursor_req) { + overall_type = UPDATE_TYPE_FULL; + } + /* some stream updates require passive update */ if (stream_update) { union stream_update_flags *su_flags = &stream_update->stream->update_flags; @@ -3011,6 +2950,9 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->vrr_infopacket) stream->vrr_infopacket = *update->vrr_infopacket; + if (update->hw_cursor_req) + stream->hw_cursor_req = *update->hw_cursor_req; + if (update->allow_freesync) stream->allow_freesync = *update->allow_freesync; @@ -3704,7 +3646,8 @@ static void commit_planes_for_stream_fast(struct dc *dc, struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (pipe->stream && pipe->plane_state) { - set_p_state_switch_method(dc, context, pipe); + if (!dc->debug.using_dml2) + set_p_state_switch_method(dc, context, pipe); if (dc->debug.visual_confirm) dc_update_visual_confirm_color(dc, context, pipe); @@ -3739,7 +3682,7 @@ static void commit_planes_for_stream_fast(struct dc *dc, surface_count, stream, context); - } else { + } else if (stream_status) { build_dmub_cmd_list(dc, srf_updates, surface_count, @@ -3769,47 +3712,6 @@ static void commit_planes_for_stream_fast(struct dc *dc, top_pipe_to_program->stream->update_flags.raw = 0; } -static void wait_for_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context) -{ -/* - * This function calls HWSS to wait for any potentially double buffered - * operations to complete. It should be invoked as a pre-amble prior - * to full update programming before asserting any HW locks. - */ - int pipe_idx; - int opp_inst; - int opp_count = dc->res_pool->res_cap->num_opp; - struct hubp *hubp; - int mpcc_inst; - const struct pipe_ctx *pipe_ctx; - - for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) { - pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx]; - - if (!pipe_ctx->stream) - continue; - - if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear) - pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg); - - hubp = pipe_ctx->plane_res.hubp; - if (!hubp) - continue; - - mpcc_inst = hubp->inst; - // MPCC inst is equal to pipe index in practice - for (opp_inst = 0; opp_inst < opp_count; opp_inst++) { - if ((dc->res_pool->opps[opp_inst] != NULL) && - (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst])) { - dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst); - dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false; - break; - } - } - } - wait_for_odm_update_pending_complete(dc, dc_context); -} - static void commit_planes_for_stream(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, @@ -3833,13 +3735,14 @@ static void commit_planes_for_stream(struct dc *dc, dc_z10_restore(dc); if (update_type == UPDATE_TYPE_FULL) - wait_for_outstanding_hw_updates(dc, context); + hwss_process_outstanding_hw_updates(dc, dc->current_state); for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (pipe->stream && pipe->plane_state) { - set_p_state_switch_method(dc, context, pipe); + if (!dc->debug.using_dml2) + set_p_state_switch_method(dc, context, pipe); if (dc->debug.visual_confirm) dc_update_visual_confirm_color(dc, context, pipe); @@ -4127,7 +4030,8 @@ static void commit_planes_for_stream(struct dc *dc, } if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) - if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { + if (top_pipe_to_program && + top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { top_pipe_to_program->stream_res.tg->funcs->wait_for_state( top_pipe_to_program->stream_res.tg, CRTC_STATE_VACTIVE); @@ -4335,7 +4239,8 @@ static void backup_and_set_minimal_pipe_split_policy(struct dc *dc, dc->debug.force_disable_subvp = true; for (i = 0; i < context->stream_count; i++) { policy->force_odm[i] = context->streams[i]->debug.force_odm_combine_segments; - context->streams[i]->debug.force_odm_combine_segments = 0; + if (context->streams[i]->debug.allow_transition_for_forced_odm) + context->streams[i]->debug.force_odm_combine_segments = 0; } } @@ -4686,7 +4591,7 @@ static bool commit_minimal_transition_state(struct dc *dc, return true; } -static void populate_fast_updates(struct dc_fast_update *fast_update, +void populate_fast_updates(struct dc_fast_update *fast_update, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_update *stream_update) @@ -4696,6 +4601,9 @@ static void populate_fast_updates(struct dc_fast_update *fast_update, if (stream_update) { fast_update[0].out_transfer_func = stream_update->out_transfer_func; fast_update[0].output_csc_transform = stream_update->output_csc_transform; + } else { + fast_update[0].out_transfer_func = NULL; + fast_update[0].output_csc_transform = NULL; } for (i = 0; i < surface_count; i++) { @@ -4729,6 +4637,26 @@ static bool fast_updates_exist(struct dc_fast_update *fast_update, int surface_c return false; } +bool fast_nonaddr_updates_exist(struct dc_fast_update *fast_update, int surface_count) +{ + int i; + + if (fast_update[0].out_transfer_func || + fast_update[0].output_csc_transform) + return true; + + for (i = 0; i < surface_count; i++) { + if (fast_update[i].input_csc_color_matrix || + fast_update[i].gamma || + fast_update[i].gamut_remap_matrix || + fast_update[i].coeff_reduction_factor || + fast_update[i].cursor_csc_color_matrix) + return true; + } + + return false; +} + static bool full_update_required(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, @@ -5382,7 +5310,8 @@ void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const if (allow == dc->idle_optimizations_allowed) return; - if (dc->hwss.apply_idle_power_optimizations && dc->hwss.apply_idle_power_optimizations(dc, allow)) + if (dc->hwss.apply_idle_power_optimizations && dc->clk_mgr != NULL && + dc->hwss.apply_idle_power_optimizations(dc, allow)) dc->idle_optimizations_allowed = allow; } @@ -5451,9 +5380,10 @@ static void blank_and_force_memclk(struct dc *dc, bool apply, unsigned int memcl hubp->funcs->set_blank_regs(hubp, true); } } - - dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, memclk_mhz); - dc->clk_mgr->funcs->set_min_memclk(dc->clk_mgr, memclk_mhz); + if (dc->clk_mgr->funcs->set_max_memclk) + dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, memclk_mhz); + if (dc->clk_mgr->funcs->set_min_memclk) + dc->clk_mgr->funcs->set_min_memclk(dc->clk_mgr, memclk_mhz); for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; @@ -5502,7 +5432,7 @@ void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable) if (enable && !dc->clk_mgr->dc_mode_softmax_enabled) { if (p_state_change_support) { - if (funcMin <= softMax) + if (funcMin <= softMax && dc->clk_mgr->funcs->set_max_memclk) dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, softMax); // else: No-Op } else { @@ -5512,7 +5442,7 @@ void dc_enable_dcmode_clk_limit(struct dc *dc, bool enable) } } else if (!enable && dc->clk_mgr->dc_mode_softmax_enabled) { if (p_state_change_support) { - if (funcMin <= softMax) + if (funcMin <= softMax && dc->clk_mgr->funcs->set_max_memclk) dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, maxDPM); // else: No-Op } else { @@ -5563,6 +5493,9 @@ void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc) */ bool dc_is_dmub_outbox_supported(struct dc *dc) { + if (!dc->caps.dmcub_support) + return false; + switch (dc->ctx->asic_id.chip_family) { case FAMILY_YELLOW_CARP: diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 87e36d51c56d..7ee2be8f82c4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -636,57 +636,59 @@ void hwss_build_fast_sequence(struct dc *dc, while (current_pipe) { current_mpc_pipe = current_pipe; while (current_mpc_pipe) { - if (dc->hwss.set_flip_control_gsl && current_mpc_pipe->plane_state && current_mpc_pipe->plane_state->update_flags.raw) { - block_sequence[*num_steps].params.set_flip_control_gsl_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].params.set_flip_control_gsl_params.flip_immediate = current_mpc_pipe->plane_state->flip_immediate; - block_sequence[*num_steps].func = HUBP_SET_FLIP_CONTROL_GSL; - (*num_steps)++; - } - if (dc->hwss.program_triplebuffer && dc->debug.enable_tri_buf && current_mpc_pipe->plane_state->update_flags.raw) { - block_sequence[*num_steps].params.program_triplebuffer_params.dc = dc; - block_sequence[*num_steps].params.program_triplebuffer_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].params.program_triplebuffer_params.enableTripleBuffer = current_mpc_pipe->plane_state->triplebuffer_flips; - block_sequence[*num_steps].func = HUBP_PROGRAM_TRIPLEBUFFER; - (*num_steps)++; - } - if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) { - if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) && - stream_status->mall_stream_config.type == SUBVP_MAIN) { - block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv; - block_sequence[*num_steps].params.subvp_save_surf_addr.addr = ¤t_mpc_pipe->plane_state->address; - block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index; - block_sequence[*num_steps].func = DMUB_SUBVP_SAVE_SURF_ADDR; + if (current_mpc_pipe->plane_state) { + if (dc->hwss.set_flip_control_gsl && current_mpc_pipe->plane_state->update_flags.raw) { + block_sequence[*num_steps].params.set_flip_control_gsl_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].params.set_flip_control_gsl_params.flip_immediate = current_mpc_pipe->plane_state->flip_immediate; + block_sequence[*num_steps].func = HUBP_SET_FLIP_CONTROL_GSL; + (*num_steps)++; + } + if (dc->hwss.program_triplebuffer && dc->debug.enable_tri_buf && current_mpc_pipe->plane_state->update_flags.raw) { + block_sequence[*num_steps].params.program_triplebuffer_params.dc = dc; + block_sequence[*num_steps].params.program_triplebuffer_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].params.program_triplebuffer_params.enableTripleBuffer = current_mpc_pipe->plane_state->triplebuffer_flips; + block_sequence[*num_steps].func = HUBP_PROGRAM_TRIPLEBUFFER; + (*num_steps)++; + } + if (dc->hwss.update_plane_addr && current_mpc_pipe->plane_state->update_flags.bits.addr_update) { + if (resource_is_pipe_type(current_mpc_pipe, OTG_MASTER) && + stream_status->mall_stream_config.type == SUBVP_MAIN) { + block_sequence[*num_steps].params.subvp_save_surf_addr.dc_dmub_srv = dc->ctx->dmub_srv; + block_sequence[*num_steps].params.subvp_save_surf_addr.addr = ¤t_mpc_pipe->plane_state->address; + block_sequence[*num_steps].params.subvp_save_surf_addr.subvp_index = current_mpc_pipe->subvp_index; + block_sequence[*num_steps].func = DMUB_SUBVP_SAVE_SURF_ADDR; + (*num_steps)++; + } + + block_sequence[*num_steps].params.update_plane_addr_params.dc = dc; + block_sequence[*num_steps].params.update_plane_addr_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = HUBP_UPDATE_PLANE_ADDR; (*num_steps)++; } - block_sequence[*num_steps].params.update_plane_addr_params.dc = dc; - block_sequence[*num_steps].params.update_plane_addr_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = HUBP_UPDATE_PLANE_ADDR; - (*num_steps)++; - } - - if (hws->funcs.set_input_transfer_func && current_mpc_pipe->plane_state->update_flags.bits.gamma_change) { - block_sequence[*num_steps].params.set_input_transfer_func_params.dc = dc; - block_sequence[*num_steps].params.set_input_transfer_func_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].params.set_input_transfer_func_params.plane_state = current_mpc_pipe->plane_state; - block_sequence[*num_steps].func = DPP_SET_INPUT_TRANSFER_FUNC; - (*num_steps)++; - } + if (hws->funcs.set_input_transfer_func && current_mpc_pipe->plane_state->update_flags.bits.gamma_change) { + block_sequence[*num_steps].params.set_input_transfer_func_params.dc = dc; + block_sequence[*num_steps].params.set_input_transfer_func_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].params.set_input_transfer_func_params.plane_state = current_mpc_pipe->plane_state; + block_sequence[*num_steps].func = DPP_SET_INPUT_TRANSFER_FUNC; + (*num_steps)++; + } - if (dc->hwss.program_gamut_remap && current_mpc_pipe->plane_state->update_flags.bits.gamut_remap_change) { - block_sequence[*num_steps].params.program_gamut_remap_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = DPP_PROGRAM_GAMUT_REMAP; - (*num_steps)++; - } - if (current_mpc_pipe->plane_state->update_flags.bits.input_csc_change) { - block_sequence[*num_steps].params.setup_dpp_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = DPP_SETUP_DPP; - (*num_steps)++; - } - if (current_mpc_pipe->plane_state->update_flags.bits.coeff_reduction_change) { - block_sequence[*num_steps].params.program_bias_and_scale_params.pipe_ctx = current_mpc_pipe; - block_sequence[*num_steps].func = DPP_PROGRAM_BIAS_AND_SCALE; - (*num_steps)++; + if (dc->hwss.program_gamut_remap && current_mpc_pipe->plane_state->update_flags.bits.gamut_remap_change) { + block_sequence[*num_steps].params.program_gamut_remap_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = DPP_PROGRAM_GAMUT_REMAP; + (*num_steps)++; + } + if (current_mpc_pipe->plane_state->update_flags.bits.input_csc_change) { + block_sequence[*num_steps].params.setup_dpp_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = DPP_SETUP_DPP; + (*num_steps)++; + } + if (current_mpc_pipe->plane_state->update_flags.bits.coeff_reduction_change) { + block_sequence[*num_steps].params.program_bias_and_scale_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].func = DPP_PROGRAM_BIAS_AND_SCALE; + (*num_steps)++; + } } if (hws->funcs.set_output_transfer_func && current_mpc_pipe->stream->update_flags.bits.out_tf) { block_sequence[*num_steps].params.set_output_transfer_func_params.dc = dc; @@ -901,12 +903,12 @@ void hwss_program_bias_and_scale(union block_sequence_params *params) struct pipe_ctx *pipe_ctx = params->program_bias_and_scale_params.pipe_ctx; struct dpp *dpp = pipe_ctx->plane_res.dpp; struct dc_plane_state *plane_state = pipe_ctx->plane_state; - struct dc_bias_and_scale bns_params = {0}; + struct dc_bias_and_scale bns_params = plane_state->bias_and_scale; //TODO :for CNVC set scale and bias registers if necessary - build_prescale_params(&bns_params, plane_state); - if (dpp->funcs->dpp_program_bias_and_scale) + if (dpp->funcs->dpp_program_bias_and_scale) { dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params); + } } void hwss_power_on_mpc_mem_pwr(union block_sequence_params *params) @@ -976,3 +978,126 @@ void get_surface_tile_visual_confirm_color( break; } } + +/** + * hwss_wait_for_all_blank_complete - wait for all active OPPs to finish pending blank + * pattern updates + * + * @dc: [in] dc reference + * @context: [in] hardware context in use + */ +void hwss_wait_for_all_blank_complete(struct dc *dc, + struct dc_state *context) +{ + struct pipe_ctx *opp_head; + struct dce_hwseq *hws = dc->hwseq; + int i; + + if (!hws->funcs.wait_for_blank_complete) + return; + + for (i = 0; i < MAX_PIPES; i++) { + opp_head = &context->res_ctx.pipe_ctx[i]; + + if (!resource_is_pipe_type(opp_head, OPP_HEAD) || + dc_state_get_pipe_subvp_type(context, opp_head) == SUBVP_PHANTOM) + continue; + + hws->funcs.wait_for_blank_complete(opp_head->stream_res.opp); + } +} + +void hwss_wait_for_odm_update_pending_complete(struct dc *dc, struct dc_state *context) +{ + struct pipe_ctx *otg_master; + struct timing_generator *tg; + int i; + + for (i = 0; i < MAX_PIPES; i++) { + otg_master = &context->res_ctx.pipe_ctx[i]; + if (!resource_is_pipe_type(otg_master, OTG_MASTER) || + dc_state_get_pipe_subvp_type(context, otg_master) == SUBVP_PHANTOM) + continue; + tg = otg_master->stream_res.tg; + if (tg->funcs->wait_odm_doublebuffer_pending_clear) + tg->funcs->wait_odm_doublebuffer_pending_clear(tg); + } + + /* ODM update may require to reprogram blank pattern for each OPP */ + hwss_wait_for_all_blank_complete(dc, context); +} + +void hwss_wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) +{ + int i; + for (i = 0; i < MAX_PIPES; i++) { + int count = 0; + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->plane_state || dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) + continue; + + /* Timeout 100 ms */ + while (count < 100000) { + /* Must set to false to start with, due to OR in update function */ + pipe->plane_state->status.is_flip_pending = false; + dc->hwss.update_pending_status(pipe); + if (!pipe->plane_state->status.is_flip_pending) + break; + udelay(1); + count++; + } + ASSERT(!pipe->plane_state->status.is_flip_pending); + } +} + +void hwss_wait_for_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context) +{ +/* + * This function calls HWSS to wait for any potentially double buffered + * operations to complete. It should be invoked as a pre-amble prior + * to full update programming before asserting any HW locks. + */ + int pipe_idx; + int opp_inst; + int opp_count = dc->res_pool->res_cap->num_opp; + struct hubp *hubp; + int mpcc_inst; + const struct pipe_ctx *pipe_ctx; + + for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) { + pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx]; + + if (!pipe_ctx->stream) + continue; + + if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear) + pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg); + + hubp = pipe_ctx->plane_res.hubp; + if (!hubp) + continue; + + mpcc_inst = hubp->inst; + // MPCC inst is equal to pipe index in practice + for (opp_inst = 0; opp_inst < opp_count; opp_inst++) { + if ((dc->res_pool->opps[opp_inst] != NULL) && + (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst])) { + dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst); + dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false; + break; + } + } + } + hwss_wait_for_odm_update_pending_complete(dc, dc_context); +} + +void hwss_process_outstanding_hw_updates(struct dc *dc, struct dc_state *dc_context) +{ + /* wait for outstanding updates */ + hwss_wait_for_outstanding_hw_updates(dc, dc_context); + + /* perform outstanding post update programming */ + if (dc->hwss.program_outstanding_updates) + dc->hwss.program_outstanding_updates(dc, dc_context); +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index bcb5267b5a6b..ef585a89847b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -342,11 +342,6 @@ struct resource_pool *dc_create_resource_pool(struct dc *dc, res_pool->ref_clocks.xtalin_clock_inKhz; res_pool->ref_clocks.dchub_ref_clock_inKhz = res_pool->ref_clocks.xtalin_clock_inKhz; - if (dc->debug.using_dml2) - if (res_pool->hubbub && res_pool->hubbub->funcs->get_dchub_ref_freq) - res_pool->hubbub->funcs->get_dchub_ref_freq(res_pool->hubbub, - res_pool->ref_clocks.dccg_ref_clock_inKhz, - &res_pool->ref_clocks.dchub_ref_clock_inKhz); } else ASSERT_CRITICAL(false); } @@ -3241,6 +3236,8 @@ static bool are_stream_backends_same( bool dc_is_stream_unchanged( struct dc_stream_state *old_stream, struct dc_stream_state *stream) { + if (!old_stream || !stream) + return false; if (!are_stream_backends_same(old_stream, stream)) return false; @@ -3771,8 +3768,10 @@ static bool planes_changed_for_existing_stream(struct dc_state *context, } } - if (!stream_status) + if (!stream_status) { ASSERT(0); + return false; + } for (i = 0; i < set_count; i++) if (set[i].stream == stream) @@ -5164,7 +5163,7 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy( sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; if (sec_pipe->stream->timing.flags.DSC == 1) { #if defined(CONFIG_DRM_AMD_DC_FP) - dcn20_acquire_dsc(dc, &state->res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); + dcn20_acquire_dsc(dc, &state->res_ctx, &sec_pipe->stream_res.dsc, sec_pipe->stream_res.opp->inst); #endif ASSERT(sec_pipe->stream_res.dsc); if (sec_pipe->stream_res.dsc == NULL) @@ -5271,3 +5270,44 @@ void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuratio dml2_options->svp_pstate.callbacks.remove_phantom_streams_and_planes = &dc_state_remove_phantom_streams_and_planes; dml2_options->svp_pstate.callbacks.release_phantom_streams_and_planes = &dc_state_release_phantom_streams_and_planes; } + +/* Returns number of DET segments allocated for a given OTG_MASTER pipe */ +int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *otg_master) +{ + struct pipe_ctx *opp_heads[MAX_PIPES]; + struct pipe_ctx *dpp_pipes[MAX_PIPES]; + + int dpp_count = 0; + int det_segments = 0; + + if (!otg_master->stream) + return 0; + + int slice_count = resource_get_opp_heads_for_otg_master(otg_master, + &state->res_ctx, opp_heads); + + for (int slice_idx = 0; slice_idx < slice_count; slice_idx++) { + if (opp_heads[slice_idx]->plane_state) { + dpp_count = resource_get_dpp_pipes_for_opp_head( + opp_heads[slice_idx], + &state->res_ctx, + dpp_pipes); + for (int dpp_idx = 0; dpp_idx < dpp_count; dpp_idx++) + det_segments += dpp_pipes[dpp_idx]->hubp_regs.det_size; + } + } + return det_segments; +} + +bool resource_is_hpo_acquired(struct dc_state *context) +{ + int i; + + for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) { + if (context->res_ctx.is_hpo_dp_stream_enc_acquired[i]) { + return true; + } + } + + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c index cd6570a1e20e..fe9f99f1bdf9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c @@ -61,6 +61,7 @@ void dc_stat_get_dmub_notification(const struct dc *dc, struct dmub_notification /* For HPD/HPD RX, convert dpia port index into link index */ if (notify->type == DMUB_NOTIFICATION_HPD || notify->type == DMUB_NOTIFICATION_HPD_IRQ || + notify->type == DMUB_NOTIFICATION_AUX_REPLY || notify->type == DMUB_NOTIFICATION_DPIA_NOTIFICATION || notify->type == DMUB_NOTIFICATION_SET_CONFIG_REPLY) { notify->link_index = diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index e990346e51f6..2597e3fd562b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -211,10 +211,16 @@ struct dc_state *dc_state_create(struct dc *dc, struct dc_state_create_params *p #ifdef CONFIG_DRM_AMD_DC_FP if (dc->debug.using_dml2) { dml2_opt->use_clock_dc_limits = false; - dml2_create(dc, dml2_opt, &state->bw_ctx.dml2); + if (!dml2_create(dc, dml2_opt, &state->bw_ctx.dml2)) { + dc_state_release(state); + return NULL; + } dml2_opt->use_clock_dc_limits = true; - dml2_create(dc, dml2_opt, &state->bw_ctx.dml2_dc_power_source); + if (!dml2_create(dc, dml2_opt, &state->bw_ctx.dml2_dc_power_source)) { + dc_state_release(state); + return NULL; + } } #endif @@ -961,10 +967,10 @@ bool dc_state_is_fams2_in_use( bool is_fams2_in_use = false; if (state) - is_fams2_in_use |= state->bw_ctx.bw.dcn.fams2_stream_count > 0; + is_fams2_in_use |= state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; if (dc->current_state) - is_fams2_in_use |= dc->current_state->bw_ctx.bw.dcn.fams2_stream_count > 0; + is_fams2_in_use |= dc->current_state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; return is_fams2_in_use; } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 73cdebcd9f37..5bbc7d2daca6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.291" +#define DC_VER "3.2.297" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -261,10 +261,7 @@ struct dc_caps { bool zstate_support; bool ips_support; uint32_t num_of_internal_disp; - uint32_t max_dwb_htap; - uint32_t max_dwb_vtap; enum dp_protocol_version max_dp_protocol_version; - bool spdif_aud; unsigned int mall_size_per_mem_channel; unsigned int mall_size_total; unsigned int cursor_cache_size; @@ -309,8 +306,6 @@ struct dc_bug_wa { uint8_t dcfclk_ds: 1; } clock_update_disable_mask; bool skip_psr_ips_crtc_disable; - //Customer Specific WAs - uint32_t force_backlight_start_level; }; struct dc_dcc_surface_param { struct dc_size surface_size; @@ -466,6 +461,7 @@ struct dc_config { bool use_assr_psp_message; bool support_edp0_on_dp1; unsigned int enable_fpo_flicker_detection; + bool disable_hbr_audio_dp2; }; enum visual_confirm { @@ -513,6 +509,7 @@ enum in_game_fams_config { INGAME_FAMS_SINGLE_DISP_ENABLE, // enable in-game fams INGAME_FAMS_DISABLE, // disable in-game fams INGAME_FAMS_MULTI_DISP_ENABLE, //enable in-game fams for multi-display + INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY, //enable in-game fams for multi-display only for clamped RR strategies }; /** @@ -981,6 +978,7 @@ struct dc_debug_options { bool disable_z10; bool enable_z9_disable_interface; bool psr_skip_crtc_disable; + uint32_t ips_skip_crtc_disable_mask; union dpia_debug_options dpia_debug; bool disable_fixed_vs_aux_timeout_wa; uint32_t fixed_vs_aux_delay_config_wa; @@ -1055,6 +1053,7 @@ struct dc_debug_options { unsigned int force_sharpness; unsigned int force_lls; bool notify_dpia_hr_bw; + bool enable_ips_visual_confirm; }; @@ -1291,7 +1290,7 @@ struct dc_plane_state { struct dc_gamma gamma_correction; struct dc_transfer_func in_transfer_func; - struct dc_bias_and_scale *bias_and_scale; + struct dc_bias_and_scale bias_and_scale; struct dc_csc_transform input_csc_color_matrix; struct fixed31_32 coeff_reduction_factor; struct fixed31_32 hdr_mult; @@ -1368,7 +1367,6 @@ struct dc_plane_info { int global_alpha_value; bool input_csc_enabled; int layer_index; - bool front_buffer_rendering_active; enum chroma_cositing cositing; }; @@ -1585,6 +1583,12 @@ bool dc_acquire_release_mpc_3dlut( bool dc_resource_is_dsc_encoding_supported(const struct dc *dc); void get_audio_check(struct audio_info *aud_modes, struct audio_check *aud_chk); + +bool fast_nonaddr_updates_exist(struct dc_fast_update *fast_update, int surface_count); +void populate_fast_updates(struct dc_fast_update *fast_update, + struct dc_surface_update *srf_updates, + int surface_count, + struct dc_stream_update *stream_update); /* * Set up streams and links associated to drive sinks * The streams parameter is an absolute set of all active streams. @@ -1756,6 +1760,7 @@ struct dc_link { bool dongle_mode_timing_override; bool blank_stream_on_ocs_change; bool read_dpcd204h_on_irq_hpd; + bool disable_assr_for_uhbr; } wa_flags; struct link_mst_stream_allocation_table mst_stream_alloc_table; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index ded13026c8ff..b1265124608b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -979,6 +979,9 @@ void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv) DC_LOG_DEBUG(" inbox0_rptr : %08x", diag_data.inbox0_rptr); DC_LOG_DEBUG(" inbox0_wptr : %08x", diag_data.inbox0_wptr); DC_LOG_DEBUG(" inbox0_size : %08x", diag_data.inbox0_size); + DC_LOG_DEBUG(" outbox1_rptr : %08x", diag_data.outbox1_rptr); + DC_LOG_DEBUG(" outbox1_wptr : %08x", diag_data.outbox1_wptr); + DC_LOG_DEBUG(" outbox1_size : %08x", diag_data.outbox1_size); DC_LOG_DEBUG(" is_enabled : %d", diag_data.is_dmcub_enabled); DC_LOG_DEBUG(" is_soft_reset : %d", diag_data.is_dmcub_soft_reset); DC_LOG_DEBUG(" is_secure_reset : %d", diag_data.is_dmcub_secure_reset); @@ -1282,7 +1285,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) union dmub_shared_state_ips_driver_signals new_signals; DC_LOG_IPS( - "%s wait idle (ips1_commit=%d ips2_commit=%d)", + "%s wait idle (ips1_commit=%u ips2_commit=%u)", __func__, ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1328,7 +1331,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) } DC_LOG_IPS( - "%s send allow_idle=%d (ips1_commit=%d ips2_commit=%d)", + "%s send allow_idle=%d (ips1_commit=%u ips2_commit=%u)", __func__, allow_idle, ips_fw->signals.bits.ips1_commit, @@ -1371,7 +1374,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc_dmub_srv->driver_signals = ips_driver->signals; DC_LOG_IPS( - "%s (allow ips1=%d ips2=%d) (commit ips1=%d ips2=%d) (count rcg=%d ips1=%d ips2=%d)", + "%s (allow ips1=%u ips2=%u) (commit ips1=%u ips2=%u) (count rcg=%u ips1=%u ips2=%u)", __func__, ips_driver->signals.bits.allow_ips1, ips_driver->signals.bits.allow_ips2, @@ -1390,7 +1393,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) (!dc->debug.optimize_ips_handshake || ips_fw->signals.bits.ips2_commit || !ips_fw->signals.bits.in_idle)) { DC_LOG_IPS( - "wait IPS2 eval (ips1_commit=%d ips2_commit=%d)", + "wait IPS2 eval (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1399,7 +1402,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) if (ips_fw->signals.bits.ips2_commit) { DC_LOG_IPS( - "exit IPS2 #1 (ips1_commit=%d ips2_commit=%d)", + "exit IPS2 #1 (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1407,7 +1410,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); DC_LOG_IPS( - "wait IPS2 entry delay (ips1_commit=%d ips2_commit=%d)", + "wait IPS2 entry delay (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1415,14 +1418,14 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) udelay(dc->debug.ips2_entry_delay_us); DC_LOG_IPS( - "exit IPS2 #2 (ips1_commit=%d ips2_commit=%d)", + "exit IPS2 #2 (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr); DC_LOG_IPS( - "wait IPS2 commit clear (ips1_commit=%d ips2_commit=%d)", + "wait IPS2 commit clear (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1430,7 +1433,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) udelay(1); DC_LOG_IPS( - "wait hw_pwr_up (ips1_commit=%d ips2_commit=%d)", + "wait hw_pwr_up (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1438,7 +1441,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) ASSERT(0); DC_LOG_IPS( - "resync inbox1 (ips1_commit=%d ips2_commit=%d)", + "resync inbox1 (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1449,7 +1452,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc_dmub_srv_notify_idle(dc, false); if (prev_driver_signals.bits.allow_ips1) { DC_LOG_IPS( - "wait for IPS1 commit clear (ips1_commit=%d ips2_commit=%d)", + "wait for IPS1 commit clear (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1457,7 +1460,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) udelay(1); DC_LOG_IPS( - "wait for IPS1 commit clear done (ips1_commit=%d ips2_commit=%d)", + "wait for IPS1 commit clear done (ips1_commit=%u ips2_commit=%u)", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); } @@ -1466,7 +1469,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) if (!dc_dmub_srv_is_hw_pwr_up(dc->ctx->dmub_srv, true)) ASSERT(0); - DC_LOG_IPS("%s exit (count rcg=%d ips1=%d ips2=%d)", + DC_LOG_IPS("%s exit (count rcg=%u ips1=%u ips2=%u)", __func__, rcg_exit_count, ips1_exit_count, @@ -1672,22 +1675,17 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, global_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; global_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); - /* send global configuration parameters */ - global_cmd->config.global.max_allow_delay_us = 100 * 1000; //100ms - global_cmd->config.global.lock_wait_time_us = 5000; //5ms - global_cmd->config.global.recovery_timeout_us = 5000; //5ms - global_cmd->config.global.hwfq_flip_programming_delay_us = 100; //100us - - /* copy static feature configuration */ - global_cmd->config.global.features.all = dc->debug.fams2_config.all; + if (enable) { + /* send global configuration parameters */ + memcpy(&global_cmd->config.global, &context->bw_ctx.bw.dcn.fams2_global_config, sizeof(struct dmub_cmd_fams2_global_config)); - /* apply feature configuration based on current driver state */ - global_cmd->config.global.features.bits.enable_visual_confirm = dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2; - global_cmd->config.global.features.bits.enable = enable; + /* copy static feature configuration overrides */ + global_cmd->config.global.features.bits.enable_stall_recovery = dc->debug.fams2_config.bits.enable_stall_recovery; + global_cmd->config.global.features.bits.enable_debug = dc->debug.fams2_config.bits.enable_debug; + global_cmd->config.global.features.bits.enable_offload_flip = dc->debug.fams2_config.bits.enable_offload_flip; - /* construct per-stream configs */ - if (enable) { - for (i = 0; i < context->bw_ctx.bw.dcn.fams2_stream_count; i++) { + /* construct per-stream configs */ + for (i = 0; i < context->bw_ctx.bw.dcn.fams2_global_config.num_streams; i++) { struct dmub_rb_cmd_fams2 *stream_cmd = &cmd[i+1].fams2_config; /* configure command header */ @@ -1702,12 +1700,15 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, } } - if (enable && context->bw_ctx.bw.dcn.fams2_stream_count) { + /* apply feature configuration based on current driver state */ + global_cmd->config.global.features.bits.enable_visual_confirm = dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2; + global_cmd->config.global.features.bits.enable = enable; + + if (enable && context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) { /* set multi pending for global, and unset for last stream cmd */ - global_cmd->config.global.num_streams = context->bw_ctx.bw.dcn.fams2_stream_count; global_cmd->header.multi_cmd_pending = 1; - cmd[context->bw_ctx.bw.dcn.fams2_stream_count].fams2_config.header.multi_cmd_pending = 0; - num_cmds += context->bw_ctx.bw.dcn.fams2_stream_count; + cmd[context->bw_ctx.bw.dcn.fams2_global_config.num_streams].fams2_config.header.multi_cmd_pending = 0; + num_cmds += context->bw_ctx.bw.dcn.fams2_global_config.num_streams; } dm_execute_dmub_cmd_list(dc->ctx, num_cmds, cmd, DM_DMUB_WAIT_TYPE_WAIT); diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 959ae0df1e56..c10567ec1c81 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -763,13 +763,6 @@ enum scanning_type { SCANNING_TYPE_UNDEFINED }; -enum chroma_cositing { - CHROMA_COSITING_NONE, - CHROMA_COSITING_LEFT, - CHROMA_COSITING_TOPLEFT, - CHROMA_COSITING_COUNT -}; - struct dc_crtc_timing_flags { uint32_t INTERLACE :1; uint32_t HSYNC_POSITIVE_POLARITY :1; /* when set to 1, diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index 582606319764..8f85a1db5eba 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -128,6 +128,7 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl spl_in->basic_out.always_scale = pipe_ctx->stream->ctx->dc->debug.always_scale; // Make spl input basic output info alpha_en field point to plane res scl_data lb_params alpha_en spl_in->basic_out.alpha_en = pipe_ctx->plane_res.scl_data.lb_params.alpha_en; + spl_in->basic_out.use_two_pixels_per_container = pipe_ctx->stream_res.tg->funcs->is_two_pixels_per_container(&stream->timing); // Make spl input basic input info scaling quality field point to plane state scaling_quality populate_spltaps_from_taps(&spl_in->scaling_quality, &plane_state->scaling_quality); // Translate edge adaptive scaler preference @@ -170,7 +171,6 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl /* Translate transfer function */ spl_in->basic_in.tf_type = (enum spl_transfer_func_type) plane_state->in_transfer_func.type; spl_in->basic_in.tf_predefined_type = (enum spl_transfer_func_predefined) plane_state->in_transfer_func.tf; - } /// @brief Translate SPL output parameters to pipe context diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 8ebd7e9e776e..de9bd72ca514 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -159,6 +159,12 @@ struct test_pattern { struct dc_stream_debug_options { char force_odm_combine_segments; + /* + * When force_odm_combine_segments is non zero, allow dc to + * temporarily transition to ODM bypass when minimal transition state + * is required to prevent visual glitches showing on the screen + */ + char allow_transition_for_forced_odm; }; #define LUMINANCE_DATA_TABLE_SIZE 10 @@ -260,6 +266,8 @@ struct dc_stream_state { struct dc_cursor_attributes cursor_attributes; struct dc_cursor_position cursor_position; + bool hw_cursor_req; + uint32_t sdr_white_level; // for boosting (SDR) cursor in HDR mode /* from stream struct */ @@ -344,6 +352,7 @@ struct dc_stream_update { struct dc_cursor_attributes *cursor_attributes; struct dc_cursor_position *cursor_position; + bool *hw_cursor_req; }; bool dc_is_stream_unchanged( diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index c550e8997033..97279b080f3e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -590,6 +590,7 @@ enum dc_psr_state { PSR_STATE5c, PSR_STATE_HWLOCK_MGR, PSR_STATE_POLLVUPDATE, + PSR_STATE_RELEASE_HWLOCK_MGR_FULL_FRAME, PSR_STATE_INVALID = 0xFF }; diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h index 1e0292861244..6ac2bd86c4db 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn20/dcn20_dccg.h @@ -346,11 +346,7 @@ type SYMCLK32_LE3_SRC_SEL;\ type SYMCLK32_LE2_EN;\ type SYMCLK32_LE3_EN;\ - type DP_DTO_ENABLE[MAX_PIPES];\ - type DSCCLK0_DTO_DB_EN;\ - type DSCCLK1_DTO_DB_EN;\ - type DSCCLK2_DTO_DB_EN;\ - type DSCCLK3_DTO_DB_EN; + type DP_DTO_ENABLE[MAX_PIPES]; struct dccg_shift { DCCG_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c index 68cd3258f4a9..7f91e48902e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c @@ -41,6 +41,1041 @@ #define DC_LOGGER \ dccg->ctx->logger +enum symclk_fe_source { + SYMCLK_FE_SYMCLK_A = 0, // Select functional clock from backend symclk A + SYMCLK_FE_SYMCLK_B, + SYMCLK_FE_SYMCLK_C, + SYMCLK_FE_SYMCLK_D, + SYMCLK_FE_SYMCLK_E, + SYMCLK_FE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum symclk_be_source { + SYMCLK_BE_PHYCLK = 0, // Select phy clk when sym_clk_enable = 1 + SYMCLK_BE_DPIACLK_810 = 4, + SYMCLK_BE_DPIACLK_162 = 5, + SYMCLK_BE_DPIACLK_540 = 6, + SYMCLK_BE_DPIACLK_270 = 7, + SYMCLK_BE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum physymclk_source { + PHYSYMCLK_PHYCLK = 0, // Select symclk as source of clock which is output to PHY through DCIO. + PHYSYMCLK_PHYD18CLK, // Select phyd18clk as the source of clock which is output to PHY through DCIO. + PHYSYMCLK_PHYD32CLK, // Select phyd32clk as the source of clock which is output to PHY through DCIO. + PHYSYMCLK_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum dtbclk_source { + DTBCLK_DPREFCLK = 0, // Selects source for DTBCLK_P# as DPREFCLK (src sel 0 and 1 are same) + DTBCLK_DPREFCLK_0, // Selects source for DTBCLK_P# as DPREFCLK (src sel 0 and 1 are same) + DTBCLK_DTBCLK0, // Selects source for DTBCLK_P# as DTBCLK0 + DTBCLK_DTBCLK1, // Selects source for DTBCLK_P# as DTBCLK0 + DTBCLK_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum dppclk_clock_source { + DPP_REFCLK = 0, // refclk is selected + DPP_DCCG_DTO, // Functional clock selected is DTO tuned DPPCLK +}; + +enum dp_stream_clk_source { + DP_STREAM_DTBCLK_P0 = 0, // Selects functional for DP_STREAM_CLK as DTBCLK_P# + DP_STREAM_DTBCLK_P1, + DP_STREAM_DTBCLK_P2, + DP_STREAM_DTBCLK_P3, + DP_STREAM_DTBCLK_P4, + DP_STREAM_DTBCLK_P5, + DP_STREAM_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum hdmi_char_clk { + HDMI_CHAR_PHYAD18CLK = 0, // Selects functional for hdmi_char_clk as UNIPHYA PHYD18CLK + HDMI_CHAR_PHYBD18CLK, + HDMI_CHAR_PHYCD18CLK, + HDMI_CHAR_PHYDD18CLK, + HDMI_CHAR_PHYED18CLK, + HDMI_CHAR_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum hdmi_stream_clk_source { + HDMI_STREAM_DTBCLK_P0 = 0, // Selects functional for HDMI_STREAM_CLK as DTBCLK_P# + HDMI_STREAM_DTBCLK_P1, + HDMI_STREAM_DTBCLK_P2, + HDMI_STREAM_DTBCLK_P3, + HDMI_STREAM_DTBCLK_P4, + HDMI_STREAM_DTBCLK_P5, + HDMI_STREAM_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum symclk32_se_clk_source { + SYMCLK32_SE_PHYAD32CLK = 0, // Selects functional for SYMCLK32 as UNIPHYA PHYD32CLK + SYMCLK32_SE_PHYBD32CLK, + SYMCLK32_SE_PHYCD32CLK, + SYMCLK32_SE_PHYDD32CLK, + SYMCLK32_SE_PHYED32CLK, + SYMCLK32_SE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum symclk32_le_clk_source { + SYMCLK32_LE_PHYAD32CLK = 0, // Selects functional for SYMCLK32 as UNIPHYA PHYD32CLK + SYMCLK32_LE_PHYBD32CLK, + SYMCLK32_LE_PHYCD32CLK, + SYMCLK32_LE_PHYDD32CLK, + SYMCLK32_LE_PHYED32CLK, + SYMCLK32_LE_REFCLK = 0xFF, // Arbitrary value to pass refclk selection in software +}; + +enum dsc_clk_source { + DSC_CLK_REF_CLK = 0, // Ref clock selected for DSC_CLK + DSC_DTO_TUNED_CK_GPU_DISCLK_3, // DTO divided clock selected as functional clock +}; + + +static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_symclk32_se_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + return; + + /* SYMCLK32_ROOT_SE#_GATE_DISABLE will clock gate in DCCG */ + /* SYMCLK32_SE#_GATE_DISABLE will clock gate in HPO only */ + switch (inst) { + case 0: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE0_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE0_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE1_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE1_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE2_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE2_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE3_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE3_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_symclk32_le_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_le) + return; + + switch (inst) { + case 0: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_LE0_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_LE0_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_LE1_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_LE1_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_physymclk_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYASYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYBSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYCSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYDSYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL2, + PHYESYMCLK_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_symclk_fe_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKA_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKB_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKC_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKD_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKE_FE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_symclk_be_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* TBD add symclk_be in rcg control bits */ + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.physymclk) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKA_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKB_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKC_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKD_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 4: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, + SYMCLKE_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_dtbclk_p_rcg(struct dccg *dccg, int inst, bool enable) +{ + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P0_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P1_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P2_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL5, DTBCLK_P3_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } +} + +static void dccg35_set_dppclk_rcg(struct dccg *dccg, + int inst, bool enable) +{ + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) + return; + + switch (inst) { + case 0: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } +} + +static void dccg35_set_dpstreamclk_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpstream) + return; + + switch (inst) { + case 0: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK0_GATE_DISABLE, enable ? 0 : 1, + DPSTREAMCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK1_GATE_DISABLE, enable ? 0 : 1, + DPSTREAMCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK2_GATE_DISABLE, enable ? 0 : 1, + DPSTREAMCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL5, + DPSTREAMCLK3_GATE_DISABLE, enable ? 0 : 1, + DPSTREAMCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_smclk32_se_rcg( + struct dccg *dccg, + int inst, + bool enable) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (!dccg->ctx->dc->debug.root_clock_optimization.bits.symclk32_se) + return; + + switch (inst) { + case 0: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE0_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE0_GATE_DISABLE, enable ? 0 : 1); + break; + case 1: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE1_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE1_GATE_DISABLE, enable ? 0 : 1); + break; + case 2: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE2_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE2_GATE_DISABLE, enable ? 0 : 1); + break; + case 3: + REG_UPDATE_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE3_GATE_DISABLE, enable ? 0 : 1, + SYMCLK32_ROOT_SE3_GATE_DISABLE, enable ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_dsc_clk_src_new(struct dccg *dccg, int inst, enum dsc_clk_source src) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* DSCCLK#_EN=0 switches to refclock from functional clock */ + + switch (inst) { + case 0: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, src); + break; + case 1: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, src); + break; + case 2: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, src); + break; + case 3: + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, src); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_symclk32_se_src_new( + struct dccg *dccg, + int inst, + enum symclk32_se_clk_source src + ) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE0_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src, + SYMCLK32_SE0_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1); + break; + case 1: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE1_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src, + SYMCLK32_SE1_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1); + break; + case 2: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE2_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src, + SYMCLK32_SE2_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1); + break; + case 3: + REG_UPDATE_2(SYMCLK32_SE_CNTL, + SYMCLK32_SE3_SRC_SEL, (src == SYMCLK32_SE_REFCLK) ? 0 : src, + SYMCLK32_SE3_EN, (src == SYMCLK32_SE_REFCLK) ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static int +dccg35_is_symclk32_se_src_functional_le_new(struct dccg *dccg, int symclk_32_se_inst, int symclk_32_le_inst) +{ + uint32_t en; + uint32_t src_sel; + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + REG_GET_2(SYMCLK32_SE_CNTL, SYMCLK32_SE3_SRC_SEL, &src_sel, SYMCLK32_SE3_EN, &en); + + if (en == 1 && src_sel == symclk_32_le_inst) + return 1; + + return 0; +} + + +static void dccg35_set_symclk32_le_src_new( + struct dccg *dccg, + int inst, + enum symclk32_le_clk_source src) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(SYMCLK32_LE_CNTL, + SYMCLK32_LE0_SRC_SEL, (src == SYMCLK32_LE_REFCLK) ? 0 : src, + SYMCLK32_LE0_EN, (src == SYMCLK32_LE_REFCLK) ? 0 : 1); + break; + case 1: + REG_UPDATE_2(SYMCLK32_LE_CNTL, + SYMCLK32_LE1_SRC_SEL, (src == SYMCLK32_LE_REFCLK) ? 0 : src, + SYMCLK32_LE1_EN, (src == SYMCLK32_LE_REFCLK) ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dcn35_set_dppclk_src_new(struct dccg *dccg, + int inst, enum dppclk_clock_source src) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE(DPPCLK_CTRL, DPPCLK0_EN, src); + break; + case 1: + REG_UPDATE(DPPCLK_CTRL, DPPCLK1_EN, src); + break; + case 2: + REG_UPDATE(DPPCLK_CTRL, DPPCLK2_EN, src); + break; + case 3: + REG_UPDATE(DPPCLK_CTRL, DPPCLK3_EN, src); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } +} + +static void dccg35_set_dtbclk_p_src_new( + struct dccg *dccg, + enum dtbclk_source src, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + /* If DTBCLK_P#_EN is 0 refclock is selected as functional clock + * If DTBCLK_P#_EN is 1 functional clock is selected as DTBCLK_P#_SRC_SEL + */ + + switch (inst) { + case 0: + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P0_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src, + DTBCLK_P0_EN, (src == DTBCLK_REFCLK) ? 0 : 1); + break; + case 1: + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P1_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src, + DTBCLK_P1_EN, (src == DTBCLK_REFCLK) ? 0 : 1); + break; + case 2: + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P2_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src, + DTBCLK_P2_EN, (src == DTBCLK_REFCLK) ? 0 : 1); + break; + case 3: + REG_UPDATE_2(DTBCLK_P_CNTL, + DTBCLK_P3_SRC_SEL, (src == DTBCLK_REFCLK) ? 0 : src, + DTBCLK_P3_EN, (src == DTBCLK_REFCLK) ? 0 : 1); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_dpstreamclk_src_new( + struct dccg *dccg, + enum dp_stream_clk_source src, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, + (src == DP_STREAM_REFCLK) ? 0 : 1, + DPSTREAMCLK0_SRC_SEL, + (src == DP_STREAM_REFCLK) ? 0 : src); + break; + case 1: + REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN, + (src == DP_STREAM_REFCLK) ? 0 : 1, + DPSTREAMCLK1_SRC_SEL, + (src == DP_STREAM_REFCLK) ? 0 : src); + + break; + case 2: + REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN, + (src == DP_STREAM_REFCLK) ? 0 : 1, + DPSTREAMCLK2_SRC_SEL, + (src == DP_STREAM_REFCLK) ? 0 : src); + + break; + case 3: + REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, + (src == DP_STREAM_REFCLK) ? 0 : 1, + DPSTREAMCLK3_SRC_SEL, + (src == DP_STREAM_REFCLK) ? 0 : src); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_physymclk_src_new( + struct dccg *dccg, + enum physymclk_source src, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(PHYASYMCLK_CLOCK_CNTL, PHYASYMCLK_EN, + (src == PHYSYMCLK_REFCLK) ? 0 : 1, + PHYASYMCLK_SRC_SEL, + (src == PHYSYMCLK_REFCLK) ? 0 : src); + break; + case 1: + REG_UPDATE_2(PHYBSYMCLK_CLOCK_CNTL, PHYBSYMCLK_EN, + (src == PHYSYMCLK_REFCLK) ? 0 : 1, + PHYBSYMCLK_SRC_SEL, + (src == PHYSYMCLK_REFCLK) ? 0 : src); + break; + case 2: + REG_UPDATE_2(PHYCSYMCLK_CLOCK_CNTL, PHYCSYMCLK_EN, + (src == PHYSYMCLK_REFCLK) ? 0 : 1, + PHYCSYMCLK_SRC_SEL, + (src == PHYSYMCLK_REFCLK) ? 0 : src); + break; + case 3: + REG_UPDATE_2(PHYDSYMCLK_CLOCK_CNTL, PHYDSYMCLK_EN, + (src == PHYSYMCLK_REFCLK) ? 0 : 1, + PHYDSYMCLK_SRC_SEL, + (src == PHYSYMCLK_REFCLK) ? 0 : src); + break; + case 4: + REG_UPDATE_2(PHYESYMCLK_CLOCK_CNTL, PHYESYMCLK_EN, + (src == PHYSYMCLK_REFCLK) ? 0 : 1, + PHYESYMCLK_SRC_SEL, + (src == PHYSYMCLK_REFCLK) ? 0 : src); + break; + default: + BREAK_TO_DEBUGGER(); + return; + } +} + +static void dccg35_set_symclk_be_src_new( + struct dccg *dccg, + enum symclk_be_source src, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE, + SYMCLKA_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKA_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 1: + REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE, + SYMCLKB_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKB_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 2: + REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE, + SYMCLKC_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKC_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 3: + REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE, + SYMCLKD_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKD_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + case 4: + REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE, + SYMCLKE_CLOCK_ENABLE, (src == SYMCLK_BE_REFCLK) ? 0 : 1, + SYMCLKE_SRC_SEL, (src == SYMCLK_BE_REFCLK) ? 0 : src); + break; + } +} + +static int dccg35_is_symclk_fe_src_functional_be(struct dccg *dccg, + int symclk_fe_inst, + int symclk_be_inst) +{ + + uint32_t en = 0; + uint32_t src_sel = 0; + + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (symclk_fe_inst) { + case 0: + REG_GET_2(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_SRC_SEL, &src_sel, SYMCLKA_FE_EN, &en); + break; + case 1: + REG_GET_2(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_SRC_SEL, &src_sel, SYMCLKB_FE_EN, &en); + break; + case 2: + REG_GET_2(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_SRC_SEL, &src_sel, SYMCLKC_FE_EN, &en); + break; + case 3: + REG_GET_2(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_SRC_SEL, &src_sel, SYMCLKD_FE_EN, &en); + break; + case 4: + REG_GET_2(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_SRC_SEL, &src_sel, SYMCLKE_FE_EN, &en); + break; + } + + if (en == 1 && src_sel == symclk_be_inst) + return 1; + + return 0; +} + +static void dccg35_set_symclk_fe_src_new(struct dccg *dccg, enum symclk_fe_source src, int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_UPDATE_2(SYMCLKA_CLOCK_ENABLE, + SYMCLKA_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKA_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); + break; + case 1: + REG_UPDATE_2(SYMCLKB_CLOCK_ENABLE, + SYMCLKB_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKB_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); + break; + case 2: + REG_UPDATE_2(SYMCLKC_CLOCK_ENABLE, + SYMCLKC_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKC_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); + break; + case 3: + REG_UPDATE_2(SYMCLKD_CLOCK_ENABLE, + SYMCLKD_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKD_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); + break; + case 4: + REG_UPDATE_2(SYMCLKE_CLOCK_ENABLE, + SYMCLKE_FE_EN, (src == SYMCLK_FE_REFCLK) ? 0 : 1, + SYMCLKE_FE_SRC_SEL, (src == SYMCLK_FE_REFCLK) ? 0 : src); + break; + } +} + +static uint32_t dccg35_is_fe_rcg(struct dccg *dccg, int inst) +{ + uint32_t enable = 0; + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKA_FE_ROOT_GATE_DISABLE, &enable); + break; + case 1: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKB_FE_ROOT_GATE_DISABLE, &enable); + break; + case 2: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKC_FE_ROOT_GATE_DISABLE, &enable); + break; + case 3: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKD_FE_ROOT_GATE_DISABLE, &enable); + break; + case 4: + REG_GET(DCCG_GATE_DISABLE_CNTL5, + SYMCLKE_FE_ROOT_GATE_DISABLE, &enable); + break; + default: + BREAK_TO_DEBUGGER(); + break; + } + return enable; +} + +static uint32_t dccg35_is_symclk32_se_rcg(struct dccg *dccg, int inst) +{ + uint32_t disable_l1 = 0; + uint32_t disable_l2 = 0; + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + switch (inst) { + case 0: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE0_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE0_GATE_DISABLE, &disable_l2); + break; + case 1: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE1_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE1_GATE_DISABLE, &disable_l2); + break; + case 2: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE2_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE2_GATE_DISABLE, &disable_l2); + break; + case 3: + REG_GET_2(DCCG_GATE_DISABLE_CNTL3, + SYMCLK32_SE3_GATE_DISABLE, &disable_l1, + SYMCLK32_ROOT_SE3_GATE_DISABLE, &disable_l2); + break; + default: + BREAK_TO_DEBUGGER(); + return 0; + } + + /* return true if either block level or DCCG level gating is active */ + return (disable_l1 | disable_l2); +} + +static void dccg35_enable_symclk_fe_new( + struct dccg *dccg, + int inst, + enum symclk_fe_source src) +{ + dccg35_set_symclk_fe_rcg(dccg, inst, false); + dccg35_set_symclk_fe_src_new(dccg, src, inst); +} + +static void dccg35_disable_symclk_fe_new( + struct dccg *dccg, + int inst) +{ + dccg35_set_symclk_fe_src_new(dccg, SYMCLK_FE_REFCLK, inst); + dccg35_set_symclk_fe_rcg(dccg, inst, true); +} + +static void dccg35_enable_symclk_be_new( + struct dccg *dccg, + int inst, + enum symclk_be_source src) +{ + dccg35_set_symclk_be_rcg(dccg, inst, false); + dccg35_set_symclk_be_src_new(dccg, inst, src); +} + +static void dccg35_disable_symclk_be_new( + struct dccg *dccg, + int inst) +{ + int i; + + /* Switch from functional clock to refclock */ + dccg35_set_symclk_be_src_new(dccg, inst, SYMCLK_BE_REFCLK); + + /* Check if any other SE connected LE and disable them */ + for (i = 0; i < 4; i++) { + /* Make sure FE is not already in RCG */ + if (dccg35_is_fe_rcg(dccg, i) == 0) { + if (dccg35_is_symclk_fe_src_functional_be(dccg, i, inst)) + dccg35_disable_symclk_fe_new(dccg, i); + } + } + /* Safe to RCG SYMCLK*/ + dccg35_set_symclk_be_rcg(dccg, inst, true); +} + +static void dccg35_enable_symclk32_se_new( + struct dccg *dccg, + int inst, + enum symclk32_se_clk_source src) +{ + dccg35_set_symclk32_se_rcg(dccg, inst, false); + dccg35_set_symclk32_se_src_new(dccg, inst, src); +} + +static void dccg35_disable_symclk32_se_new( + struct dccg *dccg, + int inst) +{ + dccg35_set_symclk32_se_src_new(dccg, SYMCLK32_SE_REFCLK, inst); + dccg35_set_symclk32_se_rcg(dccg, inst, true); +} + +static void dccg35_enable_symclk32_le_new( + struct dccg *dccg, + int inst, + enum symclk32_le_clk_source src) +{ + dccg35_set_symclk32_le_rcg(dccg, inst, false); + dccg35_set_symclk32_le_src_new(dccg, inst, src); +} + +static void dccg35_disable_symclk32_le_new( + struct dccg *dccg, + int inst) +{ + int i; + + /* Switch from functional clock to refclock */ + dccg35_set_symclk32_le_src_new(dccg, inst, SYMCLK32_LE_REFCLK); + + /* Check if any SE are connected and disable SE as well */ + for (i = 0; i < 4; i++) { + /* Make sure FE is not already in RCG */ + if (dccg35_is_symclk32_se_rcg(dccg, i) == 0) { + /* Disable and SE connected to this LE before RCG */ + if (dccg35_is_symclk32_se_src_functional_le_new(dccg, i, inst)) + dccg35_disable_symclk32_se_new(dccg, i); + } + } + /* Safe to RCG SYM32_LE*/ + dccg35_set_symclk32_le_rcg(dccg, inst, true); +} + +static void dccg35_enable_physymclk_new(struct dccg *dccg, + int inst, + enum physymclk_source src) +{ + dccg35_set_physymclk_rcg(dccg, inst, false); + dccg35_set_physymclk_src_new(dccg, src, inst); +} + +static void dccg35_disable_physymclk_new(struct dccg *dccg, + int inst) +{ + dccg35_set_physymclk_src_new(dccg, PHYSYMCLK_REFCLK, inst); + dccg35_set_physymclk_rcg(dccg, inst, true); +} + +static void dccg35_enable_dpp_clk_new( + struct dccg *dccg, + int inst, + enum dppclk_clock_source src) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + /* Sanitize inst before use in array de-ref */ + if (inst < 0) { + BREAK_TO_DEBUGGER(); + return; + } + dccg35_set_dppclk_rcg(dccg, inst, false); + dcn35_set_dppclk_src_new(dccg, inst, src); + /* Switch DPP clock to DTO */ + REG_SET_2(DPPCLK_DTO_PARAM[inst], 0, + DPPCLK0_DTO_PHASE, 0xFF, + DPPCLK0_DTO_MODULO, 0xFF); +} + +static void dccg35_disable_dpp_clk_new( + struct dccg *dccg, + int inst) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + /* Sanitize inst before use in array de-ref */ + if (inst < 0) { + BREAK_TO_DEBUGGER(); + return; + } + dcn35_set_dppclk_src_new(dccg, inst, DPP_REFCLK); + REG_SET_2(DPPCLK_DTO_PARAM[inst], 0, + DPPCLK0_DTO_PHASE, 0, + DPPCLK0_DTO_MODULO, 1); + dccg35_set_dppclk_rcg(dccg, inst, true); +} + +static void dccg35_disable_dscclk_new(struct dccg *dccg, + int inst) +{ + dccg35_set_dsc_clk_src_new(dccg, inst, DSC_CLK_REF_CLK); + dccg35_set_dsc_clk_rcg(dccg, inst, true); +} + +static void dccg35_enable_dscclk_new(struct dccg *dccg, + int inst, + enum dsc_clk_source src) +{ + dccg35_set_dsc_clk_rcg(dccg, inst, false); + dccg35_set_dsc_clk_src_new(dccg, inst, src); +} + +static void dccg35_enable_dtbclk_p_new(struct dccg *dccg, + enum dtbclk_source src, + int inst) +{ + dccg35_set_dtbclk_p_rcg(dccg, inst, false); + dccg35_set_dtbclk_p_src_new(dccg, src, inst); +} + +static void dccg35_disable_dtbclk_p_new(struct dccg *dccg, + int inst) +{ + dccg35_set_dtbclk_p_src_new(dccg, DTBCLK_REFCLK, inst); + dccg35_set_dtbclk_p_rcg(dccg, inst, true); +} + +static void dccg35_disable_dpstreamclk_new(struct dccg *dccg, + int inst) +{ + dccg35_set_dpstreamclk_src_new(dccg, DP_STREAM_REFCLK, inst); + dccg35_set_dpstreamclk_rcg(dccg, inst, true); +} + +static void dccg35_enable_dpstreamclk_new(struct dccg *dccg, + enum dp_stream_clk_source src, + int inst) +{ + dccg35_set_dpstreamclk_rcg(dccg, inst, false); + dccg35_set_dpstreamclk_src_new(dccg, src, inst); +} + static void dccg35_trigger_dio_fifo_resync(struct dccg *dccg) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); @@ -997,6 +2032,322 @@ static void dccg35_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst } } +static void dccg35_set_dpstreamclk_cb( + struct dccg *dccg, + enum streamclk_source src, + int otg_inst, + int dp_hpo_inst) +{ + + enum dtbclk_source dtb_clk_src; + enum dp_stream_clk_source dp_stream_clk_src; + + ASSERT(otg_inst >= DP_STREAM_DTBCLK_P5); + + switch (src) { + case REFCLK: + dtb_clk_src = DTBCLK_REFCLK; + dp_stream_clk_src = DP_STREAM_REFCLK; + break; + case DPREFCLK: + dtb_clk_src = DTBCLK_DPREFCLK; + dp_stream_clk_src = (enum dp_stream_clk_source)otg_inst; + break; + case DTBCLK0: + dtb_clk_src = DTBCLK_DTBCLK0; + dp_stream_clk_src = (enum dp_stream_clk_source)otg_inst; + break; + default: + BREAK_TO_DEBUGGER(); + return; + } + + if (dtb_clk_src == DTBCLK_REFCLK && + dp_stream_clk_src == DP_STREAM_REFCLK) { + dccg35_disable_dtbclk_p_new(dccg, otg_inst); + dccg35_disable_dpstreamclk_new(dccg, dp_hpo_inst); + } else { + dccg35_enable_dtbclk_p_new(dccg, dtb_clk_src, otg_inst); + dccg35_enable_dpstreamclk_new(dccg, + dp_stream_clk_src, + dp_hpo_inst); + } +} + +static void dccg35_set_dpstreamclk_root_clock_gating_cb( + struct dccg *dccg, + int dp_hpo_inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Instance 0 is implied here since only one streamclock resource + * Redundant as gating when enabled is acheived through set_dpstreamclk + */ + if (power_on) + dccg35_enable_dpstreamclk_new(dccg, + DP_STREAM_REFCLK, + dp_hpo_inst); + else + dccg35_disable_dpstreamclk_new(dccg, dp_hpo_inst); +} + +static void dccg35_update_dpp_dto_cb(struct dccg *dccg, int dpp_inst, + int req_dppclk) +{ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + + if (dccg->ref_dppclk && req_dppclk) { + int ref_dppclk = dccg->ref_dppclk; + int modulo, phase; + + // phase / modulo = dpp pipe clk / dpp global clk + modulo = 0xff; // use FF at the end + phase = ((modulo * req_dppclk) + ref_dppclk - 1) / ref_dppclk; + + if (phase > 0xff) { + ASSERT(false); + phase = 0xff; + } + + /* Enable DPP CLK DTO output */ + dccg35_enable_dpp_clk_new(dccg, dpp_inst, DPP_DCCG_DTO); + + /* Program DTO */ + REG_SET_2(DPPCLK_DTO_PARAM[dpp_inst], 0, + DPPCLK0_DTO_PHASE, phase, + DPPCLK0_DTO_MODULO, modulo); + } else + dccg35_disable_dpp_clk_new(dccg, dpp_inst); + + dccg->pipe_dppclk_khz[dpp_inst] = req_dppclk; +} + +static void dccg35_dpp_root_clock_control_cb( + struct dccg *dccg, + unsigned int dpp_inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Redundant as gating when enabled is acheived through update_dpp_dto + */ + if (power_on) + dccg35_enable_dpp_clk_new(dccg, dpp_inst, DPP_REFCLK); + else + dccg35_disable_dpp_clk_new(dccg, dpp_inst); +} + +static void dccg35_enable_symclk32_se_cb( + struct dccg *dccg, + int inst, + enum phyd32clk_clock_source phyd32clk) +{ + dccg35_enable_symclk32_se_new(dccg, inst, (enum symclk32_se_clk_source)phyd32clk); +} + +static void dccg35_disable_symclk32_se_cb(struct dccg *dccg, int inst) +{ + dccg35_disable_symclk32_se_new(dccg, inst); +} + +static void dccg35_enable_symclk32_le_cb( + struct dccg *dccg, + int inst, + enum phyd32clk_clock_source src) +{ + dccg35_enable_symclk32_le_new(dccg, inst, (enum symclk32_le_clk_source) src); +} + +static void dccg35_disable_symclk32_le_cb(struct dccg *dccg, int inst) +{ + dccg35_disable_symclk32_le_new(dccg, inst); +} + +static void dccg35_set_symclk32_le_root_clock_gating_cb( + struct dccg *dccg, + int inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Redundant as gating when enabled is acheived through disable_symclk32_le + */ + if (power_on) + dccg35_enable_symclk32_le_new(dccg, inst, SYMCLK32_LE_REFCLK); + else + dccg35_disable_symclk32_le_new(dccg, inst); +} + +static void dccg35_set_physymclk_cb( + struct dccg *dccg, + int inst, + enum physymclk_clock_source clk_src, + bool force_enable) +{ + /* force_enable = 0 indicates we can switch to ref clock */ + if (force_enable) + dccg35_enable_physymclk_new(dccg, inst, (enum physymclk_source)clk_src); + else + dccg35_disable_physymclk_new(dccg, inst); +} + +static void dccg35_set_physymclk_root_clock_gating_cb( + struct dccg *dccg, + int inst, + bool power_on) +{ + /* Redundant RCG already done in disable_physymclk + * power_on = 1 indicates we need to ungate + */ + if (power_on) + dccg35_enable_physymclk_new(dccg, inst, PHYSYMCLK_REFCLK); + else + dccg35_disable_physymclk_new(dccg, inst); +} + +static void dccg35_set_symclk32_le_root_clock_gating( + struct dccg *dccg, + int inst, + bool power_on) +{ + /* power_on set indicates we need to ungate + * Currently called from optimize_bandwidth and prepare_bandwidth calls + * Since clock source is not passed restore to refclock on ungate + * Redundant as gating when enabled is acheived through disable_symclk32_le + */ + if (power_on) + dccg35_enable_symclk32_le_new(dccg, inst, SYMCLK32_LE_REFCLK); + else + dccg35_disable_symclk32_le_new(dccg, inst); +} + +static void dccg35_set_dtbclk_p_src_cb( + struct dccg *dccg, + enum streamclk_source src, + uint32_t inst) +{ + if (src == DTBCLK0) + dccg35_enable_dtbclk_p_new(dccg, DTBCLK_DTBCLK0, inst); + else + dccg35_disable_dtbclk_p_new(dccg, inst); +} + +static void dccg35_set_dtbclk_dto_cb( + struct dccg *dccg, + const struct dtbclk_dto_params *params) +{ + /* set_dtbclk_p_src typ called earlier to switch to DTBCLK + * if params->ref_dtbclk_khz and req_dtbclk_khz are 0 switch to ref-clock + */ + struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); + /* DTO Output Rate / Pixel Rate = 1/4 */ + int req_dtbclk_khz = params->pixclk_khz / 4; + + if (params->ref_dtbclk_khz && req_dtbclk_khz) { + uint32_t modulo, phase; + + dccg35_enable_dtbclk_p_new(dccg, DTBCLK_DTBCLK0, params->otg_inst); + + // phase / modulo = dtbclk / dtbclk ref + modulo = params->ref_dtbclk_khz * 1000; + phase = req_dtbclk_khz * 1000; + + REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], modulo); + REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], phase); + + REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLK_DTO_ENABLE[params->otg_inst], 1); + + REG_WAIT(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLKDTO_ENABLE_STATUS[params->otg_inst], 1, + 1, 100); + + /* program OTG_PIXEL_RATE_DIV for DIVK1 and DIVK2 fields */ + dccg35_set_pixel_rate_div(dccg, params->otg_inst, PIXEL_RATE_DIV_BY_1, PIXEL_RATE_DIV_BY_1); + + /* The recommended programming sequence to enable DTBCLK DTO to generate + * valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should + * be set only after DTO is enabled + */ + REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst], + PIPE_DTO_SRC_SEL[params->otg_inst], 2); + } else { + dccg35_disable_dtbclk_p_new(dccg, params->otg_inst); + + REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst], + DTBCLK_DTO_ENABLE[params->otg_inst], 0, + PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1); + + REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0); + REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0); + } +} + +static void dccg35_disable_dscclk_cb(struct dccg *dccg, + int inst) +{ + dccg35_disable_dscclk_new(dccg, inst); +} + +static void dccg35_enable_dscclk_cb(struct dccg *dccg, int inst) +{ + dccg35_enable_dscclk_new(dccg, inst, DSC_DTO_TUNED_CK_GPU_DISCLK_3); +} + +static void dccg35_enable_symclk_se_cb(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst) +{ + /* Switch to functional clock if already not selected */ + dccg35_enable_symclk_be_new(dccg, SYMCLK_BE_PHYCLK, link_enc_inst); + + dccg35_enable_symclk_fe_new(dccg, stream_enc_inst, (enum symclk_fe_source) link_enc_inst); + +} + +static void dccg35_disable_symclk_se_cb( + struct dccg *dccg, + uint32_t stream_enc_inst, + uint32_t link_enc_inst) +{ + dccg35_disable_symclk_fe_new(dccg, stream_enc_inst); + + /* DMU PHY sequence switches SYMCLK_BE (link_enc_inst) to ref clock once PHY is turned off */ +} + +static const struct dccg_funcs dccg35_funcs_new = { + .update_dpp_dto = dccg35_update_dpp_dto_cb, + .dpp_root_clock_control = dccg35_dpp_root_clock_control_cb, + .get_dccg_ref_freq = dccg31_get_dccg_ref_freq, + .dccg_init = dccg35_init, + .set_dpstreamclk = dccg35_set_dpstreamclk_cb, + .set_dpstreamclk_root_clock_gating = dccg35_set_dpstreamclk_root_clock_gating_cb, + .enable_symclk32_se = dccg35_enable_symclk32_se_cb, + .disable_symclk32_se = dccg35_disable_symclk32_se_cb, + .enable_symclk32_le = dccg35_enable_symclk32_le_cb, + .disable_symclk32_le = dccg35_disable_symclk32_le_cb, + .set_symclk32_le_root_clock_gating = dccg35_set_symclk32_le_root_clock_gating_cb, + .set_physymclk = dccg35_set_physymclk_cb, + .set_physymclk_root_clock_gating = dccg35_set_physymclk_root_clock_gating_cb, + .set_dtbclk_dto = dccg35_set_dtbclk_dto_cb, + .set_audio_dtbclk_dto = dccg31_set_audio_dtbclk_dto, + .set_fifo_errdet_ovr_en = dccg2_set_fifo_errdet_ovr_en, + .otg_add_pixel = dccg31_otg_add_pixel, + .otg_drop_pixel = dccg31_otg_drop_pixel, + .set_dispclk_change_mode = dccg31_set_dispclk_change_mode, + .disable_dsc = dccg35_disable_dscclk_cb, + .enable_dsc = dccg35_enable_dscclk_cb, + .set_pixel_rate_div = dccg35_set_pixel_rate_div, + .get_pixel_rate_div = dccg35_get_pixel_rate_div, + .trigger_dio_fifo_resync = dccg35_trigger_dio_fifo_resync, + .set_valid_pixel_rate = dccg35_set_valid_pixel_rate, + .enable_symclk_se = dccg35_enable_symclk_se_cb, + .disable_symclk_se = dccg35_disable_symclk_se_cb, + .set_dtbclk_p_src = dccg35_set_dtbclk_p_src_cb, +}; + static const struct dccg_funcs dccg35_funcs = { .update_dpp_dto = dccg35_update_dpp_dto, .dpp_root_clock_control = dccg35_dpp_root_clock_control, @@ -1026,6 +2377,7 @@ static const struct dccg_funcs dccg35_funcs = { .enable_symclk_se = dccg35_enable_symclk_se, .disable_symclk_se = dccg35_disable_symclk_se, .set_dtbclk_p_src = dccg35_set_dtbclk_p_src, + }; struct dccg *dccg35_create( @@ -1041,6 +2393,10 @@ struct dccg *dccg35_create( BREAK_TO_DEBUGGER(); return NULL; } + (void)&dccg35_disable_symclk_be_new; + (void)&dccg35_set_symclk32_le_root_clock_gating; + (void)&dccg35_set_smclk32_se_rcg; + (void)&dccg35_funcs_new; base = &dccg_dcn->base; base->ctx = ctx; diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c index 07f1f396ba52..0b889004509a 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.c @@ -730,35 +730,35 @@ void dccg401_init(struct dccg *dccg) } } -static void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst, bool enable) +static void dccg401_set_dto_dscclk(struct dccg *dccg, uint32_t inst) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - uint32_t phase = enable ? 1 : 0; switch (inst) { case 0: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1, DSCCLK0_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK0_DTO_PARAM, - DSCCLK0_DTO_PHASE, phase, + DSCCLK0_DTO_PHASE, 1, DSCCLK0_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1); + break; case 1: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1, DSCCLK1_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK1_DTO_PARAM, - DSCCLK1_DTO_PHASE, phase, + DSCCLK1_DTO_PHASE, 1, DSCCLK1_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1); break; case 2: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1, DSCCLK2_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK2_DTO_PARAM, - DSCCLK2_DTO_PHASE, phase, + DSCCLK2_DTO_PHASE, 1, DSCCLK2_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1); break; case 3: - REG_UPDATE_2(DSCCLK_DTO_CTRL, DSCCLK3_EN, 1, DSCCLK3_DTO_DB_EN, 1); REG_UPDATE_2(DSCCLK3_DTO_PARAM, - DSCCLK3_DTO_PHASE, phase, + DSCCLK3_DTO_PHASE, 1, DSCCLK3_DTO_MODULO, 1); + REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 1); break; default: BREAK_TO_DEBUGGER(); @@ -774,15 +774,27 @@ static void dccg401_set_ref_dscclk(struct dccg *dccg, switch (dsc_inst) { case 0: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 0); + REG_UPDATE_2(DSCCLK0_DTO_PARAM, + DSCCLK0_DTO_PHASE, 0, + DSCCLK0_DTO_MODULO, 0); break; case 1: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 0); + REG_UPDATE_2(DSCCLK1_DTO_PARAM, + DSCCLK1_DTO_PHASE, 0, + DSCCLK1_DTO_MODULO, 0); break; case 2: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 0); + REG_UPDATE_2(DSCCLK2_DTO_PARAM, + DSCCLK2_DTO_PHASE, 0, + DSCCLK2_DTO_MODULO, 0); break; case 3: REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK3_EN, 0); + REG_UPDATE_2(DSCCLK3_DTO_PARAM, + DSCCLK3_DTO_PHASE, 0, + DSCCLK3_DTO_MODULO, 0); break; default: return; diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h index 8bcddc836347..a196ce9e8127 100644 --- a/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn401/dcn401_dccg.h @@ -117,10 +117,6 @@ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_EN, mask_sh),\ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_EN, mask_sh),\ DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK0_DTO_DB_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK1_DTO_DB_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK2_DTO_DB_EN, mask_sh),\ - DCCG_SF(DSCCLK_DTO_CTRL, DSCCLK3_DTO_DB_EN, mask_sh),\ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_PHASE, mask_sh),\ DCCG_SF(DSCCLK0_DTO_PARAM, DSCCLK0_DTO_MODULO, mask_sh),\ DCCG_SF(DSCCLK1_DTO_PARAM, DSCCLK1_DTO_PHASE, mask_sh),\ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c index cf5f84fb9c69..eeed840073fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c @@ -630,6 +630,11 @@ void dce_aud_az_enable(struct audio *audio) audio->inst, value); } +void dce_aud_az_disable_hbr_audio(struct audio *audio) +{ + set_high_bit_rate_capable(audio, false); +} + void dce_aud_az_disable(struct audio *audio) { uint32_t value; @@ -1293,6 +1298,7 @@ static const struct audio_funcs funcs = { .az_enable = dce_aud_az_enable, .az_disable = dce_aud_az_disable, .az_configure = dce_aud_az_configure, + .az_disable_hbr_audio = dce_aud_az_disable_hbr_audio, .destroy = dce_aud_destroy, }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h index 539f881928d1..1b7b8b079af4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.h @@ -166,6 +166,7 @@ void dce_aud_hw_init(struct audio *audio); void dce_aud_az_enable(struct audio *audio); void dce_aud_az_disable(struct audio *audio); +void dce_aud_az_disable_hbr_audio(struct audio *audio); void dce_aud_az_configure(struct audio *audio, enum signal_type signal, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index b8996d285f00..bb4ac5042c80 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -735,7 +735,15 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, (unsigned int) payload->mot); if (payload->write) dce_aux_log_payload(" write", payload->data, payload->length, 16); - ret = dce_aux_transfer_raw(ddc, payload, &operation_result); + + /* Check whether aux to be processed via dmub or dcn directly */ + if (ddc->ctx->dc->debug.enable_dmub_aux_for_legacy_ddc + || ddc->ddc_pin == NULL) { + ret = dce_aux_transfer_dmub_raw(ddc, payload, &operation_result); + } else { + ret = dce_aux_transfer_raw(ddc, payload, &operation_result); + } + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, LOG_FLAG_I2cAux_DceAux, "dce_aux_transfer_with_retries: link_index=%u: END: retry %d of %d: address=0x%04x length=%u write=%d mot=%d: ret=%d operation_result=%d payload->reply=%u", diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index ccf153b7a467..cae18f8c1c9a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -94,6 +94,8 @@ static enum dc_psr_state convert_psr_state(uint32_t raw_state) state = PSR_STATE_HWLOCK_MGR; else if (raw_state == 0x61) state = PSR_STATE_POLLVUPDATE; + else if (raw_state == 0x62) + state = PSR_STATE_RELEASE_HWLOCK_MGR_FULL_FRAME; else state = PSR_STATE_INVALID; @@ -363,6 +365,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->debug.bitfields.visual_confirm = dc->dc->debug.visual_confirm == VISUAL_CONFIRM_PSR; copy_settings_data->debug.bitfields.use_hw_lock_mgr = 1; copy_settings_data->debug.bitfields.force_full_frame_update = 0; + copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm; if (psr_context->su_granularity_required == 0) copy_settings_data->su_y_granularity = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 2a21bcf5224f..14f935961672 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -167,6 +167,8 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, copy_settings_data->smu_optimizations_en = link->replay_settings.replay_smu_opt_enable; copy_settings_data->replay_timing_sync_supported = link->replay_settings.config.replay_timing_sync_supported; + copy_settings_data->debug.bitfields.enable_ips_visual_confirm = dc->dc->debug.enable_ips_visual_confirm; + copy_settings_data->flags.u32All = 0; copy_settings_data->flags.bitfields.fec_enable_status = (link->fec_state == dc_link_fec_enabled); copy_settings_data->flags.bitfields.dsc_enable_status = (pipe_ctx->stream->timing.flags.DSC == 1); @@ -185,8 +187,7 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, else copy_settings_data->flags.bitfields.force_wakeup_by_tps3 = 0; - - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index 49bcfe6ec999..fa422a8cbced 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -1955,6 +1955,7 @@ void dce110_tg_program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h index 28c58f1dff2d..ee4de740aceb 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h @@ -261,6 +261,7 @@ void dce110_tg_program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c index bf35dc65ca29..9837dec837ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c @@ -438,6 +438,7 @@ static void dce110_timing_generator_v_program_timing(struct timing_generator *tg int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index eb3557965781..fcf59348eb62 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -697,6 +697,7 @@ static void dce120_tg_program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c index c1a85ee374d9..e5fb0e8333e4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_timing_generator.c @@ -111,13 +111,14 @@ static void program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { if (!use_vbios) program_pix_dur(tg, timing->pix_clk_100hz); - dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios); + dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, 0, use_vbios); } static void dce60_timing_generator_enable_advanced_request( diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c index 2df4654858be..003a9330c286 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c @@ -111,13 +111,14 @@ static void program_timing(struct timing_generator *tg, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { if (!use_vbios) program_pix_dur(tg, timing->pix_clk_100hz); - dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios); + dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, 0, use_vbios); } static void dce80_timing_generator_enable_advanced_request( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 9923d0d620d4..e1f6623d4936 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -24,8 +24,6 @@ DCN10 = dcn10_ipp.o \ dcn10_hw_sequencer_debug.o \ - dcn10_opp.o \ - dcn10_mpc.o \ dcn10_cm_common.o \ AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c index 0b49362f71b0..eaed5d1c398a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c @@ -591,6 +591,8 @@ bool cm_helper_translate_curve_to_degamma_hw_format( i += increment) { if (j == hw_points - 1) break; + if (i >= TRANSFER_FUNC_POINTS) + return false; rgb_resulted[j].red = output_tf->tf_pts.red[i]; rgb_resulted[j].green = output_tf->tf_pts.green[i]; rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index b3aeabc4d605..25ba0d310d46 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -1,8 +1,7 @@ # SPDX-License-Identifier: MIT # Copyright © 2019-2024 Advanced Micro Devices, Inc. All rights reserved. -DCN20 = dcn20_mpc.o dcn20_opp.o dcn20_mmhubbub.o \ - dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o +DCN20 = dcn20_vmid.o dcn20_dwb.o dcn20_dwb_scl.o AMD_DAL_DCN20 = $(addprefix $(AMDDALPATH)/dc/dcn20/,$(DCN20)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index 4c43af867d86..b17277de0340 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -23,15 +23,11 @@ # # -DCN30 := dcn30_mpc.o dcn30_vpg.o \ +DCN30 := dcn30_vpg.o \ dcn30_afmt.o \ - dcn30_dwb.o \ - dcn30_dwb_cm.o \ dcn30_cm_common.o \ dcn30_mmhubbub.o \ - - AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN30) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c index b8327237ed44..f31f0e3abfc0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c @@ -28,7 +28,7 @@ #include "reg_helper.h" #include "dcn30/dcn30_dpp.h" #include "basics/conversion.h" -#include "dcn30_cm_common.h" +#include "dcn30/dcn30_cm_common.h" #include "custom_float.h" #define REG(reg) reg @@ -177,6 +177,8 @@ bool cm3_helper_translate_curve_to_hw_format( i += increment) { if (j == hw_points) break; + if (i >= TRANSFER_FUNC_POINTS) + return false; rgb_resulted[j].red = output_tf->tf_pts.red[i]; rgb_resulted[j].green = output_tf->tf_pts.green[i]; rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; @@ -335,6 +337,8 @@ bool cm3_helper_translate_curve_to_degamma_hw_format( i += increment) { if (j == hw_points - 1) break; + if (i >= TRANSFER_FUNC_POINTS) + return false; rgb_resulted[j].red = output_tf->tf_pts.red[i]; rgb_resulted[j].green = output_tf->tf_pts.green[i]; rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile index dc37dbf870df..fb4814ab3f05 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile @@ -3,7 +3,7 @@ # # Makefile for dcn30. -DCN301 = dcn301_dio_link_encoder.o dcn301_panel_cntl.o +DCN301 = dcn301_panel_cntl.o AMD_DAL_DCN301 = $(addprefix $(AMDDALPATH)/dc/dcn301/,$(DCN301)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile b/drivers/gpu/drm/amd/display/dc/dcn303/Makefile deleted file mode 100644 index a954e316aca2..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn303/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: MIT -# -# Copyright (C) 2021 Advanced Micro Devices, Inc. All the rights reserved -# -# Authors: AMD -# -# Makefile for dcn303. - -DCN3_03 = dcn303_init.o - -AMD_DAL_DCN3_03 = $(addprefix $(AMDDALPATH)/dc/dcn303/,$(DCN3_03)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_03) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile index e2601d0aba41..d510e4652c18 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile @@ -5,7 +5,7 @@ # Makefile for dcn31. DCN31 = dcn31_panel_cntl.o \ - dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \ + dcn31_apg.o \ dcn31_afmt.o dcn31_vpg.o AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile deleted file mode 100644 index 15fdcf7c6466..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright © 2024 Advanced Micro Devices, Inc. All rights reserved. -# -# Makefile for dcn314. - -DCN314 = dcn314_dio_stream_encoder.o - -AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN314) diff --git a/drivers/gpu/drm/amd/display/dc/dcn401/Makefile b/drivers/gpu/drm/amd/display/dc/dcn401/Makefile deleted file mode 100644 index ded1f3140beb..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dcn401/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright © 2024 Advanced Micro Devices, Inc. All rights reserved. - -DCN401 += dcn401_dio_link_encoder.o -DCN401 += dcn401_dio_stream_encoder.o -DCN401 += dcn401_mpc.o - -AMD_DAL_DCN401 = $(addprefix $(AMDDALPATH)/dc/dcn401/,$(DCN401)) - -AMD_DISPLAY_FILES += $(AMD_DAL_DCN401) diff --git a/drivers/gpu/drm/amd/display/dc/dio/Makefile b/drivers/gpu/drm/amd/display/dc/dio/Makefile index 67840e474d7a..0dfd480976f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dio/Makefile @@ -52,6 +52,15 @@ AMD_DAL_DIO_DCN30 = $(addprefix $(AMDDALPATH)/dc/dio/dcn30/,$(DIO_DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN30) ############################################################################### +# DCN301 +############################################################################### +DIO_DCN301 = dcn301_dio_link_encoder.o + +AMD_DAL_DIO_DCN301 = $(addprefix $(AMDDALPATH)/dc/dio/dcn301/,$(DIO_DCN301)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN301) + +############################################################################### # DCN31 ############################################################################### DIO_DCN31 = dcn31_dio_link_encoder.o @@ -61,6 +70,15 @@ AMD_DAL_DIO_DCN31 = $(addprefix $(AMDDALPATH)/dc/dio/dcn31/,$(DIO_DCN31)) AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN31) ############################################################################### +# DCN314 +############################################################################### +DIO_DCN314 = dcn314_dio_stream_encoder.o + +AMD_DAL_DIO_DCN314 = $(addprefix $(AMDDALPATH)/dc/dio/dcn314/,$(DIO_DCN314)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DIO_DCN314) + +############################################################################### # DCN32 ############################################################################### DIO_DCN32 = dcn32_dio_link_encoder.o dcn32_dio_stream_encoder.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c index 1b39a6e8a1ac..1b39a6e8a1ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h index 49f8d91d4951..49f8d91d4951 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_dio_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c index 5b343f745cf3..5b343f745cf3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h index 86548be591be..86548be591be 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c index 05783daa62ac..2ed382a8e79c 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c @@ -23,7 +23,6 @@ * */ - #include "reg_helper.h" #include "core_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c index 6a179e5ab417..6ab2a218b769 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c @@ -22,7 +22,6 @@ * */ - #include "dc_bios_types.h" #include "dcn30/dcn30_dio_stream_encoder.h" #include "dcn314/dcn314_dio_stream_encoder.h" @@ -392,6 +391,14 @@ static void enc35_reset_fifo(struct stream_encoder *enc, bool reset) udelay(10); } +static bool enc35_is_fifo_enabled(struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t reset_val; + + REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, &reset_val); + return (reset_val == 0) ? false : true; +} void enc35_disable_fifo(struct stream_encoder *enc) { struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); @@ -415,6 +422,24 @@ void enc35_enable_fifo(struct stream_encoder *enc) REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1); } +static uint32_t enc35_get_pixels_per_cycle(struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t value; + + REG_GET(DIG_FIFO_CTRL0, DIG_FIFO_OUTPUT_PIXEL_MODE, &value); + + switch (value) { + case 0: + return 1; + case 1: + return 2; + default: + ASSERT_CRITICAL(false); + return 1; + } +} + static const struct stream_encoder_funcs dcn35_str_enc_funcs = { .dp_set_odm_combine = enc314_dp_set_odm_combine, @@ -465,7 +490,9 @@ static const struct stream_encoder_funcs dcn35_str_enc_funcs = { .set_input_mode = enc314_set_dig_input_mode, .enable_fifo = enc35_enable_fifo, .disable_fifo = enc35_disable_fifo, + .is_fifo_enabled = enc35_is_fifo_enabled, .map_stream_to_link = enc35_stream_encoder_map_to_link, + .get_pixels_per_cycle = enc35_get_pixels_per_cycle, }; void dcn35_dio_stream_encoder_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h index 34adae7ab6e8..2e4a46f1b499 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h +++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h @@ -210,4 +210,7 @@ enum adaptive_sync_type dm_get_adaptive_sync_support_type(struct dc_link *link); enum dc_edid_status dm_helpers_get_sbios_edid(struct dc_link *link, struct dc_edid *edid); +bool dm_helpers_is_fullscreen(struct dc_context *ctx, struct dc_stream_state *stream); +bool dm_helpers_is_hdr_on(struct dc_context *ctx, struct dc_stream_state *stream); + #endif /* __DM_HELPERS__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 3c0222aa4df1..46f9c05de16e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -83,6 +83,8 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcfla CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 8a8efe408a9d..e9fea9c2162e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -1132,7 +1132,8 @@ static void dcn20_adjust_freesync_v_startup( patched_crtc_timing.v_addressable - patched_crtc_timing.v_border_top; - newVstartup = asic_blank_end + (patched_crtc_timing.v_total - asic_blank_start); + /* The newVStartUp is 1 line before vsync point */ + newVstartup = asic_blank_end + 1; *vstartup_start = ((newVstartup > *vstartup_start) ? newVstartup : *vstartup_start); } @@ -1562,6 +1563,8 @@ int dcn20_populate_dml_pipes_from_context(struct dc *dc, pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width; pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 255) / 256) * 256; pipes[pipe_cnt].pipe.src.source_format = dm_444_32; + pipes[pipe_cnt].pipe.src.cur0_src_width = 0; + pipes[pipe_cnt].pipe.src.cur1_src_width = 0; pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/ pipes[pipe_cnt].pipe.dest.recout_height = pipes[pipe_cnt].pipe.src.viewport_height; /*vp_height/vratio*/ pipes[pipe_cnt].pipe.dest.full_recout_width = pipes[pipe_cnt].pipe.dest.recout_width; /*when is_hsplit != 1*/ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c index 7c56ad0f8812..e7019c95ba79 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c @@ -78,7 +78,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c index 3d95bfa5aca2..ae5251041728 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -78,7 +78,7 @@ static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c index 98502a4f0567..9e1c18b90805 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -53,7 +53,7 @@ static void calculate_ttu_cursor( static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 9d399c4ce957..6f490d8d7038 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -160,8 +160,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { .pct_ideal_sdp_bw_after_urgent = 90.0, .pct_ideal_fabric_bw_after_urgent = 67.0, .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, .pct_ideal_dram_bw_after_urgent_strobe = 67.0, .max_avg_sdp_bw_use_normal_percent = 80.0, .max_avg_fabric_bw_use_normal_percent = 60.0, @@ -871,8 +871,9 @@ static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context) * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, * and the max of (VBLANK blanking time, MALL region)). */ - if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && - subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) + if (drr_timing && + stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && + subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) schedulable = true; return schedulable; @@ -937,7 +938,7 @@ static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) if (!subvp_pipe && pipe_mall_type == SUBVP_MAIN) subvp_pipe = pipe; } - if (found) { + if (found && subvp_pipe) { phantom_stream = dc_state_get_paired_subvp_stream(context, subvp_pipe->stream); main_timing = &subvp_pipe->stream->timing; phantom_timing = &phantom_stream->timing; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 4297402bdab3..8839faf42207 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -139,8 +139,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .pct_ideal_sdp_bw_after_urgent = 90.0, .pct_ideal_fabric_bw_after_urgent = 67.0, .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, .pct_ideal_dram_bw_after_urgent_strobe = 67.0, .max_avg_sdp_bw_use_normal_percent = 80.0, .max_avg_fabric_bw_use_normal_percent = 60.0, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 410e4b671228..641a8cd019cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -523,6 +523,7 @@ struct _vcs_dpi_display_pipe_dest_params_st { unsigned int vupdate_offset; unsigned int vupdate_width; unsigned int vready_offset; + unsigned int pstate_keepout; unsigned char interlaced; double pixel_rate_mhz; unsigned char synchronized_vblank_all_planes; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c index dae13f202220..d8bfc85e5dcd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c @@ -39,7 +39,7 @@ static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { - unsigned int ret_val = 0; + unsigned int ret_val = 1; if (source_format == dm_444_16) { if (!is_chroma) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index fea857214c0f..c4378e620cbf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -35,8 +35,6 @@ frame_warn_flag := -Wframe-larger-than=2048 endif endif -# DRIVER_BUILD is mostly used in DML2.1 source -subdir-ccflags-y += -DDRIVER_BUILD=1 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_core subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2/dml21/src/dml2_mcg/ @@ -81,13 +79,11 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization := $(dml2_ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_ccflags) @@ -104,13 +100,11 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.o : CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_shared.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.o := $(dml2_rcflags) @@ -126,13 +120,11 @@ DML21 += src/inc/dml2_debug.o DML21 += src/dml2_core/dml2_core_dcn4.o DML21 += src/dml2_core/dml2_core_factory.o DML21 += src/dml2_core/dml2_core_dcn4_calcs.o -DML21 += src/dml2_core/dml2_core_shared.o DML21 += src/dml2_dpmm/dml2_dpmm_dcn4.o DML21 += src/dml2_dpmm/dml2_dpmm_factory.o DML21 += src/dml2_mcg/dml2_mcg_dcn4.o DML21 += src/dml2_mcg/dml2_mcg_factory.o DML21 += src/dml2_pmo/dml2_pmo_dcn3.o -DML21 += src/dml2_pmo/dml2_pmo_dcn4.o DML21 += src/dml2_pmo/dml2_pmo_factory.o DML21 += src/dml2_pmo/dml2_pmo_dcn4_fams2.o DML21 += src/dml2_standalone_libraries/lib_float_math.o diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 06387b8b0aee..710a25dcfef0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -31,7 +31,7 @@ static void dml21_init_socbb_params(struct dml2_initialize_instance_in_out *dml_ else soc_bb = &dml2_socbb_dcn401; - qos_params = &dml_dcn401_soc_qos_params; + qos_params = &dml_dcn4_variant_a_soc_qos_params; } /* patch soc bb */ @@ -516,7 +516,7 @@ static void populate_dml21_stream_overrides_from_stream_state( if (!stream->ctx->dc->debug.enable_single_display_2to1_odm_policy || stream->debug.force_odm_combine_segments > 0) stream_desc->overrides.disable_dynamic_odm = true; - stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp; + stream_desc->overrides.disable_subvp = stream->ctx->dc->debug.force_disable_subvp || stream->hw_cursor_req; } static enum dml2_swizzle_mode gfx_addr3_to_dml2_swizzle_mode(enum swizzle_mode_addr3_values addr3_mode) @@ -725,18 +725,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm const struct scaler_data *scaler_data = get_scaler_data_for_plane(dml_ctx, plane_state, context); struct dc_stream_state *stream = context->streams[stream_index]; - if (stream->cursor_attributes.color_format == CURSOR_MODE_MONO) - plane->cursor.cursor_bpp = 2; - else if (stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_1BIT_AND - || stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA - || stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { - plane->cursor.cursor_bpp = 32; - } else if (stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED - || stream->cursor_attributes.color_format == CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED) { - plane->cursor.cursor_bpp = 64; - } else - plane->cursor.cursor_bpp = 32; - + plane->cursor.cursor_bpp = 32; plane->cursor.cursor_width = 256; plane->cursor.num_cursors = 1; @@ -827,6 +816,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm if (plane_state->mcm_luts.lut3d_data.lut3d_src == DC_CM2_TRANSFER_FUNC_SOURCE_VIDMEM) { plane->tdlut.setup_for_tdlut = true; + switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.layout) { case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_RGB: case DC_CM2_GPU_MEM_LAYOUT_3D_SWIZZLE_LINEAR_BGR: @@ -836,6 +826,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm plane->tdlut.tdlut_addressing_mode = dml2_tdlut_simple_linear; break; } + switch (plane_state->mcm_luts.lut3d_data.gpu_mem_params.size) { case DC_CM2_GPU_MEM_SIZE_171717: plane->tdlut.tdlut_width_mode = dml2_tdlut_width_17_cube; @@ -844,8 +835,8 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm //plane->tdlut.tdlut_width_mode = dml2_tdlut_width_flatten; // dml2_tdlut_width_flatten undefined break; } - } else - plane->tdlut.setup_for_tdlut = false; + } + plane->tdlut.setup_for_tdlut |= dml_ctx->config.force_tdlut_enable; plane->dynamic_meta_data.enable = false; plane->dynamic_meta_data.lines_before_active_required = 0; @@ -949,6 +940,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s int stream_index, plane_index; int disp_cfg_stream_location, disp_cfg_plane_location; struct dml2_display_cfg *dml_dispcfg = &dml_ctx->v21.display_config; + unsigned int plane_count = 0; memset(&dml_ctx->v21.dml_to_dc_pipe_mapping, 0, sizeof(struct dml2_dml_to_dc_pipe_mapping)); @@ -958,6 +950,11 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s dml_dispcfg->minimize_det_reallocation = true; dml_dispcfg->overrides.enable_subvp_implicit_pmo = true; + if (in_dc->debug.disable_unbounded_requesting) { + dml_dispcfg->overrides.hw.force_unbounded_requesting.enable = true; + dml_dispcfg->overrides.hw.force_unbounded_requesting.value = false; + } + for (stream_index = 0; stream_index < context->stream_count; stream_index++) { disp_cfg_stream_location = map_stream_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]); @@ -1002,33 +999,39 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s dml_dispcfg->plane_descriptors[disp_cfg_plane_location].overrides.uclk_pstate_change_strategy = dml21_force_pstate_method_to_uclk_state_change_strategy(dml_ctx->config.pmo.force_pstate_method_values[stream_index]); } + + plane_count++; } } } + if (plane_count == 0) { + dml_dispcfg->overrides.all_streams_blanked = true; + } + return true; } void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context) { /* TODO these should be the max of active, svp prefetch and idle should be tracked seperately */ - context->bw_ctx.bw.dcn.clk.dispclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.dispclk_khz; - context->bw_ctx.bw.dcn.clk.dcfclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.active.dcfclk_khz; - context->bw_ctx.bw.dcn.clk.dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.active.uclk_khz; - context->bw_ctx.bw.dcn.clk.fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.active.fclk_khz; - context->bw_ctx.bw.dcn.clk.idle_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.idle.uclk_khz; - context->bw_ctx.bw.dcn.clk.idle_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.idle.fclk_khz; - context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.deepsleep_dcfclk_khz; + context->bw_ctx.bw.dcn.clk.dispclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dispclk_khz; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.dcfclk_khz; + context->bw_ctx.bw.dcn.clk.dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.uclk_khz; + context->bw_ctx.bw.dcn.clk.fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.active.fclk_khz; + context->bw_ctx.bw.dcn.clk.idle_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.uclk_khz; + context->bw_ctx.bw.dcn.clk.idle_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.idle.fclk_khz; + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.deepsleep_dcfclk_khz; context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = in_ctx->v21.mode_programming.programming->fclk_pstate_supported; context->bw_ctx.bw.dcn.clk.p_state_change_support = in_ctx->v21.mode_programming.programming->uclk_pstate_supported; - context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.dtbrefclk_khz > 0; - context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4.dtbrefclk_khz; + context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz > 0; + context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz; } void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx) { struct dml2_core_internal_display_mode_lib *mode_lib = &in_ctx->v21.dml_init.dml2_instance->core_instance.clean_me_up.mode_lib; - double refclk_freq_in_mhz = (in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz > 0) ? (double)in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;; + double refclk_freq_in_mhz = (in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz > 0) ? (double)in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; if (reg_set_idx >= DML2_DCHUB_WATERMARK_SET_NUM) { /* invalid register set index */ @@ -1053,16 +1056,16 @@ static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_wat switch (wm_index) { case DML2_DCHUB_WATERMARK_SET_A: - wm_regs = &watermarks->dcn4.a; + wm_regs = &watermarks->dcn4x.a; break; case DML2_DCHUB_WATERMARK_SET_B: - wm_regs = &watermarks->dcn4.b; + wm_regs = &watermarks->dcn4x.b; break; case DML2_DCHUB_WATERMARK_SET_C: - wm_regs = &watermarks->dcn4.c; + wm_regs = &watermarks->dcn4x.c; break; case DML2_DCHUB_WATERMARK_SET_D: - wm_regs = &watermarks->dcn4.d; + wm_regs = &watermarks->dcn4x.d; break; case DML2_DCHUB_WATERMARK_SET_NUM: default: @@ -1110,10 +1113,11 @@ void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_ global_sync = &stream_programming->phantom_stream.global_sync; } - pipe_ctx->pipe_dlg_param.vstartup_start = global_sync->dcn4.vstartup_lines; - pipe_ctx->pipe_dlg_param.vupdate_offset = global_sync->dcn4.vupdate_offset_pixels; - pipe_ctx->pipe_dlg_param.vupdate_width = global_sync->dcn4.vupdate_vupdate_width_pixels; - pipe_ctx->pipe_dlg_param.vready_offset = global_sync->dcn4.vready_offset_pixels; + pipe_ctx->pipe_dlg_param.vstartup_start = global_sync->dcn4x.vstartup_lines; + pipe_ctx->pipe_dlg_param.vupdate_offset = global_sync->dcn4x.vupdate_offset_pixels; + pipe_ctx->pipe_dlg_param.vupdate_width = global_sync->dcn4x.vupdate_vupdate_width_pixels; + pipe_ctx->pipe_dlg_param.vready_offset = global_sync->dcn4x.vready_offset_pixels; + pipe_ctx->pipe_dlg_param.pstate_keepout = global_sync->dcn4x.pstate_keepout_start_lines; pipe_ctx->pipe_dlg_param.otg_inst = pipe_ctx->stream_res.tg->inst; @@ -1164,3 +1168,37 @@ void dml21_get_pipe_mcache_config( mcache_pipe_config->plane1_enabled = dml21_is_plane1_enabled(pln_prog->plane_descriptor->pixel_format); } + +void dml21_set_dc_p_state_type( + struct pipe_ctx *pipe_ctx, + struct dml2_per_stream_programming *stream_programming, + bool sub_vp_enabled) +{ + switch (stream_programming->uclk_pstate_method) { + case dml2_uclk_pstate_support_method_vactive: + case dml2_uclk_pstate_support_method_fw_vactive_drr: + pipe_ctx->p_state_type = P_STATE_V_ACTIVE; + break; + case dml2_uclk_pstate_support_method_vblank: + case dml2_uclk_pstate_support_method_fw_vblank_drr: + if (sub_vp_enabled) + pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP; + else + pipe_ctx->p_state_type = P_STATE_V_BLANK; + break; + case dml2_uclk_pstate_support_method_fw_subvp_phantom: + case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr: + pipe_ctx->p_state_type = P_STATE_SUB_VP; + break; + case dml2_uclk_pstate_support_method_fw_drr: + if (sub_vp_enabled) + pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP; + else + pipe_ctx->p_state_type = P_STATE_FPO; + break; + default: + pipe_ctx->p_state_type = P_STATE_UNKNOWN; + break; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h index 4cc0a1fbb93d..476a7f6e4875 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h @@ -26,4 +26,5 @@ void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_water void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx); void dml21_map_hw_resources(struct dml2_context *dml_ctx); void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config); +void dml21_set_dc_p_state_type(struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming, bool sub_vp_enabled); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c index d276458e50fd..51d491bffa32 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c @@ -11,7 +11,6 @@ #include "dml2_core_dcn4_calcs.h" - int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id) { int i; @@ -280,6 +279,23 @@ bool check_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) dc_is_dp_signal(pipe_ctx->stream->signal)); } + +static bool is_sub_vp_enabled(struct dc *dc, struct dc_state *context) +{ + int i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream && dc_state_get_paired_subvp_stream(context, pipe_ctx->stream) && + dc_state_get_pipe_subvp_type(context, pipe_ctx) == SUBVP_MAIN) { + return true; + } + } + return false; +} + + void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_per_stream_programming *stream_prog) { @@ -310,12 +326,16 @@ void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *contex pipe_ctx->det_buffer_size_kb = pln_prog->pipe_regs[pipe_reg_index]->det_size * 64; } - pipe_ctx->plane_res.bw.dppclk_khz = pln_prog->min_clocks.dcn4.dppclk_khz; + pipe_ctx->plane_res.bw.dppclk_khz = pln_prog->min_clocks.dcn4x.dppclk_khz; if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipe_ctx->plane_res.bw.dppclk_khz) context->bw_ctx.bw.dcn.clk.dppclk_khz = pipe_ctx->plane_res.bw.dppclk_khz; dml21_populate_mall_allocation_size(context, dml_ctx, pln_prog, pipe_ctx); memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[pipe_ctx->pipe_idx], &pln_prog->mcache_allocation, sizeof(struct dml2_mcache_surface_allocation)); + + bool sub_vp_enabled = is_sub_vp_enabled(pipe_ctx->stream->ctx->dc, context); + + dml21_set_dc_p_state_type(pipe_ctx, stream_prog, sub_vp_enabled); } static struct dc_stream_state *dml21_add_phantom_stream(struct dml2_context *dml_ctx, @@ -459,94 +479,103 @@ void dml21_build_fams2_programming(const struct dc *dc, struct dml2_context *dml_ctx) { int i, j, k; + unsigned int num_fams2_streams = 0; /* reset fams2 data */ - context->bw_ctx.bw.dcn.fams2_stream_count = 0; memset(&context->bw_ctx.bw.dcn.fams2_stream_params, 0, sizeof(struct dmub_fams2_stream_static_state) * DML2_MAX_PLANES); + memset(&context->bw_ctx.bw.dcn.fams2_global_config, 0, sizeof(struct dmub_cmd_fams2_global_config)); - if (!dml_ctx->v21.mode_programming.programming->fams2_required) - return; + if (dml_ctx->v21.mode_programming.programming->fams2_required) { + for (i = 0; i < context->stream_count; i++) { + int dml_stream_idx; + struct dc_stream_state *phantom_stream; + struct dc_stream_status *phantom_status; - for (i = 0; i < context->stream_count; i++) { - int dml_stream_idx; - struct dc_stream_state *phantom_stream; - struct dc_stream_status *phantom_status; - - struct dmub_fams2_stream_static_state *static_state = &context->bw_ctx.bw.dcn.fams2_stream_params[context->bw_ctx.bw.dcn.fams2_stream_count]; - - struct dc_stream_state *stream = context->streams[i]; - - if (context->stream_status[i].plane_count == 0 || - dml_ctx->config.svp_pstate.callbacks.get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) { - /* can ignore blanked or phantom streams */ - continue; - } + struct dmub_fams2_stream_static_state *static_state = &context->bw_ctx.bw.dcn.fams2_stream_params[num_fams2_streams]; - dml_stream_idx = dml21_helper_find_dml_pipe_idx_by_stream_id(dml_ctx, stream->stream_id); - if (dml_stream_idx < 0) { - ASSERT(dml_stream_idx >= 0); - continue; - } + struct dc_stream_state *stream = context->streams[i]; - /* copy static state from PMO */ - memcpy(static_state, - &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_params, - sizeof(struct dmub_fams2_stream_static_state)); - - /* get information from context */ - static_state->num_planes = context->stream_status[i].plane_count; - static_state->otg_inst = context->stream_status[i].primary_otg_inst; - - /* populate pipe masks for planes */ - for (j = 0; j < context->stream_status[i].plane_count; j++) { - for (k = 0; k < dc->res_pool->pipe_count; k++) { - if (context->res_ctx.pipe_ctx[k].stream && - context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && - context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { - static_state->pipe_mask |= (1 << k); - static_state->plane_pipe_masks[j] |= (1 << k); - } + if (context->stream_status[i].plane_count == 0 || + dml_ctx->config.svp_pstate.callbacks.get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) { + /* can ignore blanked or phantom streams */ + continue; } - } - /* get per method programming */ - switch (static_state->type) { - case FAMS2_STREAM_TYPE_VBLANK: - case FAMS2_STREAM_TYPE_VACTIVE: - case FAMS2_STREAM_TYPE_DRR: - break; - case FAMS2_STREAM_TYPE_SUBVP: - phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, stream); - if (!phantom_stream) - break; + dml_stream_idx = dml21_helper_find_dml_pipe_idx_by_stream_id(dml_ctx, stream->stream_id); + if (dml_stream_idx < 0) { + ASSERT(dml_stream_idx >= 0); + continue; + } - phantom_status = dml_ctx->config.callbacks.get_stream_status(context, phantom_stream); + /* copy static state from PMO */ + memcpy(static_state, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_params, + sizeof(struct dmub_fams2_stream_static_state)); - /* phantom status should always be present */ - ASSERT(phantom_status); - static_state->sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst; + /* get information from context */ + static_state->num_planes = context->stream_status[i].plane_count; + static_state->otg_inst = context->stream_status[i].primary_otg_inst; - /* populate pipe masks for phantom planes */ - for (j = 0; j < phantom_status->plane_count; j++) { + /* populate pipe masks for planes */ + for (j = 0; j < context->stream_status[i].plane_count; j++) { for (k = 0; k < dc->res_pool->pipe_count; k++) { if (context->res_ctx.pipe_ctx[k].stream && - context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && - context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { - static_state->sub_state.subvp.phantom_pipe_mask |= (1 << k); - static_state->sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); + context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { + static_state->pipe_mask |= (1 << k); + static_state->plane_pipe_masks[j] |= (1 << k); + } + } + } + + /* get per method programming */ + switch (static_state->type) { + case FAMS2_STREAM_TYPE_VBLANK: + case FAMS2_STREAM_TYPE_VACTIVE: + case FAMS2_STREAM_TYPE_DRR: + break; + case FAMS2_STREAM_TYPE_SUBVP: + phantom_stream = dml_ctx->config.svp_pstate.callbacks.get_paired_subvp_stream(context, stream); + if (!phantom_stream) + break; + + phantom_status = dml_ctx->config.callbacks.get_stream_status(context, phantom_stream); + + /* phantom status should always be present */ + ASSERT(phantom_status); + static_state->sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst; + + /* populate pipe masks for phantom planes */ + for (j = 0; j < phantom_status->plane_count; j++) { + for (k = 0; k < dc->res_pool->pipe_count; k++) { + if (context->res_ctx.pipe_ctx[k].stream && + context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { + static_state->sub_state.subvp.phantom_pipe_mask |= (1 << k); + static_state->sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); + } } } + break; + default: + ASSERT(false); + break; } - break; - default: - ASSERT(false); - break; + + num_fams2_streams++; } + } + + if (num_fams2_streams > 0) { + /* copy FAMS2 configuration */ + memcpy(&context->bw_ctx.bw.dcn.fams2_global_config, + &dml_ctx->v21.mode_programming.programming->fams2_global_config, + sizeof(struct dmub_cmd_fams2_global_config)); - context->bw_ctx.bw.dcn.fams2_stream_count++; + context->bw_ctx.bw.dcn.fams2_global_config.num_streams = num_fams2_streams; } - context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = context->bw_ctx.bw.dcn.fams2_stream_count > 0; + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; } bool dml21_is_plane1_enabled(enum dml2_source_format_class source_format) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 41ecf00ed196..d35dd507cb9f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -66,7 +66,9 @@ static void dml21_apply_debug_options(const struct dc *in_dc, struct dml2_contex disable_fams2; pmo_options->disable_fams2 = disable_fams2; - pmo_options->disable_drr_var_when_var_active = in_dc->debug.disable_fams_gaming; + pmo_options->disable_drr_var_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE || + in_dc->debug.disable_fams_gaming == INGAME_FAMS_MULTI_DISP_CLAMPED_ONLY; + pmo_options->disable_drr_clamped_when_var_active = in_dc->debug.disable_fams_gaming == INGAME_FAMS_DISABLE; } static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h index 521f77b8ac44..d82c681a5402 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h @@ -72,7 +72,7 @@ static const struct dml2_soc_qos_parameters dml_dcn31_soc_qos_params = { .scaling_factor_mhz = 0, }, .qos_params = { - .dcn4 = { + .dcn4x = { .df_qos_response_time_fclk_cycles = 300, .max_round_trip_to_furthest_cs_fclk_cycles = 350, .mall_overhead_fclk_cycles = 50, @@ -128,7 +128,7 @@ static const struct dml2_soc_qos_parameters dml_dcn31_soc_qos_params = { }, }, }, - .qos_type = dml2_qos_param_type_dcn4, + .qos_type = dml2_qos_param_type_dcn4x, }; static const struct dml2_soc_bb dml2_socbb_dcn31 = { @@ -228,7 +228,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn31 = { .scaling_factor_mhz = 0, }, .qos_params = { - .dcn4 = { + .dcn4x = { .df_qos_response_time_fclk_cycles = 300, .max_round_trip_to_furthest_cs_fclk_cycles = 350, .mall_overhead_fclk_cycles = 50, @@ -332,7 +332,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn31 = { }, }, }, - .qos_type = dml2_qos_param_type_dcn4, + .qos_type = dml2_qos_param_type_dcn4x, }, .power_management_parameters = { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h index fe07fcc3d0d5..8ef7977841de 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h @@ -8,7 +8,7 @@ #include "dml_top_soc_parameter_types.h" -static const struct dml2_soc_qos_parameters dml_dcn401_soc_qos_params = { +static const struct dml2_soc_qos_parameters dml_dcn4_variant_a_soc_qos_params = { .derate_table = { .system_active_urgent = { .dram_derate_percent_pixel = 22, @@ -52,7 +52,7 @@ static const struct dml2_soc_qos_parameters dml_dcn401_soc_qos_params = { .scaling_factor_mhz = 0, }, .qos_params = { - .dcn4 = { + .dcn4x = { .df_qos_response_time_fclk_cycles = 300, .max_round_trip_to_furthest_cs_fclk_cycles = 350, .mall_overhead_fclk_cycles = 50, @@ -78,7 +78,7 @@ static const struct dml2_soc_qos_parameters dml_dcn401_soc_qos_params = { }, }, }, - .qos_type = dml2_qos_param_type_dcn4, + .qos_type = dml2_qos_param_type_dcn4x, }; static const struct dml2_soc_bb dml2_socbb_dcn401 = { @@ -178,7 +178,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn401 = { .scaling_factor_mhz = 0, }, .qos_params = { - .dcn4 = { + .dcn4x = { .df_qos_response_time_fclk_cycles = 300, .max_round_trip_to_furthest_cs_fclk_cycles = 350, .mall_overhead_fclk_cycles = 50, @@ -282,7 +282,7 @@ static const struct dml2_soc_bb dml2_socbb_dcn401 = { }, }, }, - .qos_type = dml2_qos_param_type_dcn4, + .qos_type = dml2_qos_param_type_dcn4x, }, .power_management_parameters = { @@ -344,6 +344,9 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { .config_return_buffer_segment_size_in_kbytes = 64, .meta_fifo_size_in_kentries = 22, .compressed_buffer_segment_size_in_kbytes = 64, + .max_flip_time_us = 80, + .max_flip_time_lines = 32, + .hostvm_mode = 0, .subvp_drr_scheduling_margin_us = 100, .subvp_prefetch_end_to_mall_start_us = 15, .subvp_fw_processing_delay = 15, @@ -351,14 +354,18 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { .fams2 = { .max_allow_delay_us = 100 * 1000, - .scheduling_delay_us = 50, - .vertical_interrupt_ack_delay_us = 18, + .scheduling_delay_us = 125, + .vertical_interrupt_ack_delay_us = 40, .allow_programming_delay_us = 18, .min_allow_width_us = 20, .subvp_df_throttle_delay_us = 100, - .subvp_programming_delay_us = 18, + .subvp_programming_delay_us = 200, .subvp_prefetch_to_mall_delay_us = 18, - .drr_programming_delay_us = 18, + .drr_programming_delay_us = 35, + + .lock_timeout_us = 5000, + .recovery_timeout_us = 5000, + .flip_programming_delay_us = 300, }, }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h index a25f4e5977cf..a64ec4dcf11a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_H__ #define __DML_TOP_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h index 8247289ce7d3..83fc15bf13cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __dml2_TOP_DCHUB_REGISTERS_H__ #define __dml2_TOP_DCHUB_REGISTERS_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h index daae77f2672b..b132f676a68d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_DISPLAY_CFG_TYPES_H__ #define __DML_TOP_DISPLAY_CFG_TYPES_H__ @@ -411,7 +410,6 @@ struct dml2_stream_parameters { enum dml2_odm_mode odm_mode; bool disable_dynamic_odm; bool disable_subvp; - bool disable_fams2_drr; int minimum_vblank_idle_requirement_us; bool minimize_active_latency_hiding; @@ -478,6 +476,7 @@ struct dml2_display_cfg { bool max_outstanding_when_urgent_expected_disable; bool enable_subvp_implicit_pmo; //enables PMO to switch pipe uclk strategy to subvp, and generate phantom programming unsigned int best_effort_min_active_latency_hiding_us; + bool all_streams_blanked; } overrides; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h index 2f444f448770..8f624a912e78 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_policy_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_POLICY_TYPES_H__ #define __DML_TOP_POLICY_TYPES_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h index 065b2afab6fb..ebd8abe894a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_SOC_PARAMETER_TYPES_H__ #define __DML_TOP_SOC_PARAMETER_TYPES_H__ @@ -27,7 +26,7 @@ struct dml2_soc_derates { struct dml2_soc_derate_values system_idle_average; }; -struct dml2_dcn3_soc_qos_params { +struct dml2_dcn32x_soc_qos_params { struct { unsigned int base_latency_us; unsigned int base_latency_pixel_vm_us; @@ -53,7 +52,7 @@ struct dml2_dcn4_uclk_dpm_dependent_qos_params { unsigned int average_latency_when_non_urgent_uclk_cycles; }; -struct dml2_dcn4_soc_qos_params { +struct dml2_dcn4x_soc_qos_params { unsigned int df_qos_response_time_fclk_cycles; unsigned int max_round_trip_to_furthest_cs_fclk_cycles; unsigned int mall_overhead_fclk_cycles; @@ -69,7 +68,7 @@ struct dml2_dcn4_soc_qos_params { enum dml2_qos_param_type { dml2_qos_param_type_dcn3, - dml2_qos_param_type_dcn4 + dml2_qos_param_type_dcn4x }; struct dml2_soc_qos_parameters { @@ -81,8 +80,8 @@ struct dml2_soc_qos_parameters { } writeback; union { - struct dml2_dcn3_soc_qos_params dcn3; - struct dml2_dcn4_soc_qos_params dcn4; + struct dml2_dcn32x_soc_qos_params dcn32x; + struct dml2_dcn4x_soc_qos_params dcn4x; } qos_params; enum dml2_qos_param_type qos_type; @@ -152,6 +151,7 @@ struct dml2_soc_bb { double phy_downspread_percent; double dcn_downspread_percent; double dispclk_dppclk_vco_speed_mhz; + bool no_dfs; bool do_urgent_latency_adjustment; unsigned int mem_word_bytes; unsigned int num_dcc_mcaches; @@ -173,6 +173,7 @@ struct dml2_ip_capabilities { unsigned int meta_fifo_size_in_kentries; unsigned int compressed_buffer_segment_size_in_kbytes; unsigned int max_flip_time_us; + unsigned int max_flip_time_lines; unsigned int hostvm_mode; unsigned int subvp_drr_scheduling_margin_us; unsigned int subvp_prefetch_end_to_mall_start_us; @@ -190,6 +191,10 @@ struct dml2_ip_capabilities { unsigned int subvp_programming_delay_us; unsigned int subvp_prefetch_to_mall_delay_us; unsigned int drr_programming_delay_us; + + unsigned int lock_timeout_us; + unsigned int recovery_timeout_us; + unsigned int flip_programming_delay_us; } fams2; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index 8aa77bb190ea..1c773bbb9992 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -74,6 +74,7 @@ struct dml2_pmo_options { bool disable_drr_var; bool disable_drr_clamped; bool disable_drr_var_when_var_active; + bool disable_drr_clamped_when_var_active; bool disable_fams2; bool disable_vactive_det_fill_bw_pad; /* dml2_project_dcn4x_stage2_auto_drr_svp and above only */ bool disable_dyn_odm; @@ -228,7 +229,7 @@ struct dml2_per_plane_programming { union { struct { unsigned long dppclk_khz; - } dcn4; + } dcn4x; } min_clocks; struct dml2_mcache_surface_allocation mcache_allocation; @@ -262,7 +263,8 @@ union dml2_global_sync_programming { unsigned int vupdate_offset_pixels; unsigned int vupdate_vupdate_width_pixels; unsigned int vready_offset_pixels; - } dcn4; + unsigned int pstate_keepout_start_lines; + } dcn4x; }; struct dml2_per_stream_programming { @@ -273,7 +275,7 @@ struct dml2_per_stream_programming { unsigned long dscclk_khz; unsigned long dtbclk_khz; unsigned long phyclk_khz; - } dcn4; + } dcn4x; } min_clocks; union dml2_global_sync_programming global_sync; @@ -374,7 +376,7 @@ struct dml2_display_cfg_programming { unsigned long dispclk_khz; unsigned long dcfclk_deepsleep_khz; unsigned long dpp_ref_khz; - } dcn3; + } dcn32x; struct { struct { unsigned long uclk_khz; @@ -403,7 +405,7 @@ struct dml2_display_cfg_programming { uint32_t dpprefclk_did; uint32_t dtbrefclk_did; } divider_ids; - } dcn4; + } dcn4x; } min_clocks; bool uclk_pstate_supported; @@ -411,6 +413,7 @@ struct dml2_display_cfg_programming { /* indicates this configuration requires FW to support */ bool fams2_required; + struct dmub_cmd_fams2_global_config fams2_global_config; struct { bool supported_in_blank; // Changing to configurations where this is false requires stutter to be disabled during the transition diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index 04edcde423a9..0aa4e4d343b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_internal_shared_types.h" #include "dml2_core_shared_types.h" #include "dml2_core_dcn4.h" @@ -10,7 +9,7 @@ #include "dml2_debug.h" #include "lib_float_math.h" -struct dml2_core_ip_params core_dcn4_ip_caps_base = { +static const struct dml2_core_ip_params core_dcn4_ip_caps_base = { // Hardcoded values for DCN3x .vblank_nom_default_us = 668, .remote_iommu_outstanding_translations = 256, @@ -70,6 +69,7 @@ struct dml2_core_ip_params core_dcn4_ip_caps_base = { .max_num_dp2p0_streams = 4, .imall_supported = 1, .max_flip_time_us = 80, + .max_flip_time_lines = 32, .words_per_channel = 16, .subvp_fw_processing_delay_us = 15, @@ -77,84 +77,6 @@ struct dml2_core_ip_params core_dcn4_ip_caps_base = { .subvp_swath_height_margin_lines = 16, }; -struct dml2_core_ip_params core_dcn4sw_ip_caps_base = { - .vblank_nom_default_us = 668, - .remote_iommu_outstanding_translations = 256, - .rob_buffer_size_kbytes = 192, - .config_return_buffer_size_in_kbytes = 1280, - .config_return_buffer_segment_size_in_kbytes = 64, - .compressed_buffer_segment_size_in_kbytes = 64, - .dpte_buffer_size_in_pte_reqs_luma = 68, - .dpte_buffer_size_in_pte_reqs_chroma = 36, - .pixel_chunk_size_kbytes = 8, - .alpha_pixel_chunk_size_kbytes = 4, - .min_pixel_chunk_size_bytes = 1024, - .writeback_chunk_size_kbytes = 8, - .line_buffer_size_bits = 1171920, - .max_line_buffer_lines = 32, - .writeback_interface_buffer_size_kbytes = 90, - - //Number of pipes after DCN Pipe harvesting - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dispclk_ramp_margin_percent = 1, - .dppclk_delay_subtotal = 47, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 28, - .dppclk_delay_cnvc_cursor = 6, - .cursor_buffer_size = 24, - .cursor_chunk_size = 2, - .dispclk_delay_subtotal = 125, - .max_inter_dcn_tile_repeaters = 8, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .writeback_line_buffer_buffer_size = 0, - .num_dsc = 4, - .maximum_dsc_bits_per_component = 12, - .maximum_pixels_per_line_per_dsc_unit = 5760, - .dsc422_native_support = true, - .dcc_supported = true, - .ptoi_supported = false, - - .cursor_64bpp_support = true, - .dynamic_metadata_vm_enabled = false, - - .max_num_hdmi_frl_outputs = 1, - .max_num_dp2p0_outputs = 4, - .max_num_dp2p0_streams = 4, - .imall_supported = 1, - .max_flip_time_us = 80, - .words_per_channel = 16, - - .subvp_fw_processing_delay_us = 15, - .subvp_pstate_allow_width_us = 20, - .subvp_swath_height_margin_lines = 16, - - .dcn_mrq_present = 1, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_zs = 64, - .dcc_meta_buffer_size_bytes = 6272, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - - .dchub_arb_to_ret_delay = 102, - .hostvm_mode = 1, -}; - static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *ip_caps, const struct dml2_core_ip_params *ip_params) { ip_caps->pipe_count = ip_params->max_num_dpp; @@ -169,6 +91,7 @@ static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *i ip_caps->meta_fifo_size_in_kentries = ip_params->meta_fifo_size_in_kentries; ip_caps->compressed_buffer_segment_size_in_kbytes = ip_params->compressed_buffer_segment_size_in_kbytes; ip_caps->max_flip_time_us = ip_params->max_flip_time_us; + ip_caps->max_flip_time_lines = ip_params->max_flip_time_lines; ip_caps->hostvm_mode = ip_params->hostvm_mode; // FIXME_STAGE2: cleanup after adding all dv override to ip_caps @@ -192,6 +115,7 @@ static void patch_ip_params_with_ip_caps(struct dml2_core_ip_params *ip_params, ip_params->meta_fifo_size_in_kentries = ip_caps->meta_fifo_size_in_kentries; ip_params->compressed_buffer_segment_size_in_kbytes = ip_caps->compressed_buffer_segment_size_in_kbytes; ip_params->max_flip_time_us = ip_caps->max_flip_time_us; + ip_params->max_flip_time_lines = ip_caps->max_flip_time_lines; ip_params->hostvm_mode = ip_caps->hostvm_mode; } @@ -222,6 +146,7 @@ bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out) } memcpy(&core->clean_me_up.mode_lib.soc, in_out->soc_bb, sizeof(struct dml2_soc_bb)); + memcpy(&core->clean_me_up.mode_lib.ip_caps, in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); return true; } @@ -246,10 +171,12 @@ static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *p phantom->stream_index = phantom_stream_index; phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable; phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; - phantom->composition.viewport.plane0.height = (long int unsigned) math_ceil2( - (double)phantom->composition.viewport.plane0.height * (double)phantom_stream->timing.v_active / (double)main_stream->timing.v_active, 16.0); - phantom->composition.viewport.plane1.height = (long int unsigned) math_ceil2( - (double)phantom->composition.viewport.plane1.height * (double)phantom_stream->timing.v_active / (double)main_stream->timing.v_active, 16.0); + phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane0.height); + phantom->composition.viewport.plane1.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane1.height); phantom->immediate_flip = false; phantom->dynamic_meta_data.enable = false; phantom->cursor.num_cursors = 0; @@ -344,6 +271,8 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in // Check if FAMS2 is required if (display_cfg->stage3.performed && display_cfg->stage3.success) { programming->fams2_required = display_cfg->stage3.fams2_required; + + dml2_core_calcs_get_global_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, &programming->fams2_global_config); } // Only loop over all the main streams (the implicit svp streams will be packed as part of the main stream) @@ -621,7 +550,7 @@ bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out l->mode_programming_ex_params.min_clk_table = in_out->instance->minimum_clock_table; l->mode_programming_ex_params.cfg_support_info = in_out->cfg_support_info; l->mode_programming_ex_params.programming = in_out->programming; - l->mode_programming_ex_params.min_clk_index = lookup_uclk_dpm_index_by_freq(in_out->programming->min_clocks.dcn4.active.uclk_khz, + l->mode_programming_ex_params.min_clk_index = lookup_uclk_dpm_index_by_freq(in_out->programming->min_clocks.dcn4x.active.uclk_khz, &core->clean_me_up.mode_lib.soc); result = dml2_core_calcs_mode_programming_ex(&l->mode_programming_ex_params); @@ -641,20 +570,20 @@ bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out for (plane_index = 0; plane_index < in_out->programming->display_config.num_planes; plane_index++) { in_out->programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index]; - if (in_out->programming->display_config.plane_descriptors->overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; - else if (in_out->programming->display_config.plane_descriptors->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; - else if (in_out->programming->display_config.plane_descriptors->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; - else { - if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vactive; - else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vblank; - else - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported; - } + if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + else { + if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vactive; + else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vblank; + else + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported; + } dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &in_out->programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h index 235280c6dcf5..e62b2d3eeee6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_DCN4_H__ #define __DML2_CORE_DCN4_H__ bool core_dcn4_initialize(struct dml2_core_initialize_in_out *in_out); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 6f4026e396e0..805fd783131f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -8,35 +8,55 @@ #include "dml2_debug.h" #include "lib_float_math.h" #include "dml_top_types.h" -#include "dml2_core_shared.h" -#define DML_VM_PTE_ADL_PATCH_EN -//#define DML_TVM_UPDATE_EN -#define DML_TDLUT_ROW_BYTES_FIX_EN -#define DML_REG_LIMIT_CLAMP_EN #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4 -static void dml2_print_dml_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) +const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) +{ + switch (bw_type) { + case (dml2_core_internal_bw_sdp): + return("dml2_core_internal_bw_sdp"); + case (dml2_core_internal_bw_dram): + return("dml2_core_internal_bw_dram"); + case (dml2_core_internal_bw_max): + return("dml2_core_internal_bw_max"); + default: + return("dml2_core_internal_bw_unknown"); + } +} + +const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) +{ + switch (dml2_core_internal_soc_state_type) { + case (dml2_core_internal_soc_state_sys_idle): + return("dml2_core_internal_soc_state_sys_idle"); + case (dml2_core_internal_soc_state_sys_active): + return("dml2_core_internal_soc_state_sys_active"); + case (dml2_core_internal_soc_state_svp_prefetch): + return("dml2_core_internal_soc_state_svp_prefetch"); + case dml2_core_internal_soc_state_max: + default: + return("dml2_core_internal_soc_state_unknown"); + } +} + +static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder) +{ + *remainder = ((dividend / divisor) - (int)(dividend / divisor) > 0); + return dividend / divisor; +} + +static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) { dml2_printf("DML: ===================================== \n"); dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); - if (!fail_only || support->ImmediateFlipSupport == 0) - dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); - if (!fail_only || support->WritebackLatencySupport == 0) - dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); if (!fail_only || support->ScaleRatioAndTapsSupport == 0) dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); - if (!fail_only || support->P2IWith420 == 1) - dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); - if (!fail_only || support->DSCOnlyIfNecessaryWithBPP == 1) - dml2_printf("DML: support: DSCOnlyIfNecessaryWithBPP = %d\n", support->DSCOnlyIfNecessaryWithBPP); - if (!fail_only || support->DSC422NativeNotSupported == 1) - dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); - if (!fail_only || support->DSCSlicesODMModeSupported == 0) - dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + if (!fail_only || support->ViewportSizeSupport == 0) + dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) @@ -45,74 +65,87 @@ static void dml2_print_dml_mode_support_info(const struct dml2_core_internal_mod dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); if (!fail_only || support->MultistreamWithHDMIOreDP == 1) dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->ExceededMultistreamSlots == 1) + dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); if (!fail_only || support->NotEnoughLanesForMSO == 1) dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); - if (!fail_only || support->NumberOfOTGSupport == 0) - dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); - if (!fail_only || support->NumberOfHDMIFRLSupport == 0) - dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); - if (!fail_only || support->NumberOfDP2p0Support == 0) - dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); - if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) - dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); - if (!fail_only || support->CursorSupport == 0) - dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); - if (!fail_only || support->PitchSupport == 0) - dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); - if (!fail_only || support->ViewportExceedsSurface == 1) - dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); - if (!fail_only || support->ExceededMALLSize == 1) - dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); - if (!fail_only || support->EnoughWritebackUnits == 0) - dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); - if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) - dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); - if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); - if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) - dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); - if (!fail_only || support->ExceededMultistreamSlots == 1) - dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + if (!fail_only || support->P2IWith420 == 1) + dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + if (!fail_only || support->DSC422NativeNotSupported == 1) + dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + if (!fail_only || support->DSCSlicesODMModeSupported == 0) + dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); if (!fail_only || support->NotEnoughDSCUnits == 1) dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); if (!fail_only || support->NotEnoughDSCSlices == 1) dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); - if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) - dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); - if (!fail_only || support->LinkCapacitySupport == 0) - dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); if (!fail_only || support->ROBSupport == 0) dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); if (!fail_only || support->OutstandingRequestsSupport == 0) dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); - if (!fail_only || support->PTEBufferSizeNotExceeded == 0) - dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); - if (!fail_only || support->AvgBandwidthSupport == 0) - dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); - if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) - dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) + dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + if (!fail_only || support->TotalAvailablePipesSupport == 0) + dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->NumberOfOTGSupport == 0) + dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfHDMIFRLSupport == 0) + dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + if (!fail_only || support->EnoughWritebackUnits == 0) + dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + if (!fail_only || support->CursorSupport == 0) + dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); if (!fail_only || support->PrefetchSupported == 0) dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->AvgBandwidthSupport == 0) + dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); if (!fail_only || support->DynamicMetadataSupported == 0) dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); if (!fail_only || support->VRatioInPrefetchSupported == 0) dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); - if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) - dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); - if (!fail_only || support->TotalAvailablePipesSupport == 0) - dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->PTEBufferSizeNotExceeded == 1) + dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1) + dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + if (!fail_only || support->ExceededMALLSize == 1) + dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + if (!fail_only || support->g6_temp_read_support == 0) + dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + if (!fail_only || support->ImmediateFlipSupport == 0) + dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + if (!fail_only || support->LinkCapacitySupport == 0) + dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + if (!fail_only || support->ModeSupport == 0) dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); - if (!fail_only || support->ViewportSizeSupport == 0) - dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); dml2_printf("DML: ===================================== \n"); } @@ -235,6 +268,7 @@ dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStart dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix); dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix); dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix); +dml_get_per_pipe_var_func(pstate_keepout_dst_lines, unsigned int, mode_lib->mp.pstate_keepout_dst_lines); dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY); dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC); dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte); @@ -480,7 +514,7 @@ static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode default: DML2_ASSERT(0); return 256; - }; + } } static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan) @@ -2343,16 +2377,16 @@ static void calculate_mcache_row_bytes( } if (p->gpuvm_enable) { - meta_per_mvmpg_per_channel = (float)vmpg_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans; //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic if (p->surf_vert && vmpg_bytes > blk_bytes) { - meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / (float)256 / p->num_chans; } *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom } else { - meta_per_mvmpg_per_channel = (float) blk_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float) blk_bytes / (float)256 / p->num_chans; if (!p->surf_vert) *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0; @@ -2851,16 +2885,9 @@ static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch, s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels); for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->display_cfg->hostvm_enable == true) { + if (p->display_cfg->gpuvm_enable == true) { p->vm_group_bytes[k] = 512; p->dpte_group_bytes[k] = 512; - } else if (p->display_cfg->gpuvm_enable == true) { - p->vm_group_bytes[k] = 2048; - if (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes >= 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) { - p->dpte_group_bytes[k] = 512; - } else { - p->dpte_group_bytes[k] = 2048; - } } else { p->vm_group_bytes[k] = 0; p->dpte_group_bytes[k] = 0; @@ -3185,7 +3212,7 @@ static double CalculateUrgentLatency( double fabric_max_transport_latency_margin) { double urgent_latency = 0; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0) + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0); @@ -3196,7 +3223,7 @@ static double CalculateUrgentLatency( } } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); @@ -3226,7 +3253,7 @@ static double CalculateTripToMemory( double fabric_max_transport_latency_margin) { double trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); @@ -3235,7 +3262,7 @@ static double CalculateTripToMemory( } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); @@ -3265,7 +3292,7 @@ static double CalculateMetaTripToMemory( double fabric_max_transport_latency_margin) { double meta_trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); } else { @@ -3273,7 +3300,7 @@ static double CalculateMetaTripToMemory( } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); @@ -3781,8 +3808,8 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; - p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; - p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; } if (p->SwathHeightC[k] == 0) @@ -3841,7 +3868,7 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64; if (*p->UnboundedRequestEnabled) { *p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b, - (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / 64)), 1.0); + (double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / (p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]); dml2_printf("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes); @@ -3852,21 +3879,20 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch #endif *p->hw_debug5 = false; - if (!p->mrq_present) { - for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (!(*p->UnboundedRequestEnabled) - && p->display_cfg->plane_descriptors[k].surface.dcc.enable - && ((p->rob_buffer_size_kbytes * 1024 + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) - *p->hw_debug5 = true; -#ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); - dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); - dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); - dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); - dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); - dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); + for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { + if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1) + && p->display_cfg->plane_descriptors[k].surface.dcc.enable + && ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1) + + *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k]))) + *p->hw_debug5 = true; +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled); + dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION); + dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH); + dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte); + dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]); + dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); #endif - } } } @@ -4559,15 +4585,6 @@ static void calculate_tdlut_setting( return; } - - if (!p->setup_for_tdlut) { - *p->tdlut_groups_per_2row_ub = 0; - *p->tdlut_opt_time = 0; - *p->tdlut_drain_time = 0; - *p->tdlut_bytes_per_group = 0; - return; - } - if (p->tdlut_mpc_width_flag) { tdlut_mpc_width = 33; tdlut_bytes_per_group_simple = 39*256; @@ -4616,7 +4633,7 @@ static void calculate_tdlut_setting( *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; //the delivery cycles is DispClk cycles per line * number of lines * number of slices tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; - tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / 9.0; + tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1); } else { //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); @@ -4627,7 +4644,7 @@ static void calculate_tdlut_setting( //the tdlut is fetched during the 2 row times of prefetch. if (p->setup_for_tdlut) { - *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2(*p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); + *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; } @@ -4640,7 +4657,7 @@ static void calculate_tdlut_setting( dml2_printf("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz); dml2_printf("DML::%s: tdlut_width = %u\n", __func__, tdlut_width); - dml2_printf("DML::%s: tdlut_addressing_mode = %u\n", __func__, p->tdlut_addressing_mode); + dml2_printf("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear"); dml2_printf("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes); dml2_printf("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes); dml2_printf("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame); @@ -4706,11 +4723,12 @@ static void CalculateTarb( static double CalculateTWait( long reserved_vblank_time_ns, double UrgentLatency, - double Ttrip) + double Ttrip, + double g6_temp_read_blackout_us) { double TWait; double t_urg_trip = math_max2(UrgentLatency, Ttrip); - TWait = reserved_vblank_time_ns/1000.0 + t_urg_trip; + TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + t_urg_trip; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: reserved_vblank_time_ns = %d\n", __func__, reserved_vblank_time_ns); @@ -4858,13 +4876,23 @@ static double get_urgent_bandwidth_required( } if (!exclude_this_plane) { - surface_required_bw[k] = math_max4(NumberOfDPP[k] * prefetch_vmrow_bw[k], - l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur, - l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre, - (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]); + l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k]; + l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur; + l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; + l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]; + surface_required_bw[k] = math_max4(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw); /* export peak required bandwidth for the surface */ surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw); + dml2_printf("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw); + dml2_printf("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw); + dml2_printf("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw); + dml2_printf("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]); + dml2_printf("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]); +#endif } else { surface_required_bw[k] = 0.0; } @@ -4873,6 +4901,8 @@ static double get_urgent_bandwidth_required( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]); + dml2_printf("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw); + dml2_printf("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip); dml2_printf("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor); dml2_printf("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0); dml2_printf("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1); @@ -4886,6 +4916,8 @@ static double get_urgent_bandwidth_required( dml2_printf("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]); dml2_printf("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]); dml2_printf("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]); + dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]); + dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]); dml2_printf("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]); dml2_printf("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]); @@ -4964,7 +4996,7 @@ static void CalculateExtraLatency( max_request_size_bytes = request_size_bytes_chroma[k]; } - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK; *ExtraLatency = *ExtraLatency_sr; if (max_oustanding_when_urgent_expected) @@ -4980,11 +5012,14 @@ static void CalculateExtraLatency( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type); + dml2_printf("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); + dml2_printf("DML::%s: Tex_trips=%u\n", __func__, Tex_trips); dml2_printf("DML::%s: max_oustanding_when_urgent_expected=%u\n", __func__, max_oustanding_when_urgent_expected); dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock); dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + dml2_printf("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb); dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); @@ -5037,7 +5072,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->bytes_pp = 0.0; s->dep_bytes = 0.0; s->min_Lsw_oto = 0.0; + s->min_Lsw_equ = 0.0; s->Tsw_est1 = 0.0; + s->Tsw_est2 = 0.0; s->Tsw_est3 = 0.0; s->cursor_prefetch_bytes = 0; *p->prefetch_cursor_bw = 0; @@ -5059,7 +5096,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels); dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable); dml2_printf("DML::%s: VStartup = %u\n", __func__, p->VStartup); - dml2_printf("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup); dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable); dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait); @@ -5092,21 +5128,15 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock; s->trip_to_mem = p->Ttrip; -#ifdef DML_TVM_UPDATE_EN *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg); if (dcc_mrq_enable) *p->Tvm_trips_flip = *p->Tvm_trips; else *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; -#else - *p->Tvm_trips = p->ExtraLatencyPrefetch + s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)); - *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem; -#endif *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1); *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2); -#ifdef DML_TVM_UPDATE_EN if (p->DynamicMetadataVMEnabled == true) { *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; @@ -5114,15 +5144,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->Tdmdl_vm = 0; *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex } -#else - if (p->DynamicMetadataVMEnabled == true) { - *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips; - *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip; - } else { - *p->Tdmdl_vm = 0; - *p->Tdmdl = p->TWait + p->ExtraLatencyPrefetch; // Tex - } -#endif if (p->DynamicMetadataEnable == true) { if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) { @@ -5186,7 +5207,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler); #endif - s->NoTimeToPrefetch = false; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips); dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips); @@ -5199,14 +5219,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime; *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime; } else { -#ifdef DML_TVM_UPDATE_EN if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut) s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0); else - s->Tvm_trips_rounded = s->LineTime / 4.0; -#else - s->Tvm_trips_rounded = s->LineTime / 4.0; -#endif + s->Tvm_trips_rounded = s->LineTime / 4.0; *p->Tvm_trips_flip_rounded = s->LineTime / 4.0; } @@ -5235,16 +5251,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->Tno_bw = 0; } -#ifdef DML_TVM_UPDATE_EN if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3) *p->Tno_bw_flip = *p->Tno_bw; else *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip -#else - *p->Tno_bw_flip = 0; - if (p->display_cfg->gpuvm_enable == true) - *p->Tno_bw_flip = *p->Tno_bw; -#endif if (dml_is_420(p->myPipe->SourcePixelFormat)) { s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0; @@ -5258,32 +5268,28 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->max_Tsw = (math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime); s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; -#ifdef DML_TDLUT_ROW_BYTES_FIX_EN s->prefetch_bw_pr = s->prefetch_bw_pr * p->mall_prefetch_sdp_overhead_factor; s->prefetch_sw_bytes = s->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; -#endif s->prefetch_bw_oto = math_max2(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw); s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__; s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0); s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime); + s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__; + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0); + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime); + vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes; extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128); if (p->setup_for_tdlut) vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0); -#ifdef DML_TDLUT_ROW_BYTES_FIX_EN tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0); -#else - tdlut_row_bytes = p->tdlut_pte_bytes_per_frame; -#endif -#ifdef DML_REG_LIMIT_CLAMP_EN s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto, p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); -#endif s->Lsw_oto = math_ceil2(4.0 * math_max2(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0; if (p->display_cfg->gpuvm_enable == true) { @@ -5297,11 +5303,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); #endif } else { -#ifdef DML_TVM_UPDATE_EN s->Tvm_oto = s->Tvm_trips_rounded; -#else - s->Tvm_oto = s->LineTime / 4.0; -#endif } if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) { @@ -5325,19 +5327,16 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal); //Tpre_equ in line time -#ifdef DML_TVM_UPDATE_EN if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable) s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo; else s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo; -#else - s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(s->TWait_p + p->TCalc, *p->Tdmdl - p->Ttrip)) / s->LineTime - Lo; -#endif s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal); dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip); dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch); @@ -5375,6 +5374,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; +#ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); dml2_printf("DML::%s: LineTime: %f\n", __func__, s->LineTime); dml2_printf("DML::%s: VStartup: %u\n", __func__, p->VStartup); @@ -5395,18 +5395,12 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip); dml2_printf("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler); dml2_printf("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler); - - s->dep_bytes = math_max2(vm_bytes * p->HostVMInefficiencyFactor, p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); - - dml2_printf("DML::%s: dep_bytes: %f\n", __func__, s->dep_bytes); - dml2_printf("DML::%s: prefetch_sw_bytes: %f\n", __func__, s->prefetch_sw_bytes); dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); - - if (s->prefetch_sw_bytes < s->dep_bytes) { - s->prefetch_sw_bytes = 2 * s->dep_bytes; - dml2_printf("DML::%s: bump prefetch_sw_bytes to %f\n", __func__, s->prefetch_sw_bytes); - } + dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); + dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, s->Tpre_rounded, (s->Tpre_rounded - Tpre)); + dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); +#endif *p->dst_y_per_vm_vblank = 0; *p->dst_y_per_row_vblank = 0; @@ -5419,7 +5413,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time // So that means prefetch bw calculated can be higher since the total time availabe for prefetch is less - if (s->dst_y_prefetch_equ > 1) { + bool min_Lsw_equ_ok = s->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime; + + if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok) { s->prefetch_bw1 = 0.; s->prefetch_bw2 = 0.; s->prefetch_bw3 = 0.; @@ -5436,28 +5432,35 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw1 = 0; dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); - if ((p->VStartup == p->MaxVStartup) && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { + if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / - (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); + (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, s->Tpre_rounded); - dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_oto * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); - dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto); + dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); + dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); + dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); #endif } // prefetch_bw2: VM + SW - if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0) + if (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) { s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) / - (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded); - else + (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded); + s->Tsw_est2 = s->prefetch_sw_bytes / s->prefetch_bw2; + } else s->prefetch_bw2 = 0; + dml2_printf("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2); + if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) { + s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime); + dml2_printf("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2); + } + // prefetch_bw3: 2*R0 + SW if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) { s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + s->prefetch_sw_bytes) / @@ -5467,8 +5470,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw3 = 0; dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3); - if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && ((s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { - s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); + if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { + s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3); } @@ -5484,6 +5487,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); dml2_printf("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips)); dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); + dml2_printf("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2); dml2_printf("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3); dml2_printf("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1); dml2_printf("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2); @@ -5504,9 +5508,18 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // here is to make sure equ bw wont be more agressive than the latency-based requirement. // check vm time >= vm_trips // check r0 time >= r0_trips + + double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes); + + dml2_printf("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded); + dml2_printf("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded); + if (s->prefetch_bw1 > 0) { - if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1 >= s->Tvm_trips_rounded && - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw1 >= s->Tr0_trips_rounded) { + double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1; + double row_transfer_time = total_row_bytes / s->prefetch_bw1; + dml2_printf("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time); + dml2_printf("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time); + if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case1OK = true; } } @@ -5516,8 +5529,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // check vm time >= vm_trips // check r0 time < r0_trips if (s->prefetch_bw2 > 0) { - if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2 >= s->Tvm_trips_rounded && - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw2 < s->Tr0_trips_rounded) { + double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2; + double row_transfer_time = total_row_bytes / s->prefetch_bw2; + dml2_printf("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time); + dml2_printf("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time); + if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) { Case2OK = true; } } @@ -5526,8 +5542,11 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // check vm time < vm_trips // check r0 time >= r0_trips if (s->prefetch_bw3 > 0) { - if (*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3 < s->Tvm_trips_rounded && - (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw3 >= s->Tr0_trips_rounded) { + double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3; + double row_transfer_time = total_row_bytes / s->prefetch_bw3; + dml2_printf("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time); + dml2_printf("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time); + if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) { Case3OK = true; } } @@ -5542,11 +5561,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw_equ = s->prefetch_bw4; } -#ifdef DML_REG_LIMIT_CLAMP_EN s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ, p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); -#endif #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Case1OK: %u\n", __func__, Case1OK); dml2_printf("DML::%s: Case2OK: %u\n", __func__, Case2OK); @@ -5595,13 +5612,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->TimeForFetchingVM = s->Tvm_equ; s->TimeForFetchingRowInVBlank = s->Tr0_equ; - if (p->VStartup == p->MaxVStartup) { - *p->dst_y_per_vm_vblank = math_floor2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_floor2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; - } else { - *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; - } + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; + #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__); #endif @@ -5645,7 +5658,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); + dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY); *p->VRatioPrefetchY = 0; } #ifdef __DML_VBA_DEBUG__ @@ -5668,7 +5681,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0)); } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); + dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC); *p->VRatioPrefetchC = 0; } #ifdef __DML_VBA_DEBUG__ @@ -5690,14 +5703,13 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch #endif } else { s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); - dml2_printf("DML::%s: MyErr set, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); + dml2_printf("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required); + dml2_printf("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ); *p->VRatioPrefetchY = 0; *p->VRatioPrefetchC = 0; *p->RequiredPrefetchPixelDataBWLuma = 0; *p->RequiredPrefetchPixelDataBWChroma = 0; } - dml2_printf("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM); dml2_printf("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM); dml2_printf("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank); @@ -5708,7 +5720,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow); } else { - dml2_printf("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + dml2_printf("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); + dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", + __func__, min_Lsw_equ_ok, s->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); s->NoTimeToPrefetch = true; s->TimeForFetchingVM = 0; s->TimeForFetchingRowInVBlank = 0; @@ -5740,7 +5754,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else { prefetch_vm_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); + dml2_printf("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank); } if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { @@ -5756,7 +5770,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else { prefetch_row_bw = 0; s->NoTimeToPrefetch = true; - dml2_printf("DML::%s: MyErr set. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); + dml2_printf("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank); } *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw); @@ -5773,11 +5787,16 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->VRatioPrefetchC = 0; *p->RequiredPrefetchPixelDataBWLuma = 0; *p->RequiredPrefetchPixelDataBWChroma = 0; + *p->prefetch_vmrow_bw = 0; } dml2_printf("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank); dml2_printf("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank); + dml2_printf("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw); + dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma); + dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma); dml2_printf("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch); + return s->NoTimeToPrefetch; } @@ -6169,6 +6188,7 @@ static void CalculateFlipSchedule( unsigned int dpte_row_height_chroma, bool use_one_row_for_frame_flip, unsigned int max_flip_time_us, + unsigned int max_flip_time_lines, unsigned int per_pipe_flip_bytes, unsigned int meta_row_bytes, unsigned int meta_row_height, @@ -6183,12 +6203,13 @@ static void CalculateFlipSchedule( { struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals; - l->dual_plane = dml2_core_shared_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; + l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha; l->dpte_row_bytes = DPTEBytesPerRow; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable); dml2_printf("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us); + dml2_printf("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines); dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes); dml2_printf("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw); @@ -6239,7 +6260,8 @@ static void CalculateFlipSchedule( if (use_lb_flip_bw) { // For mode check, calculation the flip bw requirement with worst case flip time - l->max_flip_time = math_min2(l->min_row_time, math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us)); + l->max_flip_time = math_min2(math_min2(l->min_row_time, (double)max_flip_time_lines * LineTime / VRatio), + math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us)); //The lower bound on flip bandwidth // Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required @@ -6257,7 +6279,7 @@ static void CalculateFlipSchedule( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time); dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes); - dml2_printf("DML::%s: total row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_row_bytes); + dml2_printf("DML::%s: total row bytes (%d row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes); dml2_printf("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes); dml2_printf("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip)); dml2_printf("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded)); @@ -6268,6 +6290,7 @@ static void CalculateFlipSchedule( dml2_printf("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows); dml2_printf("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime); dml2_printf("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows); + dml2_printf("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded)); } #endif l->lb_flip_bw = math_max3(l->lb_flip_bw, @@ -6284,7 +6307,7 @@ static void CalculateFlipSchedule( *dst_y_per_vm_flip = 1; // not used *dst_y_per_row_flip = 1; // not used - *ImmediateFlipSupportedForPipe = true; + *ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded); } else { if (iflip_enable) { l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i) @@ -6350,6 +6373,7 @@ static void CalculateFlipSchedule( dml2_printf("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip); dml2_printf("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip); dml2_printf("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip); + dml2_printf("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time); } dml2_printf("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw); dml2_printf("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe); @@ -6380,6 +6404,12 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep; + if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) { + p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; + } p->Watermark->g6_temp_read_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; #ifdef __DML_VBA_DEBUG__ @@ -6541,7 +6571,8 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_unsupported; if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) { - if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) + if (p->display_cfg->overrides.all_streams_blanked || + (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)) p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank_and_vactive; else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0) p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive; @@ -6585,13 +6616,13 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]); s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k]; - if (dml2_core_shared_is_420(p->display_cfg->plane_descriptors[k].pixel_format)) + if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format)) p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c)); else p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, p->meta_row_height_c[k]); + dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]); dml2_printf("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c); dml2_printf("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c); dml2_printf("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c); @@ -6856,7 +6887,8 @@ struct dml2_core_internal_g6_temp_read_blackouts_table { } entries[DML_MAX_CLK_TABLE_SIZE]; }; -struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = { +static const struct dml2_core_internal_g6_temp_read_blackouts_table + core_dcn4_g6_temp_read_blackout_table = { .entries = { { .uclk_khz = 96000, @@ -6921,6 +6953,43 @@ static double get_g6_temp_read_blackout_us( return (double)blackout_us; } +static double get_max_urgent_latency_us( + struct dml2_dcn4x_soc_qos_params *dcn4x, + double uclk_freq_mhz, + double FabricClock, + unsigned int min_clk_index) +{ + double latency; + latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz + * (1 + dcn4x->umc_max_latency_margin / 100.0) + + dcn4x->mall_overhead_fclk_cycles / FabricClock + + dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock + * (1 + dcn4x->fabric_max_transport_latency_margin / 100.0); + return latency; +} + +static void calculate_pstate_keepout_dst_lines( + const struct dml2_display_cfg *display_cfg, + const struct dml2_core_internal_watermarks *watermarks, + unsigned int pstate_keepout_dst_lines[]) +{ + const struct dml2_stream_parameters *stream_descriptor; + unsigned int i; + + for (i = 0; i < display_cfg->num_planes; i++) { + if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[i])) { + stream_descriptor = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[i].stream_index]; + + pstate_keepout_dst_lines[i] = + (unsigned int)math_ceil(watermarks->DRAMClockChangeWatermark / ((double)stream_descriptor->timing.h_total * 1000.0 / (double)stream_descriptor->timing.pixel_clock_khz)); + + if (pstate_keepout_dst_lines[i] > stream_descriptor->timing.v_total - 1) { + pstate_keepout_dst_lines[i] = stream_descriptor->timing.v_total - 1; + } + } + } +} + static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params) { struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib; @@ -6963,7 +7032,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config); mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000); mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000); - mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); + mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table); #if defined(__DML_VBA_DEBUG__) @@ -6981,7 +7050,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); dml2_printf("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz); dml2_printf("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock); - dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz); dml2_printf("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes); dml2_printf("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present); @@ -7126,7 +7194,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.WritebackLatencySupport = true; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && - (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024.0 / mode_lib->soc.qos_parameters.writeback.base_latency_us)) { + (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->soc.qos_parameters.writeback.base_latency_us)) { mode_lib->ms.support.WritebackLatencySupport = false; } } @@ -7202,17 +7270,17 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out #if defined(DV_BUILD) // Assume a memory config setting of 3 in 420 mode or get a new ip parameter that reflects the programming. if (mode_lib->ms.BytePerPixelC[k] != 0.0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { - lb_buffer_size_bits_luma = 34620 * 57;; + lb_buffer_size_bits_luma = 34620 * 57; lb_buffer_size_bits_chroma = 13560 * 57; } #endif */ - mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ / + mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 / (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0)); if (mode_lib->ms.BytePerPixelC[k] == 0.0) { mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0; } else { - mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /*FIXME_STAGE2 was: LBBitPerPixel*/ / + mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 / (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0)); } @@ -7231,10 +7299,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out /* Cursor Support Check */ mode_lib->ms.support.CursorSupport = true; for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].cursor.cursor_width > 0.0) { - if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false) { + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { + if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false) mode_lib->ms.support.CursorSupport = false; - } } } @@ -7295,7 +7362,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.ViewportExceedsSurface = false; if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) { for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width || + display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) { mode_lib->ms.support.ViewportExceedsSurface = true; #if defined(__DML_VBA_DEBUG__) dml2_printf("DML::%s: k=%u ViewportWidth = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width); @@ -7304,11 +7372,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: k=%u SurfaceHeightY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height); dml2_printf("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface); #endif - if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { - if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width || - display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) { - mode_lib->ms.support.ViewportExceedsSurface = true; - } + } + if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { + if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width || + display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) { + mode_lib->ms.support.ViewportExceedsSurface = true; } } } @@ -7466,6 +7534,10 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out &mode_lib->ms.OutputRate[k], &mode_lib->ms.RequiredSlots[k]); + if (s->OutputBpp[k] == 0.0) { + s->OutputBpp[k] = mode_lib->ms.OutputBpp[k]; + } + if (mode_lib->ms.RequiresDSC[k] == false) { mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC; mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC; @@ -7580,7 +7652,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_width, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, mode_lib->ip.writeback_line_buffer_buffer_size)); } @@ -7665,8 +7737,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true) mode_lib->ms.support.P2IWith420 = true; - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary && s->OutputBpp[k] != 0) - mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true; if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support) mode_lib->ms.support.DSC422NativeNotSupported = true; @@ -7819,7 +7889,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k], mode_lib->ms.ODMMode[k], mode_lib->ip.maximum_dsc_bits_per_component, - mode_lib->ms.OutputBpp[k], + s->OutputBpp[k], display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, mode_lib->ms.support.NumberOfDSCSlices[k], @@ -8059,59 +8129,63 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.excess_vactive_fill_bw_c); mode_lib->ms.UrgLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, mode_lib->ms.FabricClock, mode_lib->ms.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->ms.TripToMemory = CalculateTripToMemory( mode_lib->ms.UrgLatency, mode_lib->ms.FabricClock, mode_lib->ms.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory); for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); bool cursor_not_enough_urgent_latency_hiding = 0; - calculate_cursor_req_attributes( - display_cfg->plane_descriptors[k].cursor.cursor_width, - display_cfg->plane_descriptors[k].cursor.cursor_bpp, - // output - &s->cursor_lines_per_chunk[k], - &s->cursor_bytes_per_line[k], - &s->cursor_bytes_per_chunk[k], - &s->cursor_bytes[k]); - - calculate_cursor_urgent_burst_factor( - mode_lib->ip.cursor_buffer_size, - display_cfg->plane_descriptors[k].cursor.cursor_width, - s->cursor_bytes_per_chunk[k], - s->cursor_lines_per_chunk[k], - line_time_us, - mode_lib->ms.UrgLatency, + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { + calculate_cursor_req_attributes( + display_cfg->plane_descriptors[k].cursor.cursor_width, + display_cfg->plane_descriptors[k].cursor.cursor_bpp, + + // output + &s->cursor_lines_per_chunk[k], + &s->cursor_bytes_per_line[k], + &s->cursor_bytes_per_chunk[k], + &s->cursor_bytes[k]); + + calculate_cursor_urgent_burst_factor( + mode_lib->ip.cursor_buffer_size, + display_cfg->plane_descriptors[k].cursor.cursor_width, + s->cursor_bytes_per_chunk[k], + s->cursor_lines_per_chunk[k], + line_time_us, + mode_lib->ms.UrgLatency, + + // output + &mode_lib->ms.UrgentBurstFactorCursor[k], + &cursor_not_enough_urgent_latency_hiding); + } - // output - &mode_lib->ms.UrgentBurstFactorCursor[k], - &cursor_not_enough_urgent_latency_hiding); mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k]; #ifdef __DML_VBA_DEBUG__ @@ -8254,20 +8328,20 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true; mode_lib->ms.support.avg_urgent_latency_us - = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); mode_lib->ms.support.avg_non_urgent_latency_us - = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k] / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); @@ -8287,7 +8361,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out #endif } - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4 && mode_lib->ms.BytePerPixelC[k] > 0) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) { outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k] / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); @@ -8460,7 +8534,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out { mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep; - calculate_hostvm_inefficiency_factor( &s->HostVMInefficiencyFactor, &s->HostVMInefficiencyFactorPrefetch, @@ -8501,10 +8574,15 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); + CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, s->ReorderingBytes, mode_lib->ms.DCFCLK, mode_lib->ms.FabricClock, @@ -8540,7 +8618,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.TWait[k] = CalculateTWait( display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, mode_lib->ms.UrgLatency, - mode_lib->ms.TripToMemory); + mode_lib->ms.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? + get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k]; myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK; @@ -8587,7 +8667,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k]; - CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[k]; CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; @@ -8669,8 +8748,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]); dml2_printf("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]); dml2_printf("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]); - dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); dml2_printf("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]); + dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]); } } @@ -8683,20 +8762,15 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.support.VRatioInPrefetchSupported = true; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || - mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__) { + if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || + mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { mode_lib->ms.support.VRatioInPrefetchSupported = false; + dml2_printf("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + dml2_printf("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__); dml2_printf("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported); } } - s->AnyLinesForVMOrRowTooLarge = false; - for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if (mode_lib->ms.LinesForDPTERow[k] >= 16 || mode_lib->ms.LinesForVM[k] >= 32) { - s->AnyLinesForVMOrRowTooLarge = true; - } - } - // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok if (mode_lib->ms.support.PrefetchSupported) { for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { @@ -8845,6 +8919,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.dpte_row_height_chroma[k], mode_lib->ms.use_one_row_for_frame_flip[k], mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, s->per_pipe_flip_bytes[k], mode_lib->ms.meta_row_bytes[k], s->meta_row_height_luma[k], @@ -8932,6 +9007,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out s->mSOCParameters.USRRetrainingLatency = 0; s->mSOCParameters.SMNLatency = 0; s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index); + s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index); + s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; CalculateWatermarks_params->display_cfg = display_cfg; CalculateWatermarks_params->USRRetrainingRequired = false; @@ -8951,7 +9029,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC; CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY; CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC; - //CalculateWatermarks_params->LBBitPerPixel = 57; // FIXME_STAGE2, need a new ip param? CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY; CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC; CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP; @@ -8979,29 +9056,24 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs; CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); - } + calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]); + } + dml2_printf("DML::%s: Done prefetch calculation\n", __func__); // End of Prefetch Check - dml2_printf("DML::%s: Done prefetch calculation\n", __func__); + mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us; //Re-ordering Buffer Support Check - mode_lib->ms.support.max_urgent_latency_us - = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); - - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 - / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= mode_lib->ms.support.max_urgent_latency_us) { + / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) { mode_lib->ms.support.ROBSupport = true; } else { mode_lib->ms.support.ROBSupport = false; } } else { - if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { + if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { mode_lib->ms.support.ROBSupport = true; } else { mode_lib->ms.support.ROBSupport = false; @@ -9024,15 +9096,12 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.dram_change_vactive_det_fill_delay_us); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.max_urgent_latency_us); + dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us); dml2_printf("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport); #endif /*Mode Support, Voltage State and SOC Configuration*/ { - // s->dram_clock_change_support = 1; - // s->f_clock_change_support = 1; - if (mode_lib->ms.support.ScaleRatioAndTapsSupport && mode_lib->ms.support.SourceFormatPixelAndScanSupport && mode_lib->ms.support.ViewportSizeSupport @@ -9043,9 +9112,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out && !mode_lib->ms.support.ExceededMultistreamSlots && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink && !mode_lib->ms.support.NotEnoughLanesForMSO - //&& mode_lib->ms.support.LinkCapacitySupport == true // FIXME_STAGE2 && !mode_lib->ms.support.P2IWith420 - && !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP && !mode_lib->ms.support.DSC422NativeNotSupported && mode_lib->ms.support.DSCSlicesODMModeSupported && !mode_lib->ms.support.NotEnoughDSCUnits @@ -9113,7 +9180,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out #if defined(__DML_VBA_DEBUG__) if (!mode_lib->ms.support.ModeSupport) - dml2_print_dml_mode_support_info(&mode_lib->ms.support, true); + dml2_print_mode_support_info(&mode_lib->ms.support, true); dml2_printf("DML::%s: --- DONE --- \n", __func__); #endif @@ -9132,6 +9199,10 @@ unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support; dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); + + for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++) + dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); return result; @@ -9373,11 +9444,9 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE } else { dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0)); } -#ifdef DML_VM_PTE_ADL_PATCH_EN if (dpte_groups_per_row_luma_ub <= 2) { dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1; } -#endif dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); @@ -9406,11 +9475,9 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE } else { dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0)); } -#ifdef DML_VM_PTE_ADL_PATCH_EN if (dpte_groups_per_row_chroma_ub <= 2) { dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1; } -#endif dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); @@ -9535,17 +9602,16 @@ static void CalculateVMGroupAndRequestTimes( line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz; -#ifdef DML_VM_PTE_ADL_PATCH_EN - if (num_group_per_lower_vm_stage_flip <= 2) { - num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage_flip + 1; - } + if (num_group_per_lower_vm_stage_pref > 0) + TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; + else + TimePerVMGroupVBlank[k] = 0; + + if (num_group_per_lower_vm_stage_flip > 0) + TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; + else + TimePerVMGroupFlip[k] = 0; - if (num_group_per_lower_vm_stage_pref <= 2) { - num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage_pref + 1; - } -#endif - TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; - TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; if (num_req_per_lower_vm_stage_pref > 0) TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref; else @@ -9599,10 +9665,6 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc bool FoundCriticalSurface = false; double LastZ8StutterPeriod = 0; - unsigned int SwathSizeCriticalSurface; - unsigned int LastChunkOfSwathSize; - unsigned int MissingPartOfLastSwathOfDETSize; - memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals)); for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { @@ -9777,7 +9839,7 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) - / math_max2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + + / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) + *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)); @@ -9871,19 +9933,11 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame); #endif - SwathSizeCriticalSurface = (unsigned int)(l->BytePerPixelYCriticalSurface * l->SwathHeightYCriticalSurface * math_ceil2(l->SwathWidthYCriticalSurface, l->BlockWidth256BytesYCriticalSurface)); - LastChunkOfSwathSize = SwathSizeCriticalSurface % (p->PixelChunkSizeInKByte * 1024); - MissingPartOfLastSwathOfDETSize = (unsigned int)(math_ceil2(l->DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) - l->DETBufferSizeYCriticalSurface); - - *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface && (LastChunkOfSwathSize > 0) && - (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); + *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: SwathSizeCriticalSurface = %u\n", __func__, SwathSizeCriticalSurface); dml2_printf("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface); dml2_printf("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte); - dml2_printf("DML::%s: LastChunkOfSwathSize = %u\n", __func__, LastChunkOfSwathSize); - dml2_printf("DML::%s: MissingPartOfLastSwathOfDETSize = %u\n", __func__, MissingPartOfLastSwathOfDETSize); dml2_printf("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); #endif } @@ -9928,14 +9982,14 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info); dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane); - mode_lib->mp.Dcfclk = programming->min_clocks.dcn4.active.dcfclk_khz / 1000.0; - mode_lib->mp.FabricClock = programming->min_clocks.dcn4.active.fclk_khz / 1000.0; - mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); - mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4.active.uclk_khz / 1000.0; - mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4.dpprefclk_khz / 1000.0; - s->SOCCLK = (double)programming->min_clocks.dcn4.socclk_khz / 1000; - mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); - mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table); + mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0; + mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0; + mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); + mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0; + mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0; + s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000; + mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); + mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table); for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; @@ -9970,18 +10024,18 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; - mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4.dppclk_khz / 1000.0; + mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0; dml2_assert(mode_lib->mp.Dppclk[k] > 0); } for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; - mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4.dscclk_khz / 1000.0; + mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0; dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); } - mode_lib->mp.Dispclk = programming->min_clocks.dcn4.dispclk_khz / 1000.0; - mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4.deepsleep_dcfclk_khz / 1000.0; + mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0; + mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0; dml2_assert(mode_lib->mp.Dcfclk > 0); dml2_assert(mode_lib->mp.FabricClock > 0); @@ -10462,11 +10516,16 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); } + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); + CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, - s->ReorderBytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, + s->ReorderingBytes, mode_lib->mp.Dcfclk, mode_lib->mp.FabricClock, mode_lib->ip.pixel_chunk_size_kbytes, @@ -10551,32 +10610,32 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.excess_vactive_fill_bw_c); mode_lib->mp.UrgentLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = CalculateTripToMemory( mode_lib->mp.UrgentLatency, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory); @@ -10585,38 +10644,40 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.meta_trip_adder_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); for (k = 0; k < s->num_active_planes; ++k) { bool cursor_not_enough_urgent_latency_hiding = 0; - double line_time_us; + double line_time_us = 0.0; - calculate_cursor_req_attributes( - display_cfg->plane_descriptors[k].cursor.cursor_width, - display_cfg->plane_descriptors[k].cursor.cursor_bpp, + line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { + calculate_cursor_req_attributes( + display_cfg->plane_descriptors[k].cursor.cursor_width, + display_cfg->plane_descriptors[k].cursor.cursor_bpp, - // output - &s->cursor_lines_per_chunk[k], - &s->cursor_bytes_per_line[k], - &s->cursor_bytes_per_chunk[k], - &s->cursor_bytes[k]); - - line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); - - calculate_cursor_urgent_burst_factor( - mode_lib->ip.cursor_buffer_size, - display_cfg->plane_descriptors[k].cursor.cursor_width, - s->cursor_bytes_per_chunk[k], - s->cursor_lines_per_chunk[k], - line_time_us, - mode_lib->mp.UrgentLatency, + // output + &s->cursor_lines_per_chunk[k], + &s->cursor_bytes_per_line[k], + &s->cursor_bytes_per_chunk[k], + &s->cursor_bytes[k]); + + calculate_cursor_urgent_burst_factor( + mode_lib->ip.cursor_buffer_size, + display_cfg->plane_descriptors[k].cursor.cursor_width, + s->cursor_bytes_per_chunk[k], + s->cursor_lines_per_chunk[k], + line_time_us, + mode_lib->mp.UrgentLatency, - // output - &mode_lib->mp.UrgentBurstFactorCursor[k], - &cursor_not_enough_urgent_latency_hiding); + // output + &mode_lib->mp.UrgentBurstFactorCursor[k], + &cursor_not_enough_urgent_latency_hiding); + } mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k]; CalculateUrgentBurstFactor( @@ -10676,7 +10737,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.TWait[k] = CalculateTWait( display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, mode_lib->mp.UrgentLatency, - mode_lib->mp.TripToMemory); + mode_lib->mp.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? + get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); myPipe->Dppclk = mode_lib->mp.Dppclk[k]; myPipe->Dispclk = mode_lib->mp.Dispclk; @@ -10722,7 +10785,6 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format; CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters; CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k]; - CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k]; CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes; CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled; @@ -10808,9 +10870,13 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex if (mode_lib->mp.dst_y_prefetch[k] < 2) s->DestinationLineTimesForPrefetchLessThan2 = true; - if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ || - mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__) + if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || + mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { s->VRatioPrefetchMoreThanMax = true; + dml2_printf("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__); + dml2_printf("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__); + dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); + } if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) { dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]); @@ -10994,6 +11060,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.dpte_row_height_chroma[k], mode_lib->mp.use_one_row_for_frame_flip[k], mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, s->per_pipe_flip_bytes[k], mode_lib->mp.meta_row_bytes[k], mode_lib->mp.meta_row_height[k], @@ -11143,6 +11210,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->mmSOCParameters.USRRetrainingLatency = 0; s->mmSOCParameters.SMNLatency = 0; s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index); + s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index); + s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; CalculateWatermarks_params->display_cfg = display_cfg; CalculateWatermarks_params->USRRetrainingRequired = false; @@ -11162,7 +11232,6 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC; CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY; CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC; - //CalculateWatermarks_params->LBBitPerPixel = 57; //FIXME_STAGE2 CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY; CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC; CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY; @@ -11203,6 +11272,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex } } + calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines); + dml2_printf("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index); dml2_printf("DML::%s: DEBUG PixelClock = %d kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz)); @@ -11491,9 +11562,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params) { + dml2_printf("DML::%s: ------------- START ----------\n", __func__); bool result = dml_core_mode_programming(in_out_params); - dml2_printf("DML::%s: ------------- START ----------\n", __func__); dml2_printf("DML::%s: result = %0d\n", __func__, result); dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); return result; @@ -12186,10 +12257,11 @@ void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *display_cfg, void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index) { - out->dcn4.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index); - out->dcn4.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index); - out->dcn4.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index); - out->dcn4.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index); + out->dcn4x.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index); + out->dcn4x.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index); + out->dcn4x.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index); + out->dcn4x.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index); + out->dcn4x.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index); } void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index) @@ -12197,6 +12269,25 @@ void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_disp dml2_core_calcs_get_global_sync_programming(mode_lib, &out->global_sync, pipe_index); } +void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, + const struct display_configuation_with_meta *display_cfg, + struct dmub_cmd_fams2_global_config *fams2_global_config) +{ + fams2_global_config->features.bits.enable = display_cfg->stage3.fams2_required; + + if (fams2_global_config->features.bits.enable) { + fams2_global_config->features.bits.enable_stall_recovery = true; + fams2_global_config->features.bits.allow_delay_check_mode = FAMS2_ALLOW_DELAY_CHECK_FROM_START; + + fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us; + fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us; + fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us; + fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us; + + fams2_global_config->num_streams = display_cfg->display_config.num_streams; + } +} + void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_fams2_stream_static_state *fams2_programming, @@ -12209,6 +12300,11 @@ void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_interna unsigned int i; + if (display_cfg->display_config.overrides.all_streams_blanked) { + /* stream is blanked, so do nothing */ + return; + } + /* from display configuration */ fams2_programming->htotal = (uint16_t)stream_descriptor->timing.h_total; fams2_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total; @@ -12368,6 +12464,7 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp { double phantom_processing_delay_pix; unsigned int phantom_processing_delay_lines; + unsigned int phantom_min_v_active_lines; unsigned int phantom_v_active_lines; unsigned int phantom_v_startup_lines; unsigned int phantom_v_blank_lines; @@ -12377,14 +12474,16 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) * ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000)); phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total); - dml2_core_shared_div_rem(phantom_processing_delay_pix, + dml2_core_div_rem(phantom_processing_delay_pix, display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total, &rem); if (rem) phantom_processing_delay_lines++; phantom_v_startup_lines = dml_get_plane_max_vstartup_lines(mode_lib, plane_index); - phantom_v_active_lines = phantom_processing_delay_lines + dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) + mode_lib->ip.subvp_swath_height_margin_lines; + phantom_min_v_active_lines = (unsigned int)math_ceil((double)dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) / + display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio); + phantom_v_active_lines = phantom_processing_delay_lines + phantom_min_v_active_lines + mode_lib->ip.subvp_swath_height_margin_lines; // phantom_vblank = max(vbp(vstartup) + vactive + vfp(always 1) + vsync(can be 1), main_vblank) phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1; @@ -12396,8 +12495,8 @@ void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *disp // phantom_vtotal = vactive + vblank out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines; - out->phantom_min_v_active = dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index); - out->phantom_v_startup = dml_get_plane_max_vstartup_lines(mode_lib, plane_index); + out->phantom_min_v_active = phantom_min_v_active_lines; + out->phantom_v_startup = phantom_v_startup_lines; out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; #if defined(__DML_VBA_DEBUG__) @@ -12418,7 +12517,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport; out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport; out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420; - out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP; + out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = false; out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported; out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion; out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated; @@ -12611,7 +12710,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.misc.cstate_max_cap_mode = dml_get_cstate_max_cap_mode(mode_lib); - out->min_clocks.dcn4.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib); + out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib); out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib); @@ -12724,13 +12823,13 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod } } - out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles - / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles + / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) { out->informative.misc.ROBUrgencyAvoidance = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h index b280ab573fbb..df2d1550a14b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_DCN4_CALCS_H__ #define __DML2_CORE_DCN4_CALCS_H__ @@ -30,6 +29,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index); void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index); void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_fams2_stream_static_state *fams2_programming, enum dml2_uclk_pstate_support_method pstate_method, int plane_index); +void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_cmd_fams2_global_config *fams2_global_config); void dml2_core_calcs_get_dpte_row_height(unsigned int *dpte_row_height, struct dml2_core_internal_display_mode_lib *mode_lib, bool is_plane1, enum dml2_source_format_class SourcePixelFormat, enum dml2_swizzle_mode SurfaceTiling, enum dml2_rotation_angle ScanDirection, unsigned int pitch, unsigned int GPUVMMinPageSizeKBytes); void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c index f56abe9ab919..28394de02885 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_core_factory.h" #include "dml2_core_dcn4.h" #include "dml2_external_lib_deps.h" @@ -11,7 +10,7 @@ bool dml2_core_create(enum dml2_project_id project_id, struct dml2_core_instance { bool result = false; - if (!out) + if (out == 0) return false; memset(out, 0, sizeof(struct dml2_core_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h index 53636a8f52aa..411c514fe65c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_FACTORY_H__ #define __DML2_CORE_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c index 81f0a6f19f87..8f3c1c0b1cc1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.c @@ -779,7 +779,7 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dppclk / 1000; mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config); mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000); - mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); + mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table); #if defined(__DML_VBA_DEBUG__) @@ -1776,32 +1776,32 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou #endif mode_lib->ms.UrgLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, mode_lib->ms.FabricClock, mode_lib->ms.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->ms.TripToMemory = CalculateTripToMemory( mode_lib->ms.UrgLatency, mode_lib->ms.FabricClock, mode_lib->ms.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory); @@ -1995,21 +1995,21 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true; mode_lib->ms.support.avg_urgent_latency_us - = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); mode_lib->ms.support.avg_non_urgent_latency_us - = (mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_average_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_average_transport_latency_margin / 100.0); + = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); double outstanding_latency_us = 0; for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k] / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); @@ -2029,7 +2029,7 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou #endif } - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4 && mode_lib->ms.BytePerPixelC[k] > 0) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) { outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k] / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)); @@ -2242,11 +2242,15 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou } double min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active]; + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, s->ReorderingBytes, mode_lib->ms.DCFCLK, mode_lib->ms.FabricClock, @@ -2713,13 +2717,13 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou //Re-ordering Buffer Support Check mode_lib->ms.support.max_urgent_latency_us - = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= mode_lib->ms.support.max_urgent_latency_us) { mode_lib->ms.support.ROBSupport = true; @@ -2727,7 +2731,7 @@ bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_ou mode_lib->ms.support.ROBSupport = false; } } else { - if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn3.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { + if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) { mode_lib->ms.support.ROBSupport = true; } else { mode_lib->ms.support.ROBSupport = false; @@ -5050,7 +5054,7 @@ static void calculate_mcache_row_bytes( unsigned int meta_per_mvmpg_per_channel_ub = 0; if (p->gpuvm_enable) { - meta_per_mvmpg_per_channel = (float)vmpg_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans; //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic if (p->surf_vert && vmpg_bytes > blk_bytes) { @@ -5059,7 +5063,7 @@ static void calculate_mcache_row_bytes( *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom } else { - meta_per_mvmpg_per_channel = (float)blk_bytes / 256 / p->num_chans; + meta_per_mvmpg_per_channel = (float)blk_bytes / (float)256 / p->num_chans; if (!p->surf_vert) *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0; @@ -5881,7 +5885,7 @@ static double CalculateUrgentLatency( double fabric_max_transport_latency_margin) { double urgent_latency = 0; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0) + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0); @@ -5892,7 +5896,7 @@ static double CalculateUrgentLatency( } } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles); dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz); @@ -5922,7 +5926,7 @@ static double CalculateTripToMemory( double fabric_max_transport_latency_margin) { double trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); @@ -5931,7 +5935,7 @@ static double CalculateTripToMemory( } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles); dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles); @@ -5961,7 +5965,7 @@ static double CalculateMetaTripToMemory( double fabric_max_transport_latency_margin) { double meta_trip_to_memory_us; - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0) + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0); } else { @@ -5969,7 +5973,7 @@ static double CalculateMetaTripToMemory( } #ifdef __DML_VBA_DEBUG__ - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type); dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles); dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles); @@ -6460,8 +6464,8 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->SwathHeightC[k] = l->MaximumSwathHeightC[k] / 2; l->RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; l->RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; - p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; - p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; } if (p->SwathHeightC[k] == 0) @@ -7165,7 +7169,7 @@ static void calculate_tdlut_setting( *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width; //the delivery cycles is DispClk cycles per line * number of lines * number of slices tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width / 2.0, 1) * tdlut_mpc_width * tdlut_mpc_width; - tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / 9.0; + tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1); } else { //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); @@ -7485,7 +7489,7 @@ static void CalculateExtraLatency( max_request_size_bytes = request_size_bytes_chroma[k]; } - if (qos_type == dml2_qos_param_type_dcn4) { + if (qos_type == dml2_qos_param_type_dcn4x) { *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK; *ExtraLatency = *ExtraLatency_sr; if (max_oustanding_when_urgent_expected) @@ -7501,11 +7505,14 @@ static void CalculateExtraLatency( #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type); + dml2_printf("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode); + dml2_printf("DML::%s: Tex_trips=%u\n", __func__, Tex_trips); dml2_printf("DML::%s: max_oustanding_when_urgent_expected=%u\n", __func__, max_oustanding_when_urgent_expected); dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock); dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles); + dml2_printf("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes); dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb); dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency); dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr); @@ -7739,7 +7746,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->max_Tsw = (math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime); s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; - s->prefetch_bw_pr = s->prefetch_bw_pr * p->mall_prefetch_sdp_overhead_factor; s->prefetch_sw_bytes = s->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; s->prefetch_bw_oto = math_max2(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw); @@ -9304,6 +9310,10 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0)); } + if (dpte_groups_per_row_luma_ub <= 2) { + dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1; + } + dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]); dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]); dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]); @@ -9332,6 +9342,9 @@ static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTE } else { dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0)); } + if (dpte_groups_per_row_chroma_ub <= 2) { + dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1; + } dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]); dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma); dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub); @@ -9386,8 +9399,8 @@ static void CalculateVMGroupAndRequestTimes( double TimePerVMRequestVBlank[], double TimePerVMRequestFlip[]) { - unsigned int num_group_per_lower_vm_stage = 0; - unsigned int num_req_per_lower_vm_stage = 0; + unsigned int num_group_per_lower_vm_stage = 1; + unsigned int num_req_per_lower_vm_stage = 1; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces); @@ -9451,6 +9464,14 @@ static void CalculateVMGroupAndRequestTimes( double line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz; + if (num_group_per_lower_vm_stage_flip <= 2) { + num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage_flip + 1; + } + + if (num_group_per_lower_vm_stage_pref <= 2) { + num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage_pref + 1; + } + TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref; TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip; TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref; @@ -9814,14 +9835,14 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info); dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane); - mode_lib->mp.Dcfclk = programming->min_clocks.dcn4.active.dcfclk_khz / 1000.0; - mode_lib->mp.FabricClock = programming->min_clocks.dcn4.active.fclk_khz / 1000.0; - mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); - mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4.active.uclk_khz / 1000.0; - mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4.dpprefclk_khz / 1000.0; - s->SOCCLK = (double)programming->min_clocks.dcn4.socclk_khz / 1000; - mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params); - mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4.active.uclk_khz, &mode_lib->soc.clk_table); + mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0; + mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0; + mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config); + mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0; + mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0; + s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000; + mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params); + mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table); for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; @@ -9856,18 +9877,18 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e for (k = 0; k < s->num_active_planes; ++k) { mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used; - mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4.dppclk_khz / 1000.0; + mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0; dml2_assert(mode_lib->mp.Dppclk[k] > 0); } for (k = 0; k < s->num_active_planes; ++k) { unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index; - mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4.dscclk_khz / 1000.0; + mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0; dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]); } - mode_lib->mp.Dispclk = programming->min_clocks.dcn4.dispclk_khz / 1000.0; - mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4.deepsleep_dcfclk_khz / 1000.0; + mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0; + mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0; dml2_assert(mode_lib->mp.Dcfclk > 0); dml2_assert(mode_lib->mp.FabricClock > 0); @@ -10388,11 +10409,16 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params); } + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3) + s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes)); + CalculateExtraLatency( display_cfg, mode_lib->ip.rob_buffer_size_kbytes, - 0, //mode_lib->soc.round_trip_ping_latency_dcfclk_cycles, - s->ReorderBytes, + mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles, + s->ReorderingBytes, mode_lib->mp.Dcfclk, mode_lib->mp.FabricClock, mode_lib->ip.pixel_chunk_size_kbytes, @@ -10465,32 +10491,32 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e mode_lib->mp.WritebackDelay[k] = mode_lib->mp.WritebackDelay[j]; mode_lib->mp.UrgentLatency = CalculateUrgentLatency( - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_pixel_vm_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.base_latency_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us, mode_lib->soc.do_urgent_latency_adjustment, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_fclk_us, - mode_lib->soc.qos_parameters.qos_params.dcn3.urgent_latency_us.scaling_factor_mhz, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us, + mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.df_qos_response_time_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_urgent_ramp_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = CalculateTripToMemory( mode_lib->mp.UrgentLatency, mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory); @@ -10499,10 +10525,10 @@ bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_e mode_lib->mp.FabricClock, mode_lib->mp.uclk_freq_mhz, mode_lib->soc.qos_parameters.qos_type, - mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.meta_trip_adder_fclk_cycles, - mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin, - mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin); + mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles, + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin, + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin); for (k = 0; k < s->num_active_planes; ++k) { calculate_cursor_req_attributes( @@ -11945,14 +11971,14 @@ void dml2_core_shared_get_pipe_regs(const struct dml2_display_cfg *display_cfg, void dml2_core_shared_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index) { - // out->min_clocks.dcn4.dscclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); // FIXME_STAGE2 - // out->min_clocks.dcn4.dtbclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); - // out->min_clocks.dcn4.phyclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); - - out->global_sync.dcn4.vready_offset_pixels = mode_lib->mp.VReadyOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; - out->global_sync.dcn4.vstartup_lines = mode_lib->mp.VStartup[mode_lib->mp.pipe_plane[pipe_index]]; - out->global_sync.dcn4.vupdate_offset_pixels = mode_lib->mp.VUpdateOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; - out->global_sync.dcn4.vupdate_vupdate_width_pixels = mode_lib->mp.VUpdateWidthPix[mode_lib->mp.pipe_plane[pipe_index]]; + // out->min_clocks.dcn4x.dscclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); // FIXME_STAGE2 + // out->min_clocks.dcn4x.dtbclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); + // out->min_clocks.dcn4x.phyclk_khz = (unsigned int)(dml_get_dscclk_calculated(mode_lib, pipe_index) * 1000); + + out->global_sync.dcn4x.vready_offset_pixels = mode_lib->mp.VReadyOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4x.vstartup_lines = mode_lib->mp.VStartup[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4x.vupdate_offset_pixels = mode_lib->mp.VUpdateOffsetPix[mode_lib->mp.pipe_plane[pipe_index]]; + out->global_sync.dcn4x.vupdate_vupdate_width_pixels = mode_lib->mp.VUpdateWidthPix[mode_lib->mp.pipe_plane[pipe_index]]; } void dml2_core_shared_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx) @@ -12255,7 +12281,7 @@ void dml2_core_shared_get_informative(const struct dml2_core_internal_display_mo out->informative.misc.cstate_max_cap_mode = mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE; - out->min_clocks.dcn4.dpprefclk_khz = (int unsigned)(mode_lib->mp.GlobalDPPCLK * 1000.0); + out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)(mode_lib->mp.GlobalDPPCLK * 1000.0); out->informative.qos.max_active_fclk_change_latency_supported = mode_lib->mp.MaxActiveFCLKChangeLatencySupported; @@ -12368,13 +12394,13 @@ void dml2_core_shared_get_informative(const struct dml2_core_internal_display_mo } } - out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles - / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4.fabric_max_transport_latency_margin / 100.0); + out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles + / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); - if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4) { + if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) { out->informative.misc.ROBUrgencyAvoidance = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h deleted file mode 100644 index d76bda907ec8..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared.h +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - - -#ifndef __DML2_CORE_SHARED_H__ -#define __DML2_CORE_SHARED_H__ - -#define __DML_VBA_DEBUG__ -#define __DML2_CALCS_MAX_VRATIO_PRE_OTO__ 4.0 //<brief Prefetch schedule max vratio for one to one scheduling calculation for prefetch -#define __DML2_CALCS_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ 6.0 //<brief Prefetch schedule max vratio when enhance prefetch schedule acceleration is enabled and vstartup is earliest possible already -#define __DML2_CALCS_DPP_INVALID__ 0 -#define __DML2_CALCS_DCFCLK_FACTOR__ 1.15 //<brief fudge factor for min dcfclk calclation -#define __DML2_CALCS_PIPE_NO_PLANE__ 99 - -#include "dml2_core_shared_types.h" -#include "dml2_internal_shared_types.h" - -double dml2_core_shared_div_rem(double dividend, unsigned int divisor, unsigned int *remainder); - -const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); -const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type); -bool dml2_core_shared_is_420(enum dml2_source_format_class source_format); - -bool dml2_core_shared_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params); -bool dml2_core_shared_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params); -void dml2_core_shared_get_watermarks(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *out); -void dml2_core_shared_get_arb_params(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *out); -void dml2_core_shared_get_pipe_regs(const struct dml2_display_cfg *display_cfg, struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_per_pipe_register_set *out, int pipe_index); -void dml2_core_shared_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index); -void dml2_core_shared_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx); -void dml2_core_shared_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index); -void dml2_core_shared_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_idx); -void dml2_core_shared_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index); -void dml2_core_shared_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out); -void dml2_core_shared_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index 1343b744eeb3..13961c2eb634 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_CORE_SHARED_TYPES_H__ #define __DML2_CORE_SHARED_TYPES_H__ @@ -10,6 +9,15 @@ #include "dml_top_display_cfg_types.h" #include "dml_top_types.h" +#define __DML_VBA_DEBUG__ +#define __DML2_CALCS_MAX_VRATIO_PRE_OTO__ 4.0 //<brief max vratio for one-to-one prefetch bw scheduling +#define __DML2_CALCS_MAX_VRATIO_PRE_EQU__ 6.0 //<brief max vratio for equalized prefetch bw scheduling +#define __DML2_CALCS_MAX_VRATIO_PRE__ 8.0 //<brief max prefetch vratio register limit + +#define __DML2_CALCS_DPP_INVALID__ 0 +#define __DML2_CALCS_DCFCLK_FACTOR__ 1.15 //<brief fudge factor for min dcfclk calclation +#define __DML2_CALCS_PIPE_NO_PLANE__ 99 + struct dml2_core_ip_params { unsigned int vblank_nom_default_us; unsigned int remote_iommu_outstanding_translations; @@ -70,6 +78,7 @@ struct dml2_core_ip_params { unsigned int words_per_channel; bool imall_supported; unsigned int max_flip_time_us; + unsigned int max_flip_time_lines; unsigned int subvp_swath_height_margin_lines; unsigned int subvp_fw_processing_delay_us; unsigned int subvp_pstate_allow_width_us; @@ -782,6 +791,7 @@ struct dml2_core_internal_mode_program { unsigned int VUpdateOffsetPix[DML2_MAX_PLANES]; unsigned int VUpdateWidthPix[DML2_MAX_PLANES]; unsigned int VReadyOffsetPix[DML2_MAX_PLANES]; + unsigned int pstate_keepout_dst_lines[DML2_MAX_PLANES]; // Latency and Support double MaxActiveFCLKChangeLatencySupported; @@ -852,6 +862,9 @@ struct dml2_core_internal_SOCParametersList { double USRRetrainingLatency; double SMNLatency; double g6_temp_read_blackout_us; + double max_urgent_latency_us; + double df_response_time_us; + enum dml2_qos_param_type qos_type; }; struct dml2_core_calcs_mode_support_locals { @@ -865,7 +878,7 @@ struct dml2_core_calcs_mode_support_locals { unsigned int dpte_row_bytes_per_row_l[DML2_MAX_PLANES]; unsigned int dpte_row_bytes_per_row_c[DML2_MAX_PLANES]; - bool dummy_boolean[2]; + bool dummy_boolean[3]; unsigned int dummy_integer[3]; unsigned int dummy_integer_array[36][DML2_MAX_PLANES]; enum dml2_odm_mode dummy_odm_mode[DML2_MAX_PLANES]; @@ -913,9 +926,7 @@ struct dml2_core_calcs_mode_support_locals { double HostVMInefficiencyFactor; double HostVMInefficiencyFactorPrefetch; - unsigned int NextMaxVStartup; unsigned int MaxVStartup; - bool AnyLinesForVMOrRowTooLarge; double PixelClockBackEndFactor; unsigned int NumDSCUnitRequired; @@ -975,7 +986,7 @@ struct dml2_core_calcs_mode_programming_locals { unsigned int DSCFormatFactor; struct dml2_core_internal_DmlPipe SurfaceParameters[DML2_MAX_PLANES]; - unsigned int ReorderBytes; + unsigned int ReorderingBytes; double HostVMInefficiencyFactor; double HostVMInefficiencyFactorPrefetch; unsigned int TotalDCCActiveDPP; @@ -1196,11 +1207,14 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_locals { double Tdmec; double Tdmsks; double prefetch_sw_bytes; + double total_row_bytes; double prefetch_bw_pr; double bytes_pp; double dep_bytes; double min_Lsw_oto; + double min_Lsw_equ; double Tsw_est1; + double Tsw_est2; double Tsw_est3; double prefetch_bw1; double prefetch_bw2; @@ -1332,6 +1346,10 @@ struct dml2_core_shared_get_urgent_bandwidth_required_locals { double tmp_nom_adj_factor_p1; double tmp_pref_adj_factor_p0; double tmp_pref_adj_factor_p1; + double vm_row_bw; + double flip_and_active_bw; + double flip_and_prefetch_bw; + double active_and_excess_bw; }; struct dml2_core_shared_calculate_peak_bandwidth_required_locals { @@ -1688,7 +1706,6 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params { enum dml2_output_format_class OutputFormat; unsigned int MaxInterDCNTileRepeaters; unsigned int VStartup; - unsigned int MaxVStartup; unsigned int HostVMMinPageSize; bool DynamicMetadataEnable; bool DynamicMetadataVMEnabled; @@ -2010,6 +2027,7 @@ struct dml2_core_internal_scratch { struct dml2_core_internal_display_mode_lib { struct dml2_core_ip_params ip; struct dml2_soc_bb soc; + struct dml2_ip_capabilities ip_caps; //@brief Mode Support and Mode programming struct // Used to hold input; intermediate and output of the calculations diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c new file mode 100644 index 000000000000..ab229e1598ae --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c @@ -0,0 +1,631 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#include "dml2_core_utils.h" + +double dml2_core_utils_div_rem(double dividend, unsigned int divisor, unsigned int *remainder) +{ + *remainder = ((dividend / divisor) - (int)(dividend / divisor) > 0); + return dividend / divisor; + +} + +const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) +{ + switch (bw_type) { + case (dml2_core_internal_bw_sdp): + return("dml2_core_internal_bw_sdp"); + case (dml2_core_internal_bw_dram): + return("dml2_core_internal_bw_dram"); + case (dml2_core_internal_bw_max): + return("dml2_core_internal_bw_max"); + default: + return("dml2_core_internal_bw_unknown"); + } +} + +bool dml2_core_utils_is_420(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 1; + break; + case dml2_420_10: + val = 1; + break; + case dml2_420_12: + val = 1; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + default: + DML2_ASSERT(0); + break; + } + return val; +} + +void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only) +{ + dml2_printf("DML: ===================================== \n"); + dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n"); + if (!fail_only || support->ScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport); + if (!fail_only || support->SourceFormatPixelAndScanSupport == 0) + dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport); + if (!fail_only || support->ViewportSizeSupport == 0) + dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport); + if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1) + dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion); + if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1) + dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated); + if (!fail_only || support->BPPForMultistreamNotIndicated == 1) + dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated); + if (!fail_only || support->MultistreamWithHDMIOreDP == 1) + dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP); + if (!fail_only || support->ExceededMultistreamSlots == 1) + dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots); + if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1) + dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink); + if (!fail_only || support->NotEnoughLanesForMSO == 1) + dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO); + if (!fail_only || support->P2IWith420 == 1) + dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420); + if (!fail_only || support->DSC422NativeNotSupported == 1) + dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported); + if (!fail_only || support->DSCSlicesODMModeSupported == 0) + dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported); + if (!fail_only || support->NotEnoughDSCUnits == 1) + dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits); + if (!fail_only || support->NotEnoughDSCSlices == 1) + dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices); + if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1) + dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe); + if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen); + if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1) + dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported); + if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0) + dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport); + if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1) + dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported); + if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1) + dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState); + if (!fail_only || support->ROBSupport == 0) + dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport); + if (!fail_only || support->OutstandingRequestsSupport == 0) + dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport); + if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0) + dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance); + if (!fail_only || support->DISPCLK_DPPCLK_Support == 0) + dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support); + if (!fail_only || support->TotalAvailablePipesSupport == 0) + dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport); + if (!fail_only || support->NumberOfOTGSupport == 0) + dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport); + if (!fail_only || support->NumberOfHDMIFRLSupport == 0) + dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport); + if (!fail_only || support->NumberOfDP2p0Support == 0) + dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support); + if (!fail_only || support->EnoughWritebackUnits == 0) + dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits); + if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0) + dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport); + if (!fail_only || support->WritebackLatencySupport == 0) + dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport); + if (!fail_only || support->CursorSupport == 0) + dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport); + if (!fail_only || support->PitchSupport == 0) + dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport); + if (!fail_only || support->ViewportExceedsSurface == 1) + dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface); + if (!fail_only || support->PrefetchSupported == 0) + dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported); + if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0) + dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport); + if (!fail_only || support->AvgBandwidthSupport == 0) + dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport); + if (!fail_only || support->DynamicMetadataSupported == 0) + dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); + if (!fail_only || support->VRatioInPrefetchSupported == 0) + dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); + if (!fail_only || support->PTEBufferSizeNotExceeded == 1) + dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1) + dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); + if (!fail_only || support->ExceededMALLSize == 1) + dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); + if (!fail_only || support->g6_temp_read_support == 0) + dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support); + if (!fail_only || support->ImmediateFlipSupport == 0) + dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport); + if (!fail_only || support->LinkCapacitySupport == 0) + dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport); + + if (!fail_only || support->ModeSupport == 0) + dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport); + dml2_printf("DML: ===================================== \n"); +} + +const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type) +{ + switch (dml2_core_internal_soc_state_type) { + case (dml2_core_internal_soc_state_sys_idle): + return("dml2_core_internal_soc_state_sys_idle"); + case (dml2_core_internal_soc_state_sys_active): + return("dml2_core_internal_soc_state_sys_active"); + case (dml2_core_internal_soc_state_svp_prefetch): + return("dml2_core_internal_soc_state_svp_prefetch"); + case dml2_core_internal_soc_state_max: + default: + return("dml2_core_internal_soc_state_unknown"); + } +} + + +void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg) +{ + for (unsigned int k = 0; k < display_cfg->num_planes; k++) { + double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc; + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) { + switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) { + case dml2_444: + out_bpp[k] = bpc * 3; + break; + case dml2_s422: + out_bpp[k] = bpc * 2; + break; + case dml2_n422: + out_bpp[k] = bpc * 2; + break; + case dml2_420: + default: + out_bpp[k] = bpc * 1.5; + break; + } + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) { + out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16; + } else { + out_bpp[k] = 0; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc); + dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable); + dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]); +#endif + } +} + +unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up) +{ + unsigned int remainder; + + if (multiple == 0) + return num; + + remainder = num % multiple; + if (remainder == 0) + return num; + + if (up) + return (num + multiple - remainder); + else + return (num - remainder); +} + +unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info) +{ + unsigned int num_active_pipes = 0; + + for (unsigned int k = 0; k < num_planes; k++) { + num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes); +#endif + return num_active_pipes; +} + +void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane) +{ + unsigned int pipe_idx = 0; + + for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) { + pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__; + } + + for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) { + for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) { + pipe_plane[pipe_idx] = plane_idx; + pipe_idx++; + } + } +} + +bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg) +{ + bool is_phantom = false; + + if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe || + plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) { + is_phantom = true; + } + + return is_phantom; +} + +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode) +{ + switch (sw_mode) { + case (dml2_sw_linear): + return 256; break; + case (dml2_sw_256b_2d): + return 256; break; + case (dml2_sw_4kb_2d): + return 4096; break; + case (dml2_sw_64kb_2d): + return 65536; break; + case (dml2_sw_256kb_2d): + return 262144; break; + case (dml2_gfx11_sw_linear): + return 256; break; + case (dml2_gfx11_sw_64kb_d): + return 65536; break; + case (dml2_gfx11_sw_64kb_d_t): + return 65536; break; + case (dml2_gfx11_sw_64kb_d_x): + return 65536; break; + case (dml2_gfx11_sw_64kb_r_x): + return 65536; break; + case (dml2_gfx11_sw_256kb_d_x): + return 262144; break; + case (dml2_gfx11_sw_256kb_r_x): + return 262144; break; + default: + DML2_ASSERT(0); + return 256; + }; +} + + +bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan) +{ + bool is_vert = false; + if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) { + is_vert = true; + } else { + is_vert = false; + } + return is_vert; +} + + +int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) +{ + int unsigned version = 0; + + if (sw_mode == dml2_sw_linear || + sw_mode == dml2_sw_256b_2d || + sw_mode == dml2_sw_4kb_2d || + sw_mode == dml2_sw_64kb_2d || + sw_mode == dml2_sw_256kb_2d) { + version = 12; + } else if (sw_mode == dml2_gfx11_sw_linear || + sw_mode == dml2_gfx11_sw_64kb_d || + sw_mode == dml2_gfx11_sw_64kb_d_t || + sw_mode == dml2_gfx11_sw_64kb_d_x || + sw_mode == dml2_gfx11_sw_64kb_r_x || + sw_mode == dml2_gfx11_sw_256kb_d_x || + sw_mode == dml2_gfx11_sw_256kb_r_x) { + version = 11; + } else { + dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); + DML2_ASSERT(0); + } + + return version; +} + +unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params) +{ + unsigned int i; + unsigned int index = 0; + + for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) { + dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz); + + if (i == 0) + index = 0; + else + index = i - 1; + + if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz || + per_uclk_dpm_params[i].minimum_uclk_khz == 0) { + break; + } + } +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz); + dml2_printf("DML::%s: index = %d\n", __func__, index); +#endif + return index; +} + +unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table) +{ + unsigned int i; + bool clk_entry_found = 0; + + for (i = 0; i < clk_table->uclk.num_clk_values; i++) { + dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]); + + if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) { + clk_entry_found = 1; + break; + } + } + + dml2_assert(clk_entry_found); +#if defined(__DML_VBA_DEBUG__) + dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz); + dml2_printf("DML::%s: index = %d\n", __func__, i); +#endif + return i; +} + +bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format) +{ + bool ret_val = 0; + + if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha)) + ret_val = 1; + + return ret_val; +} + +unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend) +{ + if (a == 0) + return 0; + + return (math_log2_approx(a) - subtrahend); +} + +static void create_phantom_stream_from_main_stream(struct dml2_stream_parameters *phantom, const struct dml2_stream_parameters *main, + const struct dml2_implicit_svp_meta *meta) +{ + memcpy(phantom, main, sizeof(struct dml2_stream_parameters)); + + phantom->timing.v_total = meta->v_total; + phantom->timing.v_active = meta->v_active; + phantom->timing.v_front_porch = meta->v_front_porch; + phantom->timing.vblank_nom = phantom->timing.v_total - phantom->timing.v_active; + phantom->timing.drr_config.enabled = false; +} + +static void create_phantom_plane_from_main_plane(struct dml2_plane_parameters *phantom, const struct dml2_plane_parameters *main, + const struct dml2_stream_parameters *phantom_stream, int phantom_stream_index, const struct dml2_stream_parameters *main_stream) +{ + memcpy(phantom, main, sizeof(struct dml2_plane_parameters)); + + phantom->stream_index = phantom_stream_index; + phantom->overrides.refresh_from_mall = dml2_refresh_from_mall_mode_override_force_disable; + phantom->overrides.legacy_svp_config = dml2_svp_mode_override_phantom_pipe_no_data_return; + phantom->composition.viewport.plane0.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane0.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane0.height); + phantom->composition.viewport.plane1.height = (long int unsigned) math_min2(math_ceil2( + (double)main->composition.scaler_info.plane1.v_ratio * (double)phantom_stream->timing.v_active, 16.0), + (double)main->composition.viewport.plane1.height); + phantom->immediate_flip = false; + phantom->dynamic_meta_data.enable = false; + phantom->cursor.num_cursors = 0; + phantom->cursor.cursor_width = 0; + phantom->tdlut.setup_for_tdlut = false; +} + +void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch) +{ + unsigned int stream_index, plane_index; + const struct dml2_plane_parameters *main_plane; + const struct dml2_stream_parameters *main_stream; + const struct dml2_stream_parameters *phantom_stream; + + memcpy(svp_expanded_display_cfg, &display_cfg->display_config, sizeof(struct dml2_display_cfg)); + memset(scratch->main_stream_index_from_svp_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->svp_stream_index_from_main_stream_index, 0, sizeof(int) * DML2_MAX_PLANES); + memset(scratch->main_plane_index_to_phantom_plane_index, 0, sizeof(int) * DML2_MAX_PLANES); + + if (!display_cfg->display_config.overrides.enable_subvp_implicit_pmo) + return; + + /* disable unbounded requesting for all planes until stage 3 has been performed */ + if (!display_cfg->stage3.performed) { + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.enable = true; + svp_expanded_display_cfg->overrides.hw.force_unbounded_requesting.value = false; + } + // Create the phantom streams + for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { + main_stream = &display_cfg->display_config.stream_descriptors[stream_index]; + scratch->main_stream_index_from_svp_stream_index[stream_index] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = stream_index; + + if (display_cfg->stage3.stream_svp_meta[stream_index].valid) { + // Create the phantom stream + create_phantom_stream_from_main_stream(&svp_expanded_display_cfg->stream_descriptors[svp_expanded_display_cfg->num_streams], + main_stream, &display_cfg->stage3.stream_svp_meta[stream_index]); + + // Associate this phantom stream to the main stream + scratch->main_stream_index_from_svp_stream_index[svp_expanded_display_cfg->num_streams] = stream_index; + scratch->svp_stream_index_from_main_stream_index[stream_index] = svp_expanded_display_cfg->num_streams; + + // Increment num streams + svp_expanded_display_cfg->num_streams++; + } + } + + // Create the phantom planes + for (plane_index = 0; plane_index < display_cfg->display_config.num_planes; plane_index++) { + main_plane = &display_cfg->display_config.plane_descriptors[plane_index]; + + if (display_cfg->stage3.stream_svp_meta[main_plane->stream_index].valid) { + main_stream = &display_cfg->display_config.stream_descriptors[main_plane->stream_index]; + phantom_stream = &svp_expanded_display_cfg->stream_descriptors[scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index]]; + create_phantom_plane_from_main_plane(&svp_expanded_display_cfg->plane_descriptors[svp_expanded_display_cfg->num_planes], + main_plane, phantom_stream, scratch->svp_stream_index_from_main_stream_index[main_plane->stream_index], main_stream); + + // Associate this phantom plane to the main plane + scratch->phantom_plane_index_to_main_plane_index[svp_expanded_display_cfg->num_planes] = plane_index; + scratch->main_plane_index_to_phantom_plane_index[plane_index] = svp_expanded_display_cfg->num_planes; + + // Increment num planes + svp_expanded_display_cfg->num_planes++; + + // Adjust the main plane settings + svp_expanded_display_cfg->plane_descriptors[plane_index].overrides.legacy_svp_config = dml2_svp_mode_override_main_pipe; + } + } +} + +bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_dp2p0: + case dml2_edp: + case dml2_hdmi: + case dml2_hdmifrl: + return true; + case dml2_none: + default: + return false; + } +} +bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_dp2p0: + case dml2_edp: + case dml2_hdmifrl: + return true; + case dml2_hdmi: + case dml2_none: + default: + return false; + } +} + + +bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp: + case dml2_edp: + return true; + case dml2_dp2p0: + case dml2_hdmi: + case dml2_hdmifrl: + case dml2_none: + default: + return false; + } +} + +bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + switch (stream_descriptor->output.output_encoder) { + case dml2_dp2p0: + return true; + case dml2_dp: + case dml2_edp: + case dml2_hdmi: + case dml2_hdmifrl: + case dml2_none: + default: + return false; + } +} + +bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor) +{ + return dml2_core_utils_is_dio_dp_encoder(stream_descriptor) + || dml2_core_utils_is_hpo_dp_encoder(stream_descriptor); +} + + +bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate) +{ + switch (rate) { + case dml2_dp_rate_hbr: + case dml2_dp_rate_hbr2: + case dml2_dp_rate_hbr3: + return true; + case dml2_dp_rate_na: + case dml2_dp_rate_uhbr10: + case dml2_dp_rate_uhbr13p5: + case dml2_dp_rate_uhbr20: + default: + return false; + } +} + +bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate) +{ + switch (rate) { + case dml2_dp_rate_uhbr10: + case dml2_dp_rate_uhbr13p5: + case dml2_dp_rate_uhbr20: + return true; + case dml2_dp_rate_hbr: + case dml2_dp_rate_hbr2: + case dml2_dp_rate_hbr3: + case dml2_dp_rate_na: + default: + return false; + } +} + +bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode) +{ + switch (odm_mode) { + case dml2_odm_mode_split_1to2: + case dml2_odm_mode_mso_1to2: + case dml2_odm_mode_mso_1to4: + return true; + case dml2_odm_mode_auto: + case dml2_odm_mode_bypass: + case dml2_odm_mode_combine_2to1: + case dml2_odm_mode_combine_3to1: + case dml2_odm_mode_combine_4to1: + default: + return false; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h new file mode 100644 index 000000000000..a5cc6a07167a --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_CORE_UTILS_H__ +#define __DML2_CORE_UTILS_H__ +#include "dml2_internal_shared_types.h" +#include "dml2_debug.h" +#include "lib_float_math.h" + +double dml2_core_utils_div_rem(double dividend, unsigned int divisor, unsigned int *remainder); +const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); +bool dml2_core_utils_is_420(enum dml2_source_format_class source_format); +void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only); +const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type); +void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg); +unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int multiple, bool up); +unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info); +void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane); +bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg); +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode); +bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan); +int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode); +unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params); +unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table); +bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format); +unsigned int dml2_core_utils_log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend); +void dml2_core_utils_expand_implict_subvp(const struct display_configuation_with_meta *display_cfg, struct dml2_display_cfg *svp_expanded_display_cfg, + struct dml2_core_scratch *scratch); +bool dml2_core_utils_is_stream_encoder_required(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_encoder_dsc_capable(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dio_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_hpo_dp_encoder(const struct dml2_stream_parameters *stream_descriptor); +bool dml2_core_utils_is_dp_8b_10b_link_rate(enum dml2_output_link_dp_rate rate); +bool dml2_core_utils_is_dp_128b_132b_link_rate(enum dml2_output_link_dp_rate rate); +bool dml2_core_utils_is_odm_split(enum dml2_odm_mode odm_mode); + +#endif /* __DML2_CORE_UTILS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index c94c4f32c957..8869ea089312 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_dpmm_dcn4.h" #include "dml2_internal_shared_types.h" #include "dml_top_types.h" @@ -83,9 +82,9 @@ static void calculate_system_active_minimums(struct dml2_dpmm_map_mode_to_soc_dp get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - in_out->programming->min_clocks.dcn4.active.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); - in_out->programming->min_clocks.dcn4.active.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); - in_out->programming->min_clocks.dcn4.active.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); + in_out->programming->min_clocks.dcn4x.active.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.active.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); } static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) @@ -123,9 +122,9 @@ static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); - in_out->programming->min_clocks.dcn4.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); - in_out->programming->min_clocks.dcn4.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); } static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) @@ -147,9 +146,9 @@ static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_ get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); - in_out->programming->min_clocks.dcn4.idle.uclk_khz = dml_round_up(min_uclk_avg > min_uclk_latency ? min_uclk_avg : min_uclk_latency); - in_out->programming->min_clocks.dcn4.idle.fclk_khz = dml_round_up(min_fclk_avg > min_fclk_latency ? min_fclk_avg : min_fclk_latency); - in_out->programming->min_clocks.dcn4.idle.dcfclk_khz = dml_round_up(min_dcfclk_avg > min_dcfclk_latency ? min_dcfclk_avg : min_dcfclk_latency); + in_out->programming->min_clocks.dcn4x.idle.uclk_khz = dml_round_up(min_uclk_avg > min_uclk_latency ? min_uclk_avg : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.idle.fclk_khz = dml_round_up(min_fclk_avg > min_fclk_latency ? min_fclk_avg : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.idle.dcfclk_khz = dml_round_up(min_dcfclk_avg > min_dcfclk_latency ? min_dcfclk_avg : min_dcfclk_latency); } static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double margin, unsigned long vco_freq_khz, unsigned long *rounded_khz, uint32_t *divider_id) @@ -204,6 +203,26 @@ static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double ma return true; } +static bool round_to_non_dfs_granularity(unsigned long dispclk_khz, unsigned long dpprefclk_khz, unsigned long dtbrefclk_khz, + unsigned long *rounded_dispclk_khz, unsigned long *rounded_dpprefclk_khz, unsigned long *rounded_dtbrefclk_khz) +{ + unsigned long pll_frequency_khz; + + pll_frequency_khz = (unsigned long) math_max2(600000, math_ceil2(math_max3(dispclk_khz, dpprefclk_khz, dtbrefclk_khz), 1000)); + + *rounded_dispclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dispclk_khz, 32); + + *rounded_dpprefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dpprefclk_khz, 32); + + if (dtbrefclk_khz > 0) { + *rounded_dtbrefclk_khz = pll_frequency_khz / (unsigned long) math_min2(pll_frequency_khz / dtbrefclk_khz, 32); + } else { + *rounded_dtbrefclk_khz = 0; + } + + return true; +} + static bool round_up_and_copy_to_next_dpm(unsigned long min_value, unsigned long *rounded_value, const struct dml2_clk_table *clock_table) { bool result = false; @@ -233,25 +252,25 @@ static bool map_soc_min_clocks_to_dpm_fine_grained(struct dml2_display_cfg_progr { bool result; - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.active.dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.dcfclk_khz, &state_table->dcfclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.active.fclk_khz, &state_table->fclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.fclk_khz, &state_table->fclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.active.uclk_khz, &state_table->uclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.active.uclk_khz, &state_table->uclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.svp_prefetch.dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz, &state_table->dcfclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.svp_prefetch.fclk_khz, &state_table->fclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz, &state_table->fclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.svp_prefetch.uclk_khz, &state_table->uclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz, &state_table->uclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.idle.dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.dcfclk_khz, &state_table->dcfclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.idle.fclk_khz, &state_table->fclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.fclk_khz, &state_table->fclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.idle.uclk_khz, &state_table->uclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.uclk_khz, &state_table->uclk); return result; } @@ -263,12 +282,12 @@ static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_pro result = false; for (index = 0; index < state_table->uclk.num_clk_values; index++) { - if (display_cfg->min_clocks.dcn4.active.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.active.fclk_khz <= state_table->fclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.active.uclk_khz <= state_table->uclk.clk_values_khz[index]) { - display_cfg->min_clocks.dcn4.active.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.active.fclk_khz = state_table->fclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.active.uclk_khz = state_table->uclk.clk_values_khz[index]; + if (display_cfg->min_clocks.dcn4x.active.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.active.fclk_khz <= state_table->fclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.active.uclk_khz <= state_table->uclk.clk_values_khz[index]) { + display_cfg->min_clocks.dcn4x.active.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.active.fclk_khz = state_table->fclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.active.uclk_khz = state_table->uclk.clk_values_khz[index]; result = true; break; } @@ -277,12 +296,12 @@ static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_pro if (result) { result = false; for (index = 0; index < state_table->uclk.num_clk_values; index++) { - if (display_cfg->min_clocks.dcn4.idle.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.idle.fclk_khz <= state_table->fclk.clk_values_khz[index] && - display_cfg->min_clocks.dcn4.idle.uclk_khz <= state_table->uclk.clk_values_khz[index]) { - display_cfg->min_clocks.dcn4.idle.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.idle.fclk_khz = state_table->fclk.clk_values_khz[index]; - display_cfg->min_clocks.dcn4.idle.uclk_khz = state_table->uclk.clk_values_khz[index]; + if (display_cfg->min_clocks.dcn4x.idle.dcfclk_khz <= state_table->dcfclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.idle.fclk_khz <= state_table->fclk.clk_values_khz[index] && + display_cfg->min_clocks.dcn4x.idle.uclk_khz <= state_table->uclk.clk_values_khz[index]) { + display_cfg->min_clocks.dcn4x.idle.dcfclk_khz = state_table->dcfclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.idle.fclk_khz = state_table->fclk.clk_values_khz[index]; + display_cfg->min_clocks.dcn4x.idle.uclk_khz = state_table->uclk.clk_values_khz[index]; result = true; break; } @@ -290,9 +309,9 @@ static bool map_soc_min_clocks_to_dpm_coarse_grained(struct dml2_display_cfg_pro } // SVP is not supported on any coarse grained SoCs - display_cfg->min_clocks.dcn4.svp_prefetch.dcfclk_khz = 0; - display_cfg->min_clocks.dcn4.svp_prefetch.fclk_khz = 0; - display_cfg->min_clocks.dcn4.svp_prefetch.uclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch.fclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch.uclk_khz = 0; return result; } @@ -325,30 +344,30 @@ static bool map_min_clocks_to_dpm(const struct dml2_core_mode_support_result *mo result = map_soc_min_clocks_to_dpm_coarse_grained(display_cfg, state_table); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.dispclk_khz, &state_table->dispclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dispclk_khz, &state_table->dispclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.deepsleep_dcfclk_khz, &state_table->dcfclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.deepsleep_dcfclk_khz, &state_table->dcfclk); for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { if (result) - result = round_up_to_next_dpm(&display_cfg->plane_programming[i].min_clocks.dcn4.dppclk_khz, &state_table->dppclk); + result = round_up_to_next_dpm(&display_cfg->plane_programming[i].min_clocks.dcn4x.dppclk_khz, &state_table->dppclk); } for (i = 0; i < display_cfg->display_config.num_streams; i++) { if (result) - result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dscclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4.dscclk_khz, &state_table->dscclk); + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dscclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dscclk_khz, &state_table->dscclk); if (result) - result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dtbclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4.dtbclk_khz, &state_table->dtbclk); + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].dtbclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.dtbclk_khz, &state_table->dtbclk); if (result) - result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].phyclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4.phyclk_khz, &state_table->phyclk); + result = round_up_and_copy_to_next_dpm(mode_support_result->per_stream[i].phyclk_khz, &display_cfg->stream_programming[i].min_clocks.dcn4x.phyclk_khz, &state_table->phyclk); } if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.dpprefclk_khz, &state_table->dppclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dpprefclk_khz, &state_table->dppclk); if (result) - result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4.dtbrefclk_khz, &state_table->dtbclk); + result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.dtbrefclk_khz, &state_table->dtbclk); return result; } @@ -516,15 +535,15 @@ static bool determine_power_management_features_with_fams(struct dml2_dpmm_map_m static void clamp_uclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) { - in_out->programming->min_clocks.dcn4.active.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; - in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; - in_out->programming->min_clocks.dcn4.idle.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.idle.uclk_khz = in_out->soc_bb->clk_table.uclk.clk_values_khz[in_out->soc_bb->clk_table.uclk.num_clk_values - 1]; } static void clamp_fclk_to_max(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) { - in_out->programming->min_clocks.dcn4.active.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; - in_out->programming->min_clocks.dcn4.idle.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.active.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; + in_out->programming->min_clocks.dcn4x.idle.fclk_khz = in_out->soc_bb->clk_table.fclk.clk_values_khz[in_out->soc_bb->clk_table.fclk.num_clk_values - 1]; } static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) @@ -540,14 +559,14 @@ static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_o // In NV4, there's no support for FCLK or DCFCLK DPM change before SVP prefetch starts, therefore // active minimums must be boosted to prefetch minimums - if (in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz > in_out->programming->min_clocks.dcn4.active.uclk_khz) - in_out->programming->min_clocks.dcn4.active.uclk_khz = in_out->programming->min_clocks.dcn4.svp_prefetch.uclk_khz; + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz > in_out->programming->min_clocks.dcn4x.active.uclk_khz) + in_out->programming->min_clocks.dcn4x.active.uclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz; - if (in_out->programming->min_clocks.dcn4.svp_prefetch.fclk_khz > in_out->programming->min_clocks.dcn4.active.fclk_khz) - in_out->programming->min_clocks.dcn4.active.fclk_khz = in_out->programming->min_clocks.dcn4.svp_prefetch.fclk_khz; + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz > in_out->programming->min_clocks.dcn4x.active.fclk_khz) + in_out->programming->min_clocks.dcn4x.active.fclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz; - if (in_out->programming->min_clocks.dcn4.svp_prefetch.dcfclk_khz > in_out->programming->min_clocks.dcn4.active.dcfclk_khz) - in_out->programming->min_clocks.dcn4.active.dcfclk_khz = in_out->programming->min_clocks.dcn4.svp_prefetch.dcfclk_khz; + if (in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz > in_out->programming->min_clocks.dcn4x.active.dcfclk_khz) + in_out->programming->min_clocks.dcn4x.active.dcfclk_khz = in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz; // need some massaging for the dispclk ramping cases: dispclk_khz = mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0) * (1.0 + in_out->ip->dispclk_ramp_margin_percent / 100.0); @@ -556,34 +575,42 @@ static bool map_mode_to_soc_dpm(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_o // but still the required dispclk can be more than the maximum dispclk speed: dispclk_khz = math_max2(dispclk_khz, mode_support_result->global.dispclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); - add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4.dispclk_khz, &in_out->programming->min_clocks.dcn4.divider_ids.dispclk_did); - // DPP Ref is always set to max of all DPP clocks for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - if (in_out->programming->min_clocks.dcn4.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz) - in_out->programming->min_clocks.dcn4.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz; + if (in_out->programming->min_clocks.dcn4x.dpprefclk_khz < mode_support_result->per_plane[i].dppclk_khz) + in_out->programming->min_clocks.dcn4x.dpprefclk_khz = mode_support_result->per_plane[i].dppclk_khz; } + in_out->programming->min_clocks.dcn4x.dpprefclk_khz = (unsigned long) (in_out->programming->min_clocks.dcn4x.dpprefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4.dpprefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4.dpprefclk_khz, &in_out->programming->min_clocks.dcn4.divider_ids.dpprefclk_did); - + // DTB Ref is always set to max of all DTB clocks for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - in_out->programming->plane_programming[i].min_clocks.dcn4.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4.dpprefclk_khz / 255.0 - * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4.dpprefclk_khz, 1.0)); + if (in_out->programming->min_clocks.dcn4x.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz) + in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz; } + in_out->programming->min_clocks.dcn4x.dtbrefclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz * (1 + in_out->soc_bb->dcn_downspread_percent / 100.0)); - // DTB Ref is always set to max of all DTB clocks - for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { - if (in_out->programming->min_clocks.dcn4.dtbrefclk_khz < mode_support_result->per_stream[i].dtbclk_khz) - in_out->programming->min_clocks.dcn4.dtbrefclk_khz = mode_support_result->per_stream[i].dtbclk_khz; + if (in_out->soc_bb->no_dfs) { + round_to_non_dfs_granularity((unsigned long)dispclk_khz, in_out->programming->min_clocks.dcn4x.dpprefclk_khz, in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, + &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz); + } else { + add_margin_and_round_to_dfs_grainularity(dispclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dispclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dispclk_did); + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dpprefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dpprefclk_did); + + add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, 0.0, + (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4x.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4x.divider_ids.dtbrefclk_did); } - add_margin_and_round_to_dfs_grainularity(in_out->programming->min_clocks.dcn4.dtbrefclk_khz, in_out->soc_bb->dcn_downspread_percent / 100.0, - (unsigned long)(in_out->soc_bb->dispclk_dppclk_vco_speed_mhz * 1000), &in_out->programming->min_clocks.dcn4.dtbrefclk_khz, &in_out->programming->min_clocks.dcn4.divider_ids.dtbrefclk_did); - in_out->programming->min_clocks.dcn4.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz; - in_out->programming->min_clocks.dcn4.socclk_khz = mode_support_result->global.socclk_khz; + for (i = 0; i < DML2_MAX_DCN_PIPES; i++) { + in_out->programming->plane_programming[i].min_clocks.dcn4x.dppclk_khz = (unsigned long)(in_out->programming->min_clocks.dcn4x.dpprefclk_khz / 255.0 + * math_ceil2(in_out->display_cfg->mode_support_result.per_plane[i].dppclk_khz * (1.0 + in_out->soc_bb->dcn_downspread_percent / 100.0) * 255.0 / in_out->programming->min_clocks.dcn4x.dpprefclk_khz, 1.0)); + } + + in_out->programming->min_clocks.dcn4x.deepsleep_dcfclk_khz = mode_support_result->global.dcfclk_deepsleep_khz; + in_out->programming->min_clocks.dcn4x.socclk_khz = mode_support_result->global.socclk_khz; result = map_min_clocks_to_dpm(mode_support_result, in_out->programming, &in_out->soc_bb->clk_table); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h index 3afb69dfd040..b165c58dfd11 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_DPMM_DCN4_H__ #define __DML2_DPMM_DCN4_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c index 2c983daf2dad..3861bc6c9621 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_dpmm_factory.h" #include "dml2_dpmm_dcn4.h" #include "dml2_external_lib_deps.h" @@ -21,7 +20,7 @@ bool dml2_dpmm_create(enum dml2_project_id project_id, struct dml2_dpmm_instance { bool result = false; - if (!out) + if (out == 0) return false; memset(out, 0, sizeof(struct dml2_dpmm_instance)); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h index 80b44b4c2e68..20ba2e446f1d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_DPMM_FACTORY_H__ #define __DML2_DPMM_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c index 5d8887ac766d..f4b1a7d02d42 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_mcg_dcn4.h" #include "dml_top_soc_parameter_types.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h index 19d178651435..02da6f45cbf7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_dcn4.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_MCG_DCN4_H__ #define __DML2_MCG_DCN4_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c index 55085b85f8ed..c60b8fe90819 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_mcg_factory.h" #include "dml2_mcg_dcn4.h" #include "dml2_external_lib_deps.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h index 5dfdfed04e22..ad307deca3b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_mcg/dml2_mcg_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_MCG_FACTORY_H__ #define __DML2_MCG_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c index 671f9ac2627c..8e68a8094658 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c @@ -2,22 +2,17 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_pmo_factory.h" #include "dml2_pmo_dcn3.h" static void sort(double *list_a, int list_a_size) { - double temp; // For all elements b[i] in list_b[] for (int i = 0; i < list_a_size - 1; i++) { // Find the first element of list_a that's larger than b[i] for (int j = i; j < list_a_size - 1; j++) { - if (list_a[j] > list_a[j + 1]) { - temp = list_a[j]; - list_a[j] = list_a[j + 1]; - list_a[j + 1] = temp; - } + if (list_a[j] > list_a[j + 1]) + swap(list_a[j], list_a[j + 1]); } } } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h index cc350f88d4d2..f00bd9e72a86 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_PMO_DCN3_H__ #define __DML2_PMO_DCN3_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c deleted file mode 100644 index 8952dd7e36cb..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.c +++ /dev/null @@ -1,1250 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - - -#include "dml2_pmo_factory.h" -#include "dml2_pmo_dcn4.h" - -static const int MIN_VACTIVE_MARGIN_US = 100; // We need more than non-zero margin because DET buffer granularity can alter vactive latency hiding -static const int SUBVP_DRR_MARGIN_US = 100; - -static const enum dml2_pmo_pstate_strategy full_strategy_list_1_display[][4] = { - // VActive Preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then VBlank - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Finally DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, -}; - -static const int full_strategy_list_1_display_size = sizeof(full_strategy_list_1_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static const enum dml2_pmo_pstate_strategy full_strategy_list_2_display[][4] = { - // VActive only is preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then VActive + VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then VBlank only - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then SVP + VBlank - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then SVP + SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Finally DRR + DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, -}; - -static const int full_strategy_list_2_display_size = sizeof(full_strategy_list_2_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static const enum dml2_pmo_pstate_strategy full_strategy_list_3_display[][4] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // All VActive - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // VActive + 1 VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, - -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // VActive + 2 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, - -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // VActive + 3 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // All VBlank - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // All DRR -}; - -static const int full_strategy_list_3_display_size = sizeof(full_strategy_list_3_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static const enum dml2_pmo_pstate_strategy full_strategy_list_4_display[][4] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // All VActive - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // VActive + 1 VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, - -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // VActive + 2 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, - -// { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // VActive + 3 VBlank -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, -// { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive }, - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // All Vblank - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // All DRR -}; - -static const int full_strategy_list_4_display_size = sizeof(full_strategy_list_4_display) / (sizeof(enum dml2_pmo_pstate_strategy) * 4); - -static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) -{ - bool result = true; - - if (*odm_mode == dml2_odm_mode_auto) { - switch (odms_calculated) { - case 1: - *odm_mode = dml2_odm_mode_bypass; - break; - case 2: - *odm_mode = dml2_odm_mode_combine_2to1; - break; - case 3: - *odm_mode = dml2_odm_mode_combine_3to1; - break; - case 4: - *odm_mode = dml2_odm_mode_combine_4to1; - break; - default: - result = false; - break; - } - } - - if (result) { - if (*odm_mode == dml2_odm_mode_bypass) { - *odm_mode = dml2_odm_mode_combine_2to1; - } else if (*odm_mode == dml2_odm_mode_combine_2to1) { - *odm_mode = dml2_odm_mode_combine_3to1; - } else if (*odm_mode == dml2_odm_mode_combine_3to1) { - *odm_mode = dml2_odm_mode_combine_4to1; - } else { - result = false; - } - } - - return result; -} - -static bool increase_mpc_combine_factor(unsigned int *mpc_combine_factor, unsigned int limit) -{ - if (*mpc_combine_factor < limit) { - (*mpc_combine_factor)++; - return true; - } - - return false; -} - -static int count_planes_with_stream_index(const struct dml2_display_cfg *display_cfg, unsigned int stream_index) -{ - unsigned int i; - int count; - - count = 0; - for (i = 0; i < display_cfg->num_planes; i++) { - if (display_cfg->plane_descriptors[i].stream_index == stream_index) - count++; - } - - return count; -} - -static bool optimize_dcc_mcache_no_odm(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out, - int free_pipes) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - unsigned int i; - bool result = true; - - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - // For pipes that failed dcc mcache check, we want to increase the pipe count. - // The logic for doing this depends on how many pipes is already being used, - // and whether it's mpcc or odm combine. - if (!in_out->dcc_mcache_supported[i]) { - // For the general case of "n displays", we can only optimize streams with an ODM combine factor of 1 - if (in_out->cfg_support_info->stream_support_info[in_out->optimized_display_cfg->plane_descriptors[i].stream_index].odms_used == 1) { - in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor = - in_out->cfg_support_info->plane_support_info[i].dpps_used; - // For each plane that is not passing mcache validation, just add another pipe to it, up to the limit. - if (free_pipes > 0) { - if (!increase_mpc_combine_factor(&in_out->optimized_display_cfg->plane_descriptors[i].overrides.mpcc_combine_factor, - pmo->mpc_combine_limit)) { - // We've reached max pipes allocatable to a single plane, so we fail. - result = false; - break; - } else { - // Successfully added another pipe to this failing plane. - free_pipes--; - } - } else { - // No free pipes to add. - result = false; - break; - } - } else { - // If the stream of this plane needs ODM combine, no further optimization can be done. - result = false; - break; - } - } - } - - return result; -} - -bool pmo_dcn4_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - unsigned int i, used_pipes, free_pipes, planes_on_stream; - bool result; - - if (in_out->display_config != in_out->optimized_display_cfg) { - memcpy(in_out->optimized_display_cfg, in_out->display_config, sizeof(struct dml2_display_cfg)); - } - - //Count number of free pipes, and check if any odm combine is in use. - used_pipes = 0; - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - used_pipes += in_out->cfg_support_info->plane_support_info[i].dpps_used; - } - free_pipes = pmo->ip_caps->pipe_count - used_pipes; - - // Optimization loop - // The goal here is to add more pipes to any planes - // which are failing mcache admissibility - result = true; - - // The optimization logic depends on whether ODM combine is enabled, and the stream count. - if (in_out->optimized_display_cfg->num_streams > 1) { - // If there are multiple streams, we are limited to only be able to optimize mcache failures on planes - // which are not ODM combined. - - result = optimize_dcc_mcache_no_odm(in_out, free_pipes); - } else if (in_out->optimized_display_cfg->num_streams == 1) { - // In single stream cases, we still optimize mcache failures when there's ODM combine with some - // additional logic. - - if (in_out->cfg_support_info->stream_support_info[0].odms_used > 1) { - // If ODM combine is enabled, then the logic is to increase ODM combine factor. - - // Optimization for streams with > 1 ODM combine factor is only supported for single display. - planes_on_stream = count_planes_with_stream_index(in_out->optimized_display_cfg, 0); - - for (i = 0; i < in_out->optimized_display_cfg->num_planes; i++) { - // For pipes that failed dcc mcache check, we want to increase the pipe count. - // The logic for doing this depends on how many pipes is already being used, - // and whether it's mpcc or odm combine. - if (!in_out->dcc_mcache_supported[i]) { - // Increasing ODM combine factor on a stream requires a free pipe for each plane on the stream. - if (free_pipes >= planes_on_stream) { - if (!increase_odm_combine_factor(&in_out->optimized_display_cfg->stream_descriptors[i].overrides.odm_mode, - in_out->cfg_support_info->plane_support_info[i].dpps_used)) { - result = false; - } else { - free_pipes -= planes_on_stream; - break; - } - } else { - result = false; - break; - } - } - } - } else { - // If ODM combine is not enabled, then we can actually use the same logic as before. - - result = optimize_dcc_mcache_no_odm(in_out, free_pipes); - } - } else { - result = true; - } - - return result; -} - -bool pmo_dcn4_initialize(struct dml2_pmo_initialize_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - - pmo->soc_bb = in_out->soc_bb; - pmo->ip_caps = in_out->ip_caps; - pmo->mpc_combine_limit = 2; - pmo->odm_combine_limit = 4; - pmo->mcg_clock_table_size = in_out->mcg_clock_table_size; - - pmo->fams_params.v1.subvp.fw_processing_delay_us = 10; - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us = 50; - pmo->fams_params.v1.subvp.refresh_rate_limit_max = 175; - pmo->fams_params.v1.subvp.refresh_rate_limit_min = 0; - - pmo->options = in_out->options; - - return true; -} - -static bool is_h_timing_divisible_by(const struct dml2_timing_cfg *timing, unsigned char denominator) -{ - /* - * Htotal, Hblank start/end, and Hsync start/end all must be divisible - * in order for the horizontal timing params to be considered divisible - * by 2. Hsync start is always 0. - */ - unsigned long h_blank_start = timing->h_total - timing->h_front_porch; - - return (timing->h_total % denominator == 0) && - (h_blank_start % denominator == 0) && - (timing->h_blank_end % denominator == 0) && - (timing->h_sync_width % denominator == 0); -} - -static bool is_dp_encoder(enum dml2_output_encoder_class encoder_type) -{ - switch (encoder_type) { - case dml2_dp: - case dml2_edp: - case dml2_dp2p0: - case dml2_none: - return true; - case dml2_hdmi: - case dml2_hdmifrl: - default: - return false; - } -} - -bool pmo_dcn4_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) -{ - unsigned int i; - const struct dml2_display_cfg *display_config = - &in_out->base_display_config->display_config; - const struct dml2_core_mode_support_result *mode_support_result = - &in_out->base_display_config->mode_support_result; - - if (in_out->instance->options->disable_dyn_odm || - (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) - return false; - - for (i = 0; i < display_config->num_planes; i++) - /* - * vmin optimization is required to be seamlessly switched off - * at any time when the new configuration is no longer - * supported. However switching from ODM combine to MPC combine - * is not always seamless. When there not enough free pipes, we - * will have to use the same secondary OPP heads as secondary - * DPP pipes in MPC combine in new state. This transition is - * expected to cause glitches. To avoid the transition, we only - * allow vmin optimization if the stream's base configuration - * doesn't require MPC combine. This condition checks if MPC - * combine is enabled. If so do not optimize the stream. - */ - if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && - mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) - in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; - - for (i = 0; i < display_config->num_streams; i++) { - if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && - in_out->instance->options->disable_dyn_odm_for_stream_with_svp) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - /* - * ODM Combine requires horizontal timing divisible by 2 so each - * ODM segment has the same size. - */ - else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - /* - * Our hardware support seamless ODM transitions for DP encoders - * only. - */ - else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; - } - - return true; -} - -bool pmo_dcn4_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out) -{ - bool is_vmin = true; - - if (in_out->vmin_limits->dispclk_khz > 0 && - in_out->display_config->mode_support_result.global.dispclk_khz > in_out->vmin_limits->dispclk_khz) - is_vmin = false; - - return is_vmin; -} - -static int find_highest_odm_load_stream_index( - const struct dml2_display_cfg *display_config, - const struct dml2_core_mode_support_result *mode_support_result) -{ - unsigned int i; - int odm_load, highest_odm_load = -1, highest_odm_load_index = -1; - - for (i = 0; i < display_config->num_streams; i++) { - odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz - / mode_support_result->cfg_support_info.stream_support_info[i].odms_used; - if (odm_load > highest_odm_load) { - highest_odm_load_index = i; - highest_odm_load = odm_load; - } - } - - return highest_odm_load_index; -} - -bool pmo_dcn4_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out) -{ - int stream_index; - const struct dml2_display_cfg *display_config = - &in_out->base_display_config->display_config; - const struct dml2_core_mode_support_result *mode_support_result = - &in_out->base_display_config->mode_support_result; - unsigned int odms_used; - struct dml2_stream_parameters *stream_descriptor; - bool optimizable = false; - - /* - * highest odm load stream must be optimizable to continue as dispclk is - * bounded by it. - */ - stream_index = find_highest_odm_load_stream_index(display_config, - mode_support_result); - - if (stream_index < 0 || - in_out->base_display_config->stage4.unoptimizable_streams[stream_index]) - return false; - - odms_used = mode_support_result->cfg_support_info.stream_support_info[stream_index].odms_used; - if ((int)odms_used >= in_out->instance->odm_combine_limit) - return false; - - memcpy(in_out->optimized_display_config, - in_out->base_display_config, - sizeof(struct display_configuation_with_meta)); - - stream_descriptor = &in_out->optimized_display_config->display_config.stream_descriptors[stream_index]; - while (!optimizable && increase_odm_combine_factor( - &stream_descriptor->overrides.odm_mode, - odms_used)) { - switch (stream_descriptor->overrides.odm_mode) { - case dml2_odm_mode_combine_2to1: - optimizable = true; - break; - case dml2_odm_mode_combine_3to1: - /* - * In ODM Combine 3:1 OTG_valid_pixel rate is 1/4 of - * actual pixel rate. Therefore horizontal timing must - * be divisible by 4. - */ - if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { - /* - * DSC h slice count must be divisible - * by 3. - */ - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 3 == 0) - optimizable = true; - } else { - optimizable = true; - } - } - break; - case dml2_odm_mode_combine_4to1: - /* - * In ODM Combine 4:1 OTG_valid_pixel rate is 1/4 of - * actual pixel rate. Therefore horizontal timing must - * be divisible by 4. - */ - if (is_h_timing_divisible_by(&display_config->stream_descriptors[stream_index].timing, 4)) { - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].dsc_enable) { - /* - * DSC h slice count must be divisible - * by 4. - */ - if (mode_support_result->cfg_support_info.stream_support_info[stream_index].num_dsc_slices % 4 == 0) - optimizable = true; - } else { - optimizable = true; - } - } - break; - case dml2_odm_mode_auto: - case dml2_odm_mode_bypass: - case dml2_odm_mode_split_1to2: - case dml2_odm_mode_mso_1to2: - case dml2_odm_mode_mso_1to4: - default: - break; - } - } - - return optimizable; -} - -static bool are_timings_trivially_synchronizable(const struct display_configuation_with_meta *display_config, int mask) -{ - unsigned char i; - bool identical = true; - bool contains_drr = false; - unsigned char remap_array[DML2_MAX_PLANES]; - unsigned char remap_array_size = 0; - - // Create a remap array to enable simple iteration through only masked stream indicies - for (i = 0; i < display_config->display_config.num_streams; i++) { - if (mask & (0x1 << i)) { - remap_array[remap_array_size++] = i; - } - } - - // 0 or 1 display is always trivially synchronizable - if (remap_array_size <= 1) - return true; - - for (i = 1; i < remap_array_size; i++) { - if (memcmp(&display_config->display_config.stream_descriptors[remap_array[i - 1]].timing, - &display_config->display_config.stream_descriptors[remap_array[i]].timing, - sizeof(struct dml2_timing_cfg))) { - identical = false; - break; - } - } - - for (i = 0; i < remap_array_size; i++) { - if (display_config->display_config.stream_descriptors[remap_array[i]].timing.drr_config.enabled) { - contains_drr = true; - break; - } - } - - return !contains_drr && identical; -} - -static void set_bit_in_bitfield(unsigned int *bit_field, unsigned int bit_offset) -{ - *bit_field = *bit_field | (0x1 << bit_offset); -} - -static bool is_bit_set_in_bitfield(unsigned int bit_field, unsigned int bit_offset) -{ - if (bit_field & (0x1 << bit_offset)) - return true; - - return false; -} - -static bool are_all_timings_drr_enabled(const struct display_configuation_with_meta *display_config, int mask) -{ - unsigned char i; - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(mask, i)) { - if (!display_config->display_config.stream_descriptors[i].timing.drr_config.enabled) - return false; - } - } - - return true; -} - -static void insert_into_candidate_list(const enum dml2_pmo_pstate_strategy *per_stream_pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) -{ - int stream_index; - - scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = true; - - for (stream_index = 0; stream_index < stream_count; stream_index++) { - scratch->pmo_dcn4.per_stream_pstate_strategy[scratch->pmo_dcn4.num_pstate_candidates][stream_index] = per_stream_pstate_strategy[stream_index]; - - if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank) - scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = false; - } - - scratch->pmo_dcn4.num_pstate_candidates++; -} - -static bool all_planes_match_strategy(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_strategy strategy) -{ - unsigned char i; - enum dml2_uclk_pstate_change_strategy matching_strategy = (enum dml2_uclk_pstate_change_strategy) dml2_pmo_pstate_strategy_na; - - if (strategy == dml2_pmo_pstate_strategy_vactive) - matching_strategy = dml2_uclk_pstate_change_strategy_force_vactive; - else if (strategy == dml2_pmo_pstate_strategy_vblank) - matching_strategy = dml2_uclk_pstate_change_strategy_force_vblank; - else if (strategy == dml2_pmo_pstate_strategy_fw_svp) - matching_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; - else if (strategy == dml2_pmo_pstate_strategy_fw_drr) - matching_strategy = dml2_uclk_pstate_change_strategy_force_drr; - - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(plane_mask, i)) { - if (display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto && - display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != matching_strategy) - return false; - } - } - - return true; -} - -static bool subvp_subvp_schedulable(struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - unsigned char *svp_stream_indicies, char svp_stream_count) -{ - struct dml2_pmo_scratch *s = &pmo->scratch; - int i; - int microschedule_lines, time_us, refresh_hz; - int max_microschedule_us = 0; - int vactive1_us, vactive2_us, vblank1_us, vblank2_us; - - const struct dml2_timing_cfg *svp_timing1 = 0; - const struct dml2_implicit_svp_meta *svp_meta1 = 0; - - const struct dml2_timing_cfg *svp_timing2 = 0; - - if (svp_stream_count <= 1) - return true; - else if (svp_stream_count > 2) - return false; - - /* Loop to calculate the maximum microschedule time between the two SubVP pipes, - * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. - */ - for (i = 0; i < svp_stream_count; i++) { - svp_timing1 = &display_cfg->display_config.stream_descriptors[svp_stream_indicies[i]].timing; - svp_meta1 = &s->pmo_dcn4.stream_svp_meta[svp_stream_indicies[i]]; - - microschedule_lines = svp_meta1->v_active; - - // Round up when calculating microschedule time (+ 1 at the end) - time_us = (int)((microschedule_lines * svp_timing1->h_total) / (double)(svp_timing1->pixel_clock_khz * 1000) * 1000000 + - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us + pmo->fams_params.v1.subvp.fw_processing_delay_us + 1); - - if (time_us > max_microschedule_us) - max_microschedule_us = time_us; - - refresh_hz = (int)((double)(svp_timing1->pixel_clock_khz * 1000) / (svp_timing1->v_total * svp_timing1->h_total)); - - if (refresh_hz < pmo->fams_params.v1.subvp.refresh_rate_limit_min || - refresh_hz > pmo->fams_params.v1.subvp.refresh_rate_limit_max) { - return false; - } - } - - svp_timing1 = &display_cfg->display_config.stream_descriptors[svp_stream_indicies[0]].timing; - svp_meta1 = &s->pmo_dcn4.stream_svp_meta[svp_stream_indicies[0]]; - - vactive1_us = (int)((svp_timing1->v_active * svp_timing1->h_total) / (double)(svp_timing1->pixel_clock_khz * 1000) * 1000000); - - vblank1_us = (int)(((svp_timing1->v_total - svp_timing1->v_active) * svp_timing1->h_total) / (double)(svp_timing1->pixel_clock_khz * 1000) * 1000000); - - svp_timing2 = &display_cfg->display_config.stream_descriptors[svp_stream_indicies[1]].timing; - - vactive2_us = (int)((svp_timing2->v_active * svp_timing2->h_total) / (double)(svp_timing2->pixel_clock_khz * 1000) * 1000000); - - vblank2_us = (int)(((svp_timing2->v_total - svp_timing2->v_active) * svp_timing2->h_total) / (double)(svp_timing2->pixel_clock_khz * 1000) * 1000000); - - if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && - (vactive2_us - vblank1_us) / 2 > max_microschedule_us) - return true; - - return false; -} - -static bool validate_svp_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int svp_stream_mask) -{ - bool result = false; - unsigned char stream_index; - - unsigned char svp_stream_indicies[2] = { 0 }; - unsigned char svp_stream_count = 0; - - // Find the SVP streams, store only the first 2, but count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(svp_stream_mask, stream_index)) { - if (svp_stream_count < 2) - svp_stream_indicies[svp_stream_count] = stream_index; - - svp_stream_count++; - } - } - - if (svp_stream_count == 1) { - result = true; // 1 SVP is always co_functional - } else if (svp_stream_count == 2) { - result = subvp_subvp_schedulable(pmo, display_cfg, svp_stream_indicies, svp_stream_count); - } - - return result; -} - -static bool validate_drr_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int drr_stream_mask) -{ - unsigned char stream_index; - int drr_stream_count = 0; - - // Find the SVP streams and count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(drr_stream_mask, stream_index)) { - drr_stream_count++; - } - } - - return drr_stream_count <= 4; -} - -static bool validate_svp_drr_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int svp_stream_mask, int drr_stream_mask) -{ - unsigned char stream_index; - int drr_stream_count = 0; - int svp_stream_count = 0; - - int prefetch_us = 0; - int mall_region_us = 0; - int drr_frame_us = 0; // nominal frame time - int subvp_active_us = 0; - int stretched_drr_us = 0; - int drr_stretched_vblank_us = 0; - int max_vblank_mallregion = 0; - - const struct dml2_timing_cfg *svp_timing = 0; - const struct dml2_timing_cfg *drr_timing = 0; - const struct dml2_implicit_svp_meta *svp_meta = 0; - - bool schedulable = false; - - // Find the SVP streams and count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(svp_stream_mask, stream_index)) { - svp_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index]; - svp_stream_count++; - } - if (is_bit_set_in_bitfield(drr_stream_mask, stream_index)) { - drr_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - drr_stream_count++; - } - } - - if (svp_stream_count == 1 && drr_stream_count == 1 && svp_timing != drr_timing) { - prefetch_us = (int)((svp_meta->v_total - svp_meta->v_front_porch) - * svp_timing->h_total / (double)(svp_timing->pixel_clock_khz * 1000) * 1000000 + - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us); - - subvp_active_us = (int)(svp_timing->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - drr_frame_us = (int)(drr_timing->v_total * drr_timing->h_total / - (double)(drr_timing->pixel_clock_khz * 1000) * 1000000); - - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = (int)(svp_meta->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; - - drr_stretched_vblank_us = (int)((drr_timing->v_total - drr_timing->v_active) * drr_timing->h_total / - (double)(drr_timing->pixel_clock_khz * 1000) * 1000000 + (stretched_drr_us - drr_frame_us)); - - max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; - - /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the - * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis - * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - * and the max of (VBLANK blanking time, MALL region)). - */ - if (stretched_drr_us < (1 / (double)drr_timing->drr_config.min_refresh_uhz) * 1000000 * 1000000 && - subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) - schedulable = true; - } - - return schedulable; -} - -static bool validate_svp_vblank_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int svp_stream_mask, int vblank_stream_mask) -{ - unsigned char stream_index; - int vblank_stream_count = 0; - int svp_stream_count = 0; - - const struct dml2_timing_cfg *svp_timing = 0; - const struct dml2_timing_cfg *vblank_timing = 0; - const struct dml2_implicit_svp_meta *svp_meta = 0; - - int prefetch_us = 0; - int mall_region_us = 0; - int vblank_frame_us = 0; - int subvp_active_us = 0; - int vblank_blank_us = 0; - int max_vblank_mallregion = 0; - - bool schedulable = false; - - // Find the SVP streams and count all of them - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (is_bit_set_in_bitfield(svp_stream_mask, stream_index)) { - svp_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - svp_meta = &pmo->scratch.pmo_dcn4.stream_svp_meta[stream_index]; - svp_stream_count++; - } - if (is_bit_set_in_bitfield(vblank_stream_mask, stream_index)) { - vblank_timing = &display_cfg->display_config.stream_descriptors[stream_index].timing; - vblank_stream_count++; - } - } - - if (svp_stream_count == 1 && vblank_stream_count > 0) { - // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe - // Also include the prefetch end to mallstart delay time - prefetch_us = (int)((svp_meta->v_total - svp_meta->v_front_porch) * svp_timing->h_total - / (double)(svp_timing->pixel_clock_khz * 1000) * 1000000 + - pmo->fams_params.v1.subvp.prefetch_end_to_mall_start_us); - - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = (int)(svp_meta->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - vblank_frame_us = (int)(vblank_timing->v_total * vblank_timing->h_total / - (double)(vblank_timing->pixel_clock_khz * 1000) * 1000000); - - vblank_blank_us = (int)((vblank_timing->v_total - vblank_timing->v_active) * vblank_timing->h_total / - (double)(vblank_timing->pixel_clock_khz * 1000) * 1000000); - - subvp_active_us = (int)(svp_timing->v_active * svp_timing->h_total / - (double)(svp_timing->pixel_clock_khz * 1000) * 1000000); - - max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us; - - // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - // and the max of (VBLANK blanking time, MALL region) - // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0) - if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0) - schedulable = true; - } - return schedulable; -} - -static bool validate_drr_vblank_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, int drr_stream_mask, int vblank_stream_mask) -{ - return false; -} - -static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[4]) -{ - struct dml2_pmo_scratch *s = &pmo->scratch; - - unsigned char stream_index = 0; - - unsigned int svp_count = 0; - unsigned int svp_stream_mask = 0; - unsigned int drr_count = 0; - unsigned int drr_stream_mask = 0; - unsigned int vactive_count = 0; - unsigned int vactive_stream_mask = 0; - unsigned int vblank_count = 0; - unsigned int vblank_stream_mask = 0; - - bool strategy_matches_forced_requirements = true; - - bool admissible = false; - - // Tabulate everything - for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - - if (!all_planes_match_strategy(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], - per_stream_pstate_strategy[stream_index])) { - strategy_matches_forced_requirements = false; - break; - } - - if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_svp) { - svp_count++; - set_bit_in_bitfield(&svp_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - drr_count++; - set_bit_in_bitfield(&drr_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vactive) { - vactive_count++; - set_bit_in_bitfield(&vactive_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank) { - vblank_count++; - set_bit_in_bitfield(&vblank_stream_mask, stream_index); - } - } - - if (!strategy_matches_forced_requirements) - return false; - - // Check for trivial synchronization for vblank - if (vblank_count > 0 && (pmo->options->disable_vblank || !are_timings_trivially_synchronizable(display_cfg, vblank_stream_mask))) - return false; - - if (svp_count > 0 && pmo->options->disable_svp) - return false; - - if (drr_count > 0 && (pmo->options->disable_drr_var || !are_all_timings_drr_enabled(display_cfg, drr_stream_mask))) - return false; - - // Validate for FAMS admissibiliy - if (svp_count == 0 && drr_count == 0) { - // No FAMS - admissible = true; - } else { - admissible = false; - if (svp_count > 0 && drr_count == 0 && vactive_count == 0 && vblank_count == 0) { - // All SVP - admissible = validate_svp_cofunctionality(pmo, display_cfg, svp_stream_mask); - } else if (svp_count == 0 && drr_count > 0 && vactive_count == 0 && vblank_count == 0) { - // All DRR - admissible = validate_drr_cofunctionality(pmo, display_cfg, drr_stream_mask); - } else if (svp_count > 0 && drr_count > 0 && vactive_count == 0 && vblank_count == 0) { - // SVP + DRR - admissible = validate_svp_drr_cofunctionality(pmo, display_cfg, svp_stream_mask, drr_stream_mask); - } else if (svp_count > 0 && drr_count == 0 && vactive_count == 0 && vblank_count > 0) { - // SVP + VBlank - admissible = validate_svp_vblank_cofunctionality(pmo, display_cfg, svp_stream_mask, vblank_stream_mask); - } else if (svp_count == 0 && drr_count > 0 && vactive_count == 0 && vblank_count > 0) { - // DRR + VBlank - admissible = validate_drr_vblank_cofunctionality(pmo, display_cfg, drr_stream_mask, vblank_stream_mask); - } - } - - return admissible; -} - -static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) -{ - unsigned char i; - int min_vactive_margin_us = 0xFFFFFFF; - - for (i = 0; i < DML2_MAX_PLANES; i++) { - if (is_bit_set_in_bitfield(plane_mask, i)) { - if (display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active < min_vactive_margin_us) - min_vactive_margin_us = display_cfg->mode_support_result.cfg_support_info.plane_support_info[i].dram_change_latency_hiding_margin_in_active; - } - } - - return min_vactive_margin_us; -} - -bool pmo_dcn4_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out) -{ - struct dml2_pmo_instance *pmo = in_out->instance; - struct dml2_optimization_stage3_state *state = &in_out->base_display_config->stage3; - struct dml2_pmo_scratch *s = &pmo->scratch; - - struct display_configuation_with_meta *display_config; - const struct dml2_plane_parameters *plane_descriptor; - const enum dml2_pmo_pstate_strategy (*strategy_list)[4] = 0; - unsigned int strategy_list_size = 0; - unsigned int plane_index, stream_index, i; - - state->performed = true; - - display_config = in_out->base_display_config; - display_config->display_config.overrides.enable_subvp_implicit_pmo = true; - - memset(s, 0, sizeof(struct dml2_pmo_scratch)); - - pmo->scratch.pmo_dcn4.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; - pmo->scratch.pmo_dcn4.max_latency_index = pmo->mcg_clock_table_size - 1; - pmo->scratch.pmo_dcn4.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; - - // First build the stream plane mask (array of bitfields indexed by stream, indicating plane mapping) - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - plane_descriptor = &display_config->display_config.plane_descriptors[plane_index]; - - set_bit_in_bitfield(&s->pmo_dcn4.stream_plane_mask[plane_descriptor->stream_index], plane_index); - - state->pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; - } - - // Figure out which streams can do vactive, and also build up implicit SVP meta - for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (get_vactive_pstate_margin(display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) >= - MIN_VACTIVE_MARGIN_US) - set_bit_in_bitfield(&s->pmo_dcn4.stream_vactive_capability_mask, stream_index); - - s->pmo_dcn4.stream_svp_meta[stream_index].valid = true; - s->pmo_dcn4.stream_svp_meta[stream_index].v_active = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_active; - s->pmo_dcn4.stream_svp_meta[stream_index].v_total = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_total; - s->pmo_dcn4.stream_svp_meta[stream_index].v_front_porch = 1; - } - - switch (display_config->display_config.num_streams) { - case 1: - strategy_list = full_strategy_list_1_display; - strategy_list_size = full_strategy_list_1_display_size; - break; - case 2: - strategy_list = full_strategy_list_2_display; - strategy_list_size = full_strategy_list_2_display_size; - break; - case 3: - strategy_list = full_strategy_list_3_display; - strategy_list_size = full_strategy_list_3_display_size; - break; - case 4: - strategy_list = full_strategy_list_4_display; - strategy_list_size = full_strategy_list_4_display_size; - break; - default: - strategy_list_size = 0; - break; - } - - if (strategy_list_size == 0) - return false; - - s->pmo_dcn4.num_pstate_candidates = 0; - - for (i = 0; i < strategy_list_size && i < DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE; i++) { - if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, strategy_list[i])) { - insert_into_candidate_list(strategy_list[i], display_config->display_config.num_streams, s); - } - } - - if (s->pmo_dcn4.num_pstate_candidates > 0) { - // There's this funny case... - // If the first entry in the candidate list is all vactive, then we can consider it "tested", so the current index is 0 - // Otherwise the current index should be -1 because we run the optimization at least once - s->pmo_dcn4.cur_pstate_candidate = 0; - for (i = 0; i < display_config->display_config.num_streams; i++) { - if (s->pmo_dcn4.per_stream_pstate_strategy[0][i] != dml2_pmo_pstate_strategy_vactive) { - s->pmo_dcn4.cur_pstate_candidate = -1; - break; - } - } - return true; - } else { - return false; - } -} - -static void reset_display_configuration(struct display_configuation_with_meta *display_config) -{ - unsigned int plane_index; - unsigned int stream_index; - struct dml2_plane_parameters *plane; - - for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - display_config->stage3.stream_svp_meta[stream_index].valid = false; - } - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - plane = &display_config->display_config.plane_descriptors[plane_index]; - - // Unset SubVP - plane->overrides.legacy_svp_config = dml2_svp_mode_override_auto; - - // Remove reserve time - plane->overrides.reserved_vblank_time_ns = 0; - - // Reset strategy to auto - plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_auto; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_not_supported; - } -} - -static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - struct dml2_plane_parameters *plane; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - plane = &display_config->display_config.plane_descriptors[plane_index]; - - // Setup DRR - plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_force_drr; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_drr; - } - } -} - -static void setup_planes_for_svp_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - int stream_index = -1; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_subvp_phantom; - } - } - - if (stream_index >= 0) { - display_config->stage3.stream_svp_meta[stream_index].valid = true; - display_config->stage3.stream_svp_meta[stream_index].v_active = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_active; - display_config->stage3.stream_svp_meta[stream_index].v_total = - display_config->mode_support_result.cfg_support_info.stream_support_info[stream_index].phantom_v_total; - display_config->stage3.stream_svp_meta[stream_index].v_front_porch = 1; - } -} - -static void setup_planes_for_vblank_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - struct dml2_plane_parameters *plane; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - plane = &display_config->display_config.plane_descriptors[plane_index]; - - // Setup reserve time - plane->overrides.reserved_vblank_time_ns = 400 * 1000; - - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vblank; - } - } -} - -static void setup_planes_for_vactive_by_mask(struct display_configuation_with_meta *display_config, int plane_mask) -{ - unsigned char plane_index; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; - } - } -} - -static bool setup_display_config(struct display_configuation_with_meta *display_config, struct dml2_pmo_scratch *scratch, int strategy_index) -{ - bool success = true; - unsigned char stream_index; - - reset_display_configuration(display_config); - - for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_na) { - success = false; - break; - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vblank) { - setup_planes_for_vblank_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_svp) { - setup_planes_for_svp_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - setup_planes_for_drr_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vactive) { - setup_planes_for_vactive_by_mask(display_config, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } - } - - return success; -} - -static int get_minimum_reserved_time_us_for_planes(struct display_configuation_with_meta *display_config, int plane_mask) -{ - int min_time_us = 0xFFFFFF; - unsigned char plane_index = 0; - - for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { - if (is_bit_set_in_bitfield(plane_mask, plane_index)) { - if (min_time_us > (display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000)) - min_time_us = display_config->display_config.plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000; - } - } - return min_time_us; -} - -bool pmo_dcn4_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out) -{ - bool p_state_supported = true; - unsigned int stream_index; - struct dml2_pmo_scratch *s = &in_out->instance->scratch; - - if (s->pmo_dcn4.cur_pstate_candidate < 0) - return false; - - for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { - - if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vactive) { - if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_US) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vblank) { - if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < - in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_svp) { - if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - if (!all_planes_match_strategy(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr)) { - p_state_supported = false; - break; - } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_na) { - p_state_supported = false; - break; - } - } - - return p_state_supported; -} - -bool pmo_dcn4_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out) -{ - bool success = false; - struct dml2_pmo_scratch *s = &in_out->instance->scratch; - - memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); - - if (in_out->last_candidate_failed) { - if (s->pmo_dcn4.allow_state_increase_for_strategy[s->pmo_dcn4.cur_pstate_candidate] && - s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index) { - s->pmo_dcn4.cur_latency_index++; - - success = true; - } - } - - if (!success) { - s->pmo_dcn4.cur_latency_index = s->pmo_dcn4.min_latency_index; - s->pmo_dcn4.cur_pstate_candidate++; - - if (s->pmo_dcn4.cur_pstate_candidate < s->pmo_dcn4.num_pstate_candidates) { - success = true; - } - } - - if (success) { - in_out->optimized_display_config->stage3.min_clk_index_for_latency = s->pmo_dcn4.cur_latency_index; - setup_display_config(in_out->optimized_display_config, &in_out->instance->scratch, in_out->instance->scratch.pmo_dcn4.cur_pstate_candidate); - } - - return success; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h deleted file mode 100644 index 09cacc933d21..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4.h +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - - -#ifndef __DML2_PMO_DCN4_H__ -#define __DML2_PMO_DCN4_H__ - -#include "dml2_internal_shared_types.h" - -bool pmo_dcn4_initialize(struct dml2_pmo_initialize_in_out *in_out); - -bool pmo_dcn4_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_out *in_out); - -bool pmo_dcn4_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out); -bool pmo_dcn4_test_for_vmin(struct dml2_pmo_test_for_vmin_in_out *in_out); -bool pmo_dcn4_optimize_for_vmin(struct dml2_pmo_optimize_for_vmin_in_out *in_out); - -bool pmo_dcn4_init_for_pstate_support(struct dml2_pmo_init_for_pstate_support_in_out *in_out); -bool pmo_dcn4_test_for_pstate_support(struct dml2_pmo_test_for_pstate_support_in_out *in_out); -bool pmo_dcn4_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pstate_support_in_out *in_out); - -bool pmo_dcn4_unit_test(void); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 603036df68ba..30767f330fd4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -1,122 +1,181 @@ -/* -* Copyright 2022 Advanced Micro Devices, Inc. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE. -* -* Authors: AMD -* -*/ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. #include "dml2_pmo_factory.h" -#include "dml2_pmo_dcn4.h" #include "dml2_debug.h" #include "lib_float_math.h" #include "dml2_pmo_dcn4_fams2.h" static const double MIN_VACTIVE_MARGIN_PCT = 0.25; // We need more than non-zero margin because DET buffer granularity can alter vactive latency hiding -static const enum dml2_pmo_pstate_strategy base_strategy_list_1_display[][PMO_DCN4_MAX_DISPLAYS] = { +static const struct dml2_pmo_pstate_strategy base_strategy_list_1_display[] = { // VActive Preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then VBlank - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Finally DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, + + // Then DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + + // Finally VBlank, but allow base clocks for latency to increase + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); +static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1_display) / sizeof(struct dml2_pmo_pstate_strategy); -static const enum dml2_pmo_pstate_strategy base_strategy_list_2_display[][PMO_DCN4_MAX_DISPLAYS] = { +static const struct dml2_pmo_pstate_strategy base_strategy_list_2_display[] = { // VActive only is preferred - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then VActive + VBlank - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, // Then VBlank only - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, // Then SVP + VBlank - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, // Then SVP + DRR - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then SVP + SVP - { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, // Then DRR + VActive - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Then DRR + VBlank - //{ dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, - - // Finally DRR + DRR - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + + // Then DRR + DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + + // Finally VBlank, but allow base clocks for latency to increase + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_2_display_size = sizeof(base_strategy_list_2_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); - -static const enum dml2_pmo_pstate_strategy base_strategy_list_3_display[][PMO_DCN4_MAX_DISPLAYS] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // All VActive - - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // VActive + 1 VBlank - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, // VActive + 2 VBlank - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, // All VBlank - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // VBlank + 1 DRR - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // VBlank + 2 DRR - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, // All DRR +static const int base_strategy_list_2_display_size = sizeof(base_strategy_list_2_display) / sizeof(struct dml2_pmo_pstate_strategy); + +static const struct dml2_pmo_pstate_strategy base_strategy_list_3_display[] = { + // All VActive + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + + // VActive + 1 VBlank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, + + // All VBlank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = false, + }, + + // All DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + + // All VBlank, with state increase allowed + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); - -static const enum dml2_pmo_pstate_strategy base_strategy_list_4_display[][PMO_DCN4_MAX_DISPLAYS] = { - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, // All VActive - - { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, // VActive + 1 VBlank - - //{ dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // VActive + 2 VBlank - - //{ dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // VActive + 3 VBlank - - { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, // All Vblank - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr }, // VBlank + 1 DRR - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // VBlank + 2 DRR - - //{ dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // VBlank + 3 DRR - - { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, // All DRR +static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3_display) / sizeof(struct dml2_pmo_pstate_strategy); + +static const struct dml2_pmo_pstate_strategy base_strategy_list_4_display[] = { + // All VActive + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, + .allow_state_increase = true, + }, + + // VActive + 1 VBlank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, + .allow_state_increase = false, + }, + + // All Vblank + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, + .allow_state_increase = false, + }, + + // All DRR + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, + .allow_state_increase = true, + }, + + // All VBlank, with state increase allowed + /* + { + .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, + .allow_state_increase = true, + }, + */ }; -static const int base_strategy_list_4_display_size = sizeof(base_strategy_list_4_display) / (sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); +static const int base_strategy_list_4_display_size = sizeof(base_strategy_list_4_display) / sizeof(struct dml2_pmo_pstate_strategy); static bool increase_odm_combine_factor(enum dml2_odm_mode *odm_mode, int odms_calculated) @@ -296,9 +355,9 @@ bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_o return result; } -static enum dml2_pmo_pstate_strategy convert_strategy_to_drr_variant(const enum dml2_pmo_pstate_strategy base_strategy) +static enum dml2_pmo_pstate_method convert_strategy_to_drr_variant(const enum dml2_pmo_pstate_method base_strategy) { - enum dml2_pmo_pstate_strategy variant_strategy = 0; + enum dml2_pmo_pstate_method variant_strategy = 0; switch (base_strategy) { case dml2_pmo_pstate_strategy_vactive: @@ -327,11 +386,9 @@ static enum dml2_pmo_pstate_strategy convert_strategy_to_drr_variant(const enum return variant_strategy; } -static enum dml2_pmo_pstate_strategy(*get_expanded_strategy_list( - struct dml2_pmo_init_data *init_data, - int stream_count))[PMO_DCN4_MAX_DISPLAYS] +static struct dml2_pmo_pstate_strategy *get_expanded_strategy_list(struct dml2_pmo_init_data *init_data, int stream_count) { - enum dml2_pmo_pstate_strategy(*expanded_strategy_list)[PMO_DCN4_MAX_DISPLAYS] = NULL; + struct dml2_pmo_pstate_strategy *expanded_strategy_list = NULL; switch (stream_count) { case 1: @@ -361,23 +418,23 @@ static unsigned int get_num_expanded_strategies( } static void insert_strategy_into_expanded_list( - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *per_stream_pstate_strategy, int stream_count, struct dml2_pmo_init_data *init_data) { - enum dml2_pmo_pstate_strategy(*expanded_strategy_list)[PMO_DCN4_MAX_DISPLAYS] = NULL; + struct dml2_pmo_pstate_strategy *expanded_strategy_list = NULL; expanded_strategy_list = get_expanded_strategy_list(init_data, stream_count); if (expanded_strategy_list) { - memcpy(&expanded_strategy_list[init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]++], - per_stream_pstate_strategy, - sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); + memcpy(&expanded_strategy_list[init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]], per_stream_pstate_strategy, sizeof(struct dml2_pmo_pstate_strategy)); + + init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]++; } } static void expand_base_strategy(struct dml2_pmo_instance *pmo, - const enum dml2_pmo_pstate_strategy base_strategy_list[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *base_strategy, unsigned int stream_count) { bool skip_to_next_stream; @@ -386,19 +443,21 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo, unsigned int i, j; unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; unsigned int stream_iteration_indices[PMO_DCN4_MAX_DISPLAYS] = { 0 }; - enum dml2_pmo_pstate_strategy cur_strategy_list[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + struct dml2_pmo_pstate_strategy cur_strategy_list = { 0 }; /* determine number of displays per method */ for (i = 0; i < stream_count; i++) { /* increment the count of the earliest index with the same method */ for (j = 0; j < stream_count; j++) { - if (base_strategy_list[i] == base_strategy_list[j]) { + if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) { num_streams_per_method[j] = num_streams_per_method[j] + 1; break; } } } + cur_strategy_list.allow_state_increase = base_strategy->allow_state_increase; + i = 0; /* uses a while loop instead of recursion to build permutations of base strategy */ while (stream_iteration_indices[0] < stream_count) { @@ -409,12 +468,12 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo, /* determine what to do for this iteration */ if (stream_iteration_indices[i] < stream_count && num_streams_per_method[stream_iteration_indices[i]] != 0) { /* decrement count and assign method */ - cur_strategy_list[i] = base_strategy_list[stream_iteration_indices[i]]; + cur_strategy_list.per_stream_pstate_method[i] = base_strategy->per_stream_pstate_method[stream_iteration_indices[i]]; num_streams_per_method[stream_iteration_indices[i]] -= 1; if (i >= stream_count - 1) { /* insert into strategy list */ - insert_strategy_into_expanded_list(cur_strategy_list, stream_count, &pmo->init_data); + insert_strategy_into_expanded_list(&cur_strategy_list, stream_count, &pmo->init_data); expanded_strategy_added = true; } else { /* skip to next stream */ @@ -450,55 +509,122 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo, } } -static void expand_variant_strategy(struct dml2_pmo_instance *pmo, - const enum dml2_pmo_pstate_strategy base_strategy_list[PMO_DCN4_MAX_DISPLAYS], + +static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_strategy, + const struct dml2_pmo_pstate_strategy *variant_strategy, + unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS], + unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS], unsigned int stream_count) { + bool valid = true; unsigned int i; - bool variant_found = false; - enum dml2_pmo_pstate_strategy cur_strategy_list[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + /* check all restrictions are met */ + for (i = 0; i < stream_count; i++) { + /* vblank + vblank_drr variants are invalid */ + if (base_strategy->per_stream_pstate_method[i] == dml2_pmo_pstate_strategy_vblank && + ((num_streams_per_base_method[i] > 0 && num_streams_per_variant_method[i] > 0) || + num_streams_per_variant_method[i] > 1)) { + valid = false; + break; + } + } + + return valid; +} - /* setup variant list as base to start */ - memcpy(cur_strategy_list, base_strategy_list, sizeof(enum dml2_pmo_pstate_strategy) * PMO_DCN4_MAX_DISPLAYS); +static void expand_variant_strategy(struct dml2_pmo_instance *pmo, + const struct dml2_pmo_pstate_strategy *base_strategy, + unsigned int stream_count) +{ + bool variant_found; + unsigned int i, j; + unsigned int method_index; + unsigned int stream_index; + unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; + enum dml2_pmo_pstate_method per_stream_variant_method[DML2_MAX_PLANES]; + struct dml2_pmo_pstate_strategy variant_strategy = { 0 }; + /* determine number of displays per method */ for (i = 0; i < stream_count; i++) { - cur_strategy_list[i] = convert_strategy_to_drr_variant(base_strategy_list[i]); + /* increment the count of the earliest index with the same method */ + for (j = 0; j < stream_count; j++) { + if (base_strategy->per_stream_pstate_method[i] == base_strategy->per_stream_pstate_method[j]) { + num_streams_per_method[j] = num_streams_per_method[j] + 1; + break; + } + } + + per_stream_variant_method[i] = convert_strategy_to_drr_variant(base_strategy->per_stream_pstate_method[i]); + } + memcpy(num_streams_per_base_method, num_streams_per_method, sizeof(unsigned int) * PMO_DCN4_MAX_DISPLAYS); + + memcpy(&variant_strategy, base_strategy, sizeof(struct dml2_pmo_pstate_strategy)); - if (cur_strategy_list[i] != base_strategy_list[i]) { - variant_found = true; + method_index = 0; + /* uses a while loop instead of recursion to build permutations of base strategy */ + while (num_streams_per_base_method[0] > 0 || method_index != 0) { + if (method_index == stream_count) { + /* construct variant strategy */ + variant_found = false; + stream_index = 0; + + for (i = 0; i < stream_count; i++) { + for (j = 0; j < num_streams_per_base_method[i]; j++) { + variant_strategy.per_stream_pstate_method[stream_index++] = base_strategy->per_stream_pstate_method[i]; + } + + for (j = 0; j < num_streams_per_variant_method[i]; j++) { + variant_strategy.per_stream_pstate_method[stream_index++] = per_stream_variant_method[i]; + if (base_strategy->per_stream_pstate_method[i] != per_stream_variant_method[i]) { + variant_found = true; + } + } + } + + if (variant_found && is_variant_method_valid(base_strategy, &variant_strategy, num_streams_per_base_method, num_streams_per_variant_method, stream_count)) { + expand_base_strategy(pmo, &variant_strategy, stream_count); + } + + /* rollback to earliest method with bases remaining */ + for (method_index = stream_count - 1; method_index > 0; method_index--) { + if (num_streams_per_base_method[method_index]) { + /* bases remaining */ + break; + } else { + /* reset counters */ + num_streams_per_base_method[method_index] = num_streams_per_method[method_index]; + num_streams_per_variant_method[method_index] = 0; + } + } } - if (i == stream_count - 1 && variant_found) { - insert_strategy_into_expanded_list(cur_strategy_list, stream_count, &pmo->init_data); + if (num_streams_per_base_method[method_index]) { + num_streams_per_base_method[method_index]--; + num_streams_per_variant_method[method_index]++; + + method_index++; + } else if (method_index != 0) { + method_index++; } } } static void expand_base_strategies( struct dml2_pmo_instance *pmo, - const enum dml2_pmo_pstate_strategy(*base_strategies_list)[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *base_strategies_list, const unsigned int num_base_strategies, unsigned int stream_count) { unsigned int i; - unsigned int num_pre_variant_strategies; - enum dml2_pmo_pstate_strategy(*expanded_strategy_list)[PMO_DCN4_MAX_DISPLAYS]; /* expand every explicit base strategy (except all DRR) */ - for (i = 0; i < num_base_strategies - 1; i++) { - expand_base_strategy(pmo, base_strategies_list[i], stream_count); - } - - /* expand base strategies to DRR variants */ - num_pre_variant_strategies = get_num_expanded_strategies(&pmo->init_data, stream_count); - expanded_strategy_list = get_expanded_strategy_list(&pmo->init_data, stream_count); - for (i = 0; i < num_pre_variant_strategies; i++) { - expand_variant_strategy(pmo, expanded_strategy_list[i], stream_count); + for (i = 0; i < num_base_strategies; i++) { + expand_base_strategy(pmo, &base_strategies_list[i], stream_count); + expand_variant_strategy(pmo, &base_strategies_list[i], stream_count); } - - /* add back all DRR */ - insert_strategy_into_expanded_list(base_strategies_list[num_base_strategies - 1], stream_count, &pmo->init_data); } bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) @@ -591,6 +717,8 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) &in_out->base_display_config->display_config; const struct dml2_core_mode_support_result *mode_support_result = &in_out->base_display_config->mode_support_result; + struct dml2_optimization_stage4_state *state = + &in_out->base_display_config->stage4; if (in_out->instance->options->disable_dyn_odm || (in_out->instance->options->disable_dyn_odm_for_multi_stream && display_config->num_streams > 1)) @@ -611,28 +739,30 @@ bool pmo_dcn4_fams2_init_for_vmin(struct dml2_pmo_init_for_vmin_in_out *in_out) */ if (mode_support_result->cfg_support_info.plane_support_info[i].dpps_used > 1 && mode_support_result->cfg_support_info.stream_support_info[display_config->plane_descriptors[i].stream_index].odms_used == 1) - in_out->base_display_config->stage4.unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; + state->unoptimizable_streams[display_config->plane_descriptors[i].stream_index] = true; for (i = 0; i < display_config->num_streams; i++) { if (display_config->stream_descriptors[i].overrides.disable_dynamic_odm) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; else if (in_out->base_display_config->stage3.stream_svp_meta[i].valid && in_out->instance->options->disable_dyn_odm_for_stream_with_svp) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; /* * ODM Combine requires horizontal timing divisible by 2 so each * ODM segment has the same size. */ else if (!is_h_timing_divisible_by(&display_config->stream_descriptors[i].timing, 2)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; /* * Our hardware support seamless ODM transitions for DP encoders * only. */ else if (!is_dp_encoder(display_config->stream_descriptors[i].output.output_encoder)) - in_out->base_display_config->stage4.unoptimizable_streams[i] = true; + state->unoptimizable_streams[i] = true; } + state->performed = true; + return true; } @@ -783,6 +913,7 @@ static void build_synchronized_timing_groups( /* clear all group masks */ memset(s->pmo_dcn4.synchronized_timing_group_masks, 0, sizeof(s->pmo_dcn4.synchronized_timing_group_masks)); memset(s->pmo_dcn4.group_is_drr_enabled, 0, sizeof(s->pmo_dcn4.group_is_drr_enabled)); + memset(s->pmo_dcn4.group_is_drr_active, 0, sizeof(s->pmo_dcn4.group_is_drr_active)); memset(s->pmo_dcn4.group_line_time_us, 0, sizeof(s->pmo_dcn4.group_line_time_us)); s->pmo_dcn4.num_timing_groups = 0; @@ -804,6 +935,8 @@ static void build_synchronized_timing_groups( /* if drr is in use, timing is not sychnronizable */ if (master_timing->drr_config.enabled) { s->pmo_dcn4.group_is_drr_enabled[timing_group_idx] = true; + s->pmo_dcn4.group_is_drr_active[timing_group_idx] = !master_timing->drr_config.disallowed && + (master_timing->drr_config.drr_active_fixed || master_timing->drr_config.drr_active_variable); continue; } @@ -883,7 +1016,7 @@ static bool all_timings_support_drr(const struct dml2_pmo_instance *pmo, stream_descriptor = &display_config->display_config.stream_descriptors[i]; stream_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[i]; - if (!stream_descriptor->timing.drr_config.enabled || stream_descriptor->overrides.disable_fams2_drr) + if (!stream_descriptor->timing.drr_config.enabled) return false; /* cannot support required vtotal */ @@ -966,35 +1099,24 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, return true; } -static void insert_into_candidate_list(const enum dml2_pmo_pstate_strategy *per_stream_pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) +static void insert_into_candidate_list(const struct dml2_pmo_pstate_strategy *pstate_strategy, int stream_count, struct dml2_pmo_scratch *scratch) { - int stream_index; - - scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = true; - - for (stream_index = 0; stream_index < stream_count; stream_index++) { - scratch->pmo_dcn4.per_stream_pstate_strategy[scratch->pmo_dcn4.num_pstate_candidates][stream_index] = per_stream_pstate_strategy[stream_index]; - - if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank || - per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) - scratch->pmo_dcn4.allow_state_increase_for_strategy[scratch->pmo_dcn4.num_pstate_candidates] = false; - } - + scratch->pmo_dcn4.pstate_strategy_candidates[scratch->pmo_dcn4.num_pstate_candidates] = *pstate_strategy; scratch->pmo_dcn4.num_pstate_candidates++; } -static bool all_planes_match_strategy(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_strategy strategy) +static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_method method) { unsigned char i; enum dml2_uclk_pstate_change_strategy matching_strategy = (enum dml2_uclk_pstate_change_strategy) dml2_pmo_pstate_strategy_na; - if (strategy == dml2_pmo_pstate_strategy_vactive || strategy == dml2_pmo_pstate_strategy_fw_vactive_drr) + if (method == dml2_pmo_pstate_strategy_vactive || method == dml2_pmo_pstate_strategy_fw_vactive_drr) matching_strategy = dml2_uclk_pstate_change_strategy_force_vactive; - else if (strategy == dml2_pmo_pstate_strategy_vblank || strategy == dml2_pmo_pstate_strategy_fw_vblank_drr) + else if (method == dml2_pmo_pstate_strategy_vblank || method == dml2_pmo_pstate_strategy_fw_vblank_drr) matching_strategy = dml2_uclk_pstate_change_strategy_force_vblank; - else if (strategy == dml2_pmo_pstate_strategy_fw_svp) + else if (method == dml2_pmo_pstate_strategy_fw_svp) matching_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; - else if (strategy == dml2_pmo_pstate_strategy_fw_drr) + else if (method == dml2_pmo_pstate_strategy_fw_drr) matching_strategy = dml2_uclk_pstate_change_strategy_force_drr; for (i = 0; i < DML2_MAX_PLANES; i++) { @@ -1026,12 +1148,12 @@ static void build_method_scheduling_params( static struct dml2_fams2_per_method_common_meta *get_per_method_common_meta( struct dml2_pmo_instance *pmo, - enum dml2_pmo_pstate_strategy stream_pstate_strategy, + enum dml2_pmo_pstate_method stream_pstate_method, int stream_idx) { struct dml2_fams2_per_method_common_meta *stream_method_fams2_meta = NULL; - switch (stream_pstate_strategy) { + switch (stream_pstate_method) { case dml2_pmo_pstate_strategy_vactive: case dml2_pmo_pstate_strategy_fw_vactive_drr: stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vactive.common; @@ -1062,7 +1184,7 @@ static struct dml2_fams2_per_method_common_meta *get_per_method_common_meta( static bool is_timing_group_schedulable( struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS], + const struct dml2_pmo_pstate_strategy *pstate_strategy, const unsigned int timing_group_idx, struct dml2_fams2_per_method_common_meta *group_fams2_meta) { @@ -1081,7 +1203,7 @@ static bool is_timing_group_schedulable( } /* init allow start and end lines for timing group */ - stream_method_fams2_meta = get_per_method_common_meta(pmo, per_stream_pstate_strategy[base_stream_idx], base_stream_idx); + stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[base_stream_idx], base_stream_idx); if (!stream_method_fams2_meta) return false; @@ -1090,9 +1212,9 @@ static bool is_timing_group_schedulable( group_fams2_meta->period_us = stream_method_fams2_meta->period_us; for (i = base_stream_idx + 1; i < display_cfg->display_config.num_streams; i++) { if (is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i)) { - stream_method_fams2_meta = get_per_method_common_meta(pmo, per_stream_pstate_strategy[i], i); + stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[i], i); if (!stream_method_fams2_meta) - continue; + return false; if (group_fams2_meta->allow_start_otg_vline < stream_method_fams2_meta->allow_start_otg_vline) { /* set group allow start to larger otg vline */ @@ -1122,7 +1244,7 @@ static bool is_timing_group_schedulable( static bool is_config_schedulable( struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS]) + const struct dml2_pmo_pstate_strategy *pstate_strategy) { unsigned int i, j; bool schedulable; @@ -1145,7 +1267,7 @@ static bool is_config_schedulable( for (i = 0; i < s->pmo_dcn4.num_timing_groups; i++) { s->pmo_dcn4.sorted_group_gtl_disallow_index[i] = i; s->pmo_dcn4.sorted_group_gtl_period_index[i] = i; - if (!is_timing_group_schedulable(pmo, display_cfg, per_stream_pstate_strategy, i, &s->pmo_dcn4.group_common_fams2_meta[i])) { + if (!is_timing_group_schedulable(pmo, display_cfg, pstate_strategy, i, &s->pmo_dcn4.group_common_fams2_meta[i])) { /* synchronized timing group was not schedulable */ schedulable = false; break; @@ -1247,7 +1369,7 @@ static bool is_config_schedulable( unsigned int sorted_ip1 = s->pmo_dcn4.sorted_group_gtl_period_index[i + 1]; if (s->pmo_dcn4.group_common_fams2_meta[sorted_i].allow_time_us < s->pmo_dcn4.group_common_fams2_meta[sorted_ip1].period_us || - s->pmo_dcn4.group_is_drr_enabled[sorted_ip1]) { + (s->pmo_dcn4.group_is_drr_enabled[sorted_ip1] && s->pmo_dcn4.group_is_drr_active[sorted_ip1])) { schedulable = false; break; } @@ -1259,8 +1381,8 @@ static bool is_config_schedulable( /* STAGE 4: When using HW exclusive modes, check disallow alignments are within allowed threshold */ if (s->pmo_dcn4.num_timing_groups == 2 && - !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, per_stream_pstate_strategy[0]) && - !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, per_stream_pstate_strategy[1])) { + !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[0]) && + !is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, pstate_strategy->per_stream_pstate_method[1])) { double period_ratio; double max_shift_us; double shift_per_period; @@ -1289,44 +1411,48 @@ static bool is_config_schedulable( } static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, - const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy stream_pstate_strategy, - unsigned int stream_index) + const struct display_configuation_with_meta *display_cfg, + const enum dml2_pmo_pstate_method stream_pstate_method, + unsigned int stream_index) { const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[stream_index]; bool strategy_matches_drr_requirements = true; /* check if strategy is compatible with stream drr capability and strategy */ - if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_strategy) && + if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_method) && display_cfg->display_config.num_streams > 1 && stream_descriptor->timing.drr_config.enabled && (stream_descriptor->timing.drr_config.drr_active_fixed || stream_descriptor->timing.drr_config.drr_active_variable)) { /* DRR is active, so config may become unschedulable */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_strategy) && - is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_NO_DRR_STRATEGY_MASK, stream_pstate_method) && + is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && stream_descriptor->timing.drr_config.enabled && stream_descriptor->timing.drr_config.drr_active_variable) { /* DRR is variable, fw exclusive methods require DRR to be clamped */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_method) && pmo->options->disable_drr_var_when_var_active && stream_descriptor->timing.drr_config.enabled && stream_descriptor->timing.drr_config.drr_active_variable) { /* DRR variable is active, but policy blocks DRR for p-state when this happens */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_DRR_VAR_STRATEGY_MASK, stream_pstate_method) && (pmo->options->disable_drr_var || !stream_descriptor->timing.drr_config.enabled || stream_descriptor->timing.drr_config.disallowed)) { /* DRR variable strategies are disallowed due to settings or policy */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_strategy) && - (pmo->options->disable_drr_clamped || - !stream_descriptor->timing.drr_config.enabled)) { + } else if (is_bit_set_in_bitfield(PMO_DRR_CLAMPED_STRATEGY_MASK, stream_pstate_method) && + (pmo->options->disable_drr_clamped || + (!stream_descriptor->timing.drr_config.enabled || + (!stream_descriptor->timing.drr_config.drr_active_fixed && !stream_descriptor->timing.drr_config.drr_active_variable)) || + (pmo->options->disable_drr_clamped_when_var_active && + stream_descriptor->timing.drr_config.enabled && + stream_descriptor->timing.drr_config.drr_active_variable))) { /* DRR fixed strategies are disallowed due to settings or policy */ strategy_matches_drr_requirements = false; - } else if (is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_strategy) && + } else if (is_bit_set_in_bitfield(PMO_FW_STRATEGY_MASK, stream_pstate_method) && pmo->options->disable_fams2) { /* FW modes require FAMS2 */ strategy_matches_drr_requirements = false; @@ -1337,7 +1463,7 @@ static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo, static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[PMO_DCN4_MAX_DISPLAYS]) + const struct dml2_pmo_pstate_strategy *pstate_strategy) { struct dml2_pmo_scratch *s = &pmo->scratch; @@ -1358,28 +1484,28 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins // Tabulate everything for (stream_index = 0; stream_index < display_cfg->display_config.num_streams; stream_index++) { - if (!all_planes_match_strategy(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], - per_stream_pstate_strategy[stream_index])) { + if (!all_planes_match_method(display_cfg, s->pmo_dcn4.stream_plane_mask[stream_index], + pstate_strategy->per_stream_pstate_method[stream_index])) { strategy_matches_forced_requirements = false; break; } strategy_matches_drr_requirements &= - stream_matches_drr_policy(pmo, display_cfg, per_stream_pstate_strategy[stream_index], stream_index); + stream_matches_drr_policy(pmo, display_cfg, pstate_strategy->per_stream_pstate_method[stream_index], stream_index); - if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_svp || - per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { svp_count++; set_bit_in_bitfield(&svp_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { drr_count++; set_bit_in_bitfield(&drr_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vactive || - per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { vactive_count++; set_bit_in_bitfield(&vactive_stream_mask, stream_index); - } else if (per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_vblank || - per_stream_pstate_strategy[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { vblank_count++; set_bit_in_bitfield(&vblank_stream_mask, stream_index); } @@ -1388,7 +1514,7 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins if (!strategy_matches_forced_requirements || !strategy_matches_drr_requirements) return false; - if (vactive_count > 0 && (pmo->options->disable_vblank || !all_timings_support_vactive(pmo, display_cfg, vactive_stream_mask))) + if (vactive_count > 0 && !all_timings_support_vactive(pmo, display_cfg, vactive_stream_mask)) return false; if (vblank_count > 0 && (pmo->options->disable_vblank || !all_timings_support_vblank(pmo, display_cfg, vblank_stream_mask))) @@ -1400,7 +1526,7 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins if (svp_count > 0 && (pmo->options->disable_svp || !all_timings_support_svp(pmo, display_cfg, svp_stream_mask))) return false; - return is_config_schedulable(pmo, display_cfg, per_stream_pstate_strategy); + return is_config_schedulable(pmo, display_cfg, pstate_strategy); } static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask) @@ -1456,6 +1582,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, (stream_fams2_meta->nom_vtotal * timing->h_total); stream_fams2_meta->nom_frame_time_us = (double)stream_fams2_meta->nom_vtotal * stream_fams2_meta->otg_vline_time_us; + stream_fams2_meta->vblank_start = timing->v_blank_end + timing->v_active; if (stream_descriptor->timing.drr_config.enabled == true) { if (stream_descriptor->timing.drr_config.min_refresh_uhz != 0.0) { @@ -1509,7 +1636,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, stream_fams2_meta->method_vactive.common.allow_start_otg_vline = timing->v_blank_end + stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; stream_fams2_meta->method_vactive.common.allow_end_otg_vline = - timing->v_blank_end + timing->v_active - + stream_fams2_meta->vblank_start - stream_fams2_meta->dram_clk_change_blackout_otg_vlines; } else { stream_fams2_meta->method_vactive.common.allow_start_otg_vline = 0; @@ -1519,8 +1646,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, build_method_scheduling_params(&stream_fams2_meta->method_vactive.common, stream_fams2_meta); /* vblank */ - stream_fams2_meta->method_vblank.common.allow_start_otg_vline = - timing->v_blank_end + timing->v_active; + stream_fams2_meta->method_vblank.common.allow_start_otg_vline = stream_fams2_meta->vblank_start; stream_fams2_meta->method_vblank.common.allow_end_otg_vline = stream_fams2_meta->method_vblank.common.allow_start_otg_vline + 1; stream_fams2_meta->method_vblank.common.period_us = stream_fams2_meta->nom_frame_time_us; @@ -1554,8 +1680,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines + stream_fams2_meta->allow_to_target_delay_otg_vlines; stream_fams2_meta->method_subvp.common.allow_end_otg_vline = - stream_fams2_meta->nom_vtotal - - timing->v_front_porch - + stream_fams2_meta->vblank_start - stream_fams2_meta->dram_clk_change_blackout_otg_vlines; stream_fams2_meta->method_subvp.common.period_us = stream_fams2_meta->nom_frame_time_us; build_method_scheduling_params(&stream_fams2_meta->method_subvp.common, stream_fams2_meta); @@ -1564,20 +1689,21 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, stream_fams2_meta->method_drr.programming_delay_otg_vlines = (unsigned int)math_ceil(ip_caps->fams2.drr_programming_delay_us / stream_fams2_meta->otg_vline_time_us); stream_fams2_meta->method_drr.common.allow_start_otg_vline = - stream_fams2_meta->nom_vtotal + + stream_fams2_meta->vblank_start + stream_fams2_meta->allow_to_target_delay_otg_vlines; stream_fams2_meta->method_drr.common.period_us = stream_fams2_meta->nom_frame_time_us; if (display_config->display_config.num_streams <= 1) { /* only need to stretch vblank for blackout time */ stream_fams2_meta->method_drr.stretched_vtotal = - stream_fams2_meta->method_drr.common.allow_start_otg_vline + + stream_fams2_meta->nom_vtotal + + stream_fams2_meta->allow_to_target_delay_otg_vlines + stream_fams2_meta->min_allow_width_otg_vlines + stream_fams2_meta->dram_clk_change_blackout_otg_vlines; } else { /* multi display needs to always be schedulable */ stream_fams2_meta->method_drr.stretched_vtotal = - stream_fams2_meta->method_drr.common.allow_start_otg_vline + - stream_fams2_meta->nom_vtotal + + stream_fams2_meta->nom_vtotal * 2 + + stream_fams2_meta->allow_to_target_delay_otg_vlines + stream_fams2_meta->min_allow_width_otg_vlines + stream_fams2_meta->dram_clk_change_blackout_otg_vlines; } @@ -1610,7 +1736,7 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp struct display_configuation_with_meta *display_config; const struct dml2_plane_parameters *plane_descriptor; - const enum dml2_pmo_pstate_strategy(*strategy_list)[PMO_DCN4_MAX_DISPLAYS] = NULL; + const struct dml2_pmo_pstate_strategy *strategy_list = NULL; unsigned int strategy_list_size = 0; unsigned char plane_index, stream_index, i; @@ -1622,6 +1748,10 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp memset(s, 0, sizeof(struct dml2_pmo_scratch)); + if (display_config->display_config.overrides.all_streams_blanked) { + return true; + } + pmo->scratch.pmo_dcn4.min_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; pmo->scratch.pmo_dcn4.max_latency_index = pmo->mcg_clock_table_size; pmo->scratch.pmo_dcn4.cur_latency_index = in_out->base_display_config->stage1.min_clk_index_for_latency; @@ -1651,6 +1781,9 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp build_synchronized_timing_groups(pmo, display_config); strategy_list = get_expanded_strategy_list(&pmo->init_data, display_config->display_config.num_streams); + if (!strategy_list) + return false; + strategy_list_size = get_num_expanded_strategies(&pmo->init_data, display_config->display_config.num_streams); if (strategy_list_size == 0) @@ -1659,8 +1792,8 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp s->pmo_dcn4.num_pstate_candidates = 0; for (i = 0; i < strategy_list_size && s->pmo_dcn4.num_pstate_candidates < DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE; i++) { - if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, strategy_list[i])) { - insert_into_candidate_list(strategy_list[i], display_config->display_config.num_streams, s); + if (validate_pstate_support_strategy_cofunctionality(pmo, display_config, &strategy_list[i])) { + insert_into_candidate_list(&strategy_list[i], display_config->display_config.num_streams, s); } } @@ -1777,7 +1910,8 @@ static void setup_planes_for_vblank_by_mask(struct display_configuation_with_met if (is_bit_set_in_bitfield(plane_mask, plane_index)) { plane = &display_config->display_config.plane_descriptors[plane_index]; - plane->overrides.reserved_vblank_time_ns = (long)(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000); + plane->overrides.reserved_vblank_time_ns = (long)math_max2(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000.0, + plane->overrides.reserved_vblank_time_ns); display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vblank; @@ -1856,26 +1990,26 @@ static bool setup_display_config(struct display_configuation_with_meta *display_ for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (pmo->scratch.pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_na) { + if (pmo->scratch.pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_na) { success = false; break; - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vactive) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive) { setup_planes_for_vactive_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_vblank) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank) { setup_planes_for_vblank_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_svp) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp) { fams2_required = true; setup_planes_for_svp_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { fams2_required = true; setup_planes_for_vactive_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { fams2_required = true; setup_planes_for_vblank_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { fams2_required = true; setup_planes_for_svp_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.per_stream_pstate_strategy[strategy_index][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { fams2_required = true; setup_planes_for_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); } @@ -1916,6 +2050,10 @@ bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_supp int MIN_VACTIVE_MARGIN_DRR = 0; int REQUIRED_RESERVED_TIME = 0; + if (in_out->base_display_config->display_config.overrides.all_streams_blanked) { + return true; + } + MIN_VACTIVE_MARGIN_VBLANK = INT_MIN; MIN_VACTIVE_MARGIN_DRR = INT_MIN; REQUIRED_RESERVED_TIME = (int)in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us; @@ -1926,34 +2064,34 @@ bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_supp for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { struct dml2_fams2_meta *stream_fams2_meta = &s->pmo_dcn4.stream_fams2_meta[stream_index]; - if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vactive || - s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < (MIN_VACTIVE_MARGIN_PCT * in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) || get_vactive_det_fill_latency_delay_us(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) > stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_us) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_vblank || - s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < REQUIRED_RESERVED_TIME || get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_VBLANK) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_svp || - s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - if (!all_planes_match_strategy(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr) || + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + if (!all_planes_match_method(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr) || get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_DRR) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.per_stream_pstate_strategy[s->pmo_dcn4.cur_pstate_candidate][stream_index] == dml2_pmo_pstate_strategy_na) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_na) { p_state_supported = false; break; } @@ -1970,8 +2108,8 @@ bool pmo_dcn4_fams2_optimize_for_pstate_support(struct dml2_pmo_optimize_for_pst memcpy(in_out->optimized_display_config, in_out->base_display_config, sizeof(struct display_configuation_with_meta)); if (in_out->last_candidate_failed) { - if (s->pmo_dcn4.allow_state_increase_for_strategy[s->pmo_dcn4.cur_pstate_candidate] && - s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index) { + if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].allow_state_increase && + s->pmo_dcn4.cur_latency_index < s->pmo_dcn4.max_latency_index - 1) { s->pmo_dcn4.cur_latency_index++; success = true; @@ -2059,15 +2197,15 @@ bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in unsigned int i; - for (i = 0; i < in_out->base_display_config->display_config.num_streams; i++) { + for (i = 0; i < in_out->base_display_config->display_config.num_planes; i++) { if (pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us > 0 && pmo->scratch.pmo_dcn4.z8_vblank_optimizable && - in_out->base_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us < (int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us) { + in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.z8_stutter_exit_latency_us * 1000) { success = false; break; } if (pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us > 0 && - in_out->base_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us < (int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us) { + in_out->base_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns < (int)pmo->soc_bb->power_management_parameters.stutter_enter_plus_exit_latency_us * 1000) { success = false; break; } @@ -2086,8 +2224,11 @@ bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in if (!in_out->last_candidate_failed) { if (pmo->scratch.pmo_dcn4.cur_stutter_candidate < pmo->scratch.pmo_dcn4.num_stutter_candidates) { - for (i = 0; i < in_out->optimized_display_config->display_config.num_streams; i++) { - in_out->optimized_display_config->display_config.stream_descriptors[i].overrides.minimum_vblank_idle_requirement_us = pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.cur_stutter_candidate]; + for (i = 0; i < in_out->optimized_display_config->display_config.num_planes; i++) { + /* take the max of the current and the optimal reserved time */ + in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns = + (long)math_max2(pmo->scratch.pmo_dcn4.optimal_vblank_reserved_time_for_stutter_us[pmo->scratch.pmo_dcn4.cur_stutter_candidate] * 1000, + in_out->optimized_display_config->display_config.plane_descriptors[i].overrides.reserved_vblank_time_ns); } success = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h index 75175d93add4..0c25bd3e9ac0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_PMO_FAMS2_DCN4_H__ #define __DML2_PMO_FAMS2_DCN4_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c index e0b9ece7901d..add51d41a515 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c @@ -2,10 +2,8 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_pmo_factory.h" #include "dml2_pmo_dcn4_fams2.h" -#include "dml2_pmo_dcn4.h" #include "dml2_pmo_dcn3.h" #include "dml2_external_lib_deps.h" @@ -28,15 +26,15 @@ bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance * { bool result = false; - if (!out) + if (out == 0) return false; memset(out, 0, sizeof(struct dml2_pmo_instance)); switch (project_id) { case dml2_project_dcn4x_stage1: - out->initialize = pmo_dcn4_initialize; - out->optimize_dcc_mcache = pmo_dcn4_optimize_dcc_mcache; + out->initialize = pmo_dcn4_fams2_initialize; + out->optimize_dcc_mcache = pmo_dcn4_fams2_optimize_dcc_mcache; result = true; break; case dml2_project_dcn4x_stage2: diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h index 9d3dc5e94be1..7218de1824cc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_PMO_FACTORY_H__ #define __DML2_PMO_FACTORY_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c index e73579f1a88e..e17b5ceba447 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "lib_float_math.h" #define ASSERT(condition) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h index 537cf6fd4c15..e13b0c5939b0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_standalone_libraries/lib_float_math.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __LIB_FLOAT_MATH_H__ #define __LIB_FLOAT_MATH_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c index 1b6dbfaa7ae8..dc8af4dd0410 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_top_optimization.h" #include "dml2_internal_shared_types.h" #include "dml_top_mcache.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h index 1536afcbf73a..9f22ab33eab1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_TOP_OPTIMIZATION_H__ #define __DML2_TOP_OPTIMIZATION_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c index 2fb3e2f45e07..f9f8869cd8b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_internal_shared_types.h" #include "dml_top.h" #include "dml2_mcg_factory.h" @@ -28,6 +27,7 @@ bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out) bool result = false; memset(l, 0, sizeof(struct dml2_initialize_instance_locals)); + memset(dml, 0, sizeof(struct dml2_instance)); memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); memcpy(&dml->soc_bbox, &in_out->soc_bb, sizeof(struct dml2_soc_bb)); @@ -96,14 +96,12 @@ bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) { struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; struct dml2_check_mode_supported_locals *l = &dml->scratch.check_mode_supported_locals; - /* Borrow the build_mode_programming_locals programming struct for DPMM call. */ - struct dml2_display_cfg_programming *dpmm_programming = dml->scratch.build_mode_programming_locals.mode_programming_params.programming; + struct dml2_display_cfg_programming *dpmm_programming = &dml->dpmm_instance.dpmm_scratch.programming; bool result = false; bool mcache_success = false; - if (dpmm_programming) - memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming)); + memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming)); setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); @@ -130,7 +128,7 @@ bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) /* * Call DPMM to map all requirements to minimum clock state */ - if (result && dpmm_programming) { + if (result) { l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table; l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta; l->dppm_map_mode_params.programming = dpmm_programming; @@ -268,9 +266,18 @@ bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_o vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase); - if (vmin_success) { + if (l->optimized_display_config_with_meta.stage4.performed) { + /* + * when performed is true, optimization has applied to + * optimized_display_config_with_meta and it has passed mode + * support. However it may or may not pass the test function to + * reach actual Vmin. As long as voltage is optimized even if it + * doesn't reach Vmin level, there is still power benefit so in + * this case we will still copy this optimization into base + * display config. + */ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); - l->base_display_config_with_meta.stage4.success = true; + l->base_display_config_with_meta.stage4.success = vmin_success; } /* diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c index 7afd417071a5..a342ebfbe4e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_debug.h" #include "dml_top_mcache.h" @@ -143,12 +142,12 @@ static unsigned int count_elements_in_span(int *array, unsigned int array_size, while (span_start_index < array_size) { for (i = span_start_index; i < array_size; i++) { - if (array[i] - span_start_value > span) { + if (array[i] - span_start_value <= span) { if (i - span_start_index + 1 > greatest_element_count) { greatest_element_count = i - span_start_index + 1; } + } else break; - } } span_start_index++; @@ -208,9 +207,9 @@ bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissabi int temp, p0shift, p1shift; unsigned int plane_index = 0; unsigned int i; - char odm_combine_factor = 1; - char mpc_combine_factor = 1; - char num_dpps; + unsigned int odm_combine_factor; + unsigned int mpc_combine_factor; + unsigned int num_dpps; unsigned int num_boundaries; enum dml2_scaling_transform scaling_transform; const struct dml2_plane_parameters *plane; @@ -227,10 +226,10 @@ bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissabi plane = ¶ms->display_cfg->plane_descriptors[plane_index]; stream = ¶ms->display_cfg->stream_descriptors[plane->stream_index]; - odm_combine_factor = (char)params->cfg_support_info->stream_support_info[plane->stream_index].odms_used; + num_dpps = odm_combine_factor = params->cfg_support_info->stream_support_info[plane->stream_index].odms_used; if (odm_combine_factor == 1) - mpc_combine_factor = (char)params->cfg_support_info->plane_support_info[plane_index].dpps_used; + num_dpps = mpc_combine_factor = (unsigned int)params->cfg_support_info->plane_support_info[plane_index].dpps_used; else mpc_combine_factor = 1; @@ -260,13 +259,13 @@ bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissabi // The last element in the unshifted boundary array will always be the first pixel outside the // plane, which means theres no mcache associated with it, so -1 num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane0 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane0 - 1; - if (count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, - num_boundaries, max_per_pipe_vp_p0) <= 1) { + if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, + num_boundaries, max_per_pipe_vp_p0) <= 1) && (num_boundaries <= num_dpps)) { p0pass = true; } num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane1 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane1 - 1; - if (count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, - num_boundaries, max_per_pipe_vp_p1) <= 1) { + if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, + num_boundaries, max_per_pipe_vp_p1) <= 1) && (num_boundaries <= num_dpps)) { p1pass = true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h index bb12e4c30690..7b1f6f7143d0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML_TOP_MCACHE_H__ #define __DML_TOP_MCACHE_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c index de7d8a6a2d3d..e9b8e10695ae 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #include "dml2_debug.h" int dml2_printf(const char *format, ...) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h index 0403238df107..d51a1b6c62f2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_DEBUG_H__ #define __DML2_DEBUG_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h index 5632cdacb7f4..aeac9f159fa5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. - #ifndef __DML2_INTERNAL_SHARED_TYPES_H__ #define __DML2_INTERNAL_SHARED_TYPES_H__ @@ -107,10 +106,16 @@ struct dml2_dpmm_map_watermarks_params_in_out { struct dml2_display_cfg_programming *programming; }; +struct dml2_dpmm_scratch { + struct dml2_display_cfg_programming programming; +}; + struct dml2_dpmm_instance { bool (*map_mode_to_soc_dpm)(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out); bool (*map_watermarks)(struct dml2_dpmm_map_watermarks_params_in_out *in_out); bool (*unit_test)(void); + + struct dml2_dpmm_scratch dpmm_scratch; }; /* @@ -266,6 +271,7 @@ struct dml2_fams2_meta { unsigned int contention_delay_otg_vlines; unsigned int min_allow_width_otg_vlines; unsigned int nom_vtotal; + unsigned int vblank_start; double nom_refresh_rate_hz; double nom_frame_time_us; unsigned int max_vtotal; @@ -594,7 +600,7 @@ struct dml2_pmo_optimize_for_stutter_in_out { struct display_configuation_with_meta *optimized_display_config; }; -enum dml2_pmo_pstate_strategy { +enum dml2_pmo_pstate_method { dml2_pmo_pstate_strategy_na = 0, /* hw exclusive modes */ dml2_pmo_pstate_strategy_vactive = 1, @@ -612,6 +618,11 @@ enum dml2_pmo_pstate_strategy { dml2_pmo_pstate_strategy_reserved_fw_drr_var = 22, }; +struct dml2_pmo_pstate_strategy { + enum dml2_pmo_pstate_method per_stream_pstate_method[DML2_MAX_PLANES]; + bool allow_state_increase; +}; + #define PMO_NO_DRR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw - dml2_pmo_pstate_strategy_na + 1)) - 1) << dml2_pmo_pstate_strategy_na) #define PMO_DRR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr) #define PMO_DRR_CLAMPED_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_clamped - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr) @@ -634,8 +645,7 @@ struct dml2_pmo_scratch { int stream_mask; } pmo_dcn3; struct { - enum dml2_pmo_pstate_strategy per_stream_pstate_strategy[DML2_MAX_PLANES][DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; - bool allow_state_increase_for_strategy[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; + struct dml2_pmo_pstate_strategy pstate_strategy_candidates[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; int num_pstate_candidates; int cur_pstate_candidate; @@ -661,6 +671,7 @@ struct dml2_pmo_scratch { unsigned int num_timing_groups; unsigned int synchronized_timing_group_masks[DML2_MAX_PLANES]; bool group_is_drr_enabled[DML2_MAX_PLANES]; + bool group_is_drr_active[DML2_MAX_PLANES]; double group_line_time_us[DML2_MAX_PLANES]; /* scheduling check locals */ @@ -676,10 +687,10 @@ struct dml2_pmo_init_data { union { struct { /* populated once during initialization */ - enum dml2_pmo_pstate_strategy expanded_strategy_list_1_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2][PMO_DCN4_MAX_DISPLAYS]; - enum dml2_pmo_pstate_strategy expanded_strategy_list_2_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2 * 2][PMO_DCN4_MAX_DISPLAYS]; - enum dml2_pmo_pstate_strategy expanded_strategy_list_3_display[PMO_DCN4_MAX_BASE_STRATEGIES * 6 * 2][PMO_DCN4_MAX_DISPLAYS]; - enum dml2_pmo_pstate_strategy expanded_strategy_list_4_display[PMO_DCN4_MAX_BASE_STRATEGIES * 24 * 2][PMO_DCN4_MAX_DISPLAYS]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_1_display[PMO_DCN4_MAX_BASE_STRATEGIES * 2]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_2_display[PMO_DCN4_MAX_BASE_STRATEGIES * 4 * 4]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_3_display[PMO_DCN4_MAX_BASE_STRATEGIES * 6 * 6 * 6]; + struct dml2_pmo_pstate_strategy expanded_strategy_list_4_display[PMO_DCN4_MAX_BASE_STRATEGIES * 8 * 8 * 8 * 8]; unsigned int num_expanded_strategies_per_list[PMO_DCN4_MAX_DISPLAYS]; } pmo_dcn4; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h index b566f53608c6..3ba184be25d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_internal_types.h @@ -101,6 +101,7 @@ struct dml2_wrapper_scratch { struct dml2_dml_to_dc_pipe_mapping dml_to_dc_pipe_mapping; bool enable_flexible_pipe_mapping; bool plane_duplicate_exists; + unsigned int dp2_mst_stream_count; }; struct dml2_helper_det_policy_scratch { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 8b9dcee77266..7e39873832bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -733,7 +733,8 @@ static void populate_dml_timing_cfg_from_stream_state(struct dml_timing_cfg_st * } static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st *out, unsigned int location, - const struct dc_stream_state *in, const struct pipe_ctx *pipe) + const struct dc_stream_state *in, const struct pipe_ctx *pipe, + unsigned int dp2_mst_stream_count) { unsigned int output_bpc; @@ -746,7 +747,7 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st * case SIGNAL_TYPE_DISPLAY_PORT_MST: case SIGNAL_TYPE_DISPLAY_PORT: out->OutputEncoder[location] = dml_dp; - if (is_dp2p0_output_encoder(pipe)) + if (is_dp2p0_output_encoder(pipe, dp2_mst_stream_count)) out->OutputEncoder[location] = dml_dp2p0; break; case SIGNAL_TYPE_EDP: @@ -953,7 +954,9 @@ static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc memcpy(out, &temp_pipe->plane_res.scl_data, sizeof(*out)); } -static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_stream_state *in) +static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, + const struct dc_stream_state *in, + const struct soc_bounding_box_st *soc) { dml_uint_t width, height; @@ -970,7 +973,7 @@ static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned out->CursorBPP[location] = dml_cur_32bit; out->CursorWidth[location] = 256; - out->GPUVMMinPageSizeKBytes[location] = 256; + out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes; out->ViewportWidth[location] = width; out->ViewportHeight[location] = height; @@ -1007,7 +1010,9 @@ static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned out->ScalerEnabled[location] = false; } -static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, struct dc_state *context) +static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, + const struct dc_plane_state *in, struct dc_state *context, + const struct soc_bounding_box_st *soc) { struct scaler_data *scaler_data = kzalloc(sizeof(*scaler_data), GFP_KERNEL); if (!scaler_data) @@ -1018,7 +1023,7 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out out->CursorBPP[location] = dml_cur_32bit; out->CursorWidth[location] = 256; - out->GPUVMMinPageSizeKBytes[location] = 256; + out->GPUVMMinPageSizeKBytes[location] = soc->gpuvm_min_page_size_kbytes; out->ViewportWidth[location] = scaler_data->viewport.width; out->ViewportHeight[location] = scaler_data->viewport.height; @@ -1193,6 +1198,37 @@ static void dml2_populate_pipe_to_plane_index_mapping(struct dml2_context *dml2, plane_index = 0; } } + +static unsigned int calculate_dp2_mst_stream_count(struct dc_state *context) +{ + int i, j; + unsigned int dp2_mst_stream_count = 0; + + for (i = 0; i < context->stream_count; i++) { + struct dc_stream_state *stream = context->streams[i]; + + if (!stream || stream->signal != SIGNAL_TYPE_DISPLAY_PORT_MST) + continue; + + for (j = 0; j < MAX_PIPES; j++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; + + if (!pipe_ctx || !pipe_ctx->stream) + continue; + + if (stream != pipe_ctx->stream) + continue; + + if (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc) { + dp2_mst_stream_count++; + break; + } + } + } + + return dp2_mst_stream_count; +} + static void populate_dml_writeback_cfg_from_stream_state(struct dml_writeback_cfg_st *out, unsigned int location, const struct dc_stream_state *in) { @@ -1255,6 +1291,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (dml2->v20.dml_core_ctx.ip.hostvm_enable) dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter; + dml2->v20.scratch.dp2_mst_stream_count = calculate_dp2_mst_stream_count(context); dml2_populate_pipe_to_plane_index_mapping(dml2, context); for (i = 0; i < context->stream_count; i++) { @@ -1276,7 +1313,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2->v20.scratch.dp2_mst_stream_count); /*Call site for populate_dml_writeback_cfg_from_stream_state*/ populate_dml_writeback_cfg_from_stream_state(&dml_dispcfg->writeback, disp_cfg_stream_location, context->streams[i]); @@ -1299,7 +1336,8 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat disp_cfg_plane_location = dml_dispcfg->num_surfaces++; populate_dummy_dml_surface_cfg(&dml_dispcfg->surface, disp_cfg_plane_location, context->streams[i]); - populate_dummy_dml_plane_cfg(&dml_dispcfg->plane, disp_cfg_plane_location, context->streams[i]); + populate_dummy_dml_plane_cfg(&dml_dispcfg->plane, disp_cfg_plane_location, + context->streams[i], &dml2->v20.dml_core_ctx.soc); dml_dispcfg->plane.BlendingAndTiming[disp_cfg_plane_location] = disp_cfg_stream_location; @@ -1315,7 +1353,10 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]); - populate_dml_plane_cfg_from_plane_state(&dml_dispcfg->plane, disp_cfg_plane_location, context->stream_status[i].plane_states[j], context); + populate_dml_plane_cfg_from_plane_state( + &dml_dispcfg->plane, disp_cfg_plane_location, + context->stream_status[i].plane_states[j], context, + &dml2->v20.dml_core_ctx.soc); if (stream_mall_type == SUBVP_MAIN) { dml_dispcfg->plane.UseMALLForPStateChange[disp_cfg_plane_location] = dml_use_mall_pstate_change_sub_viewport; @@ -1337,7 +1378,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (j >= 1) { populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_plane_location, context->streams[i]); - populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context); + populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_plane_location, context->streams[i], current_pipe_context, dml2->v20.scratch.dp2_mst_stream_count); switch (context->streams[i]->debug.force_odm_combine_segments) { case 2: dml2->v20.dml_core_ctx.policy.ODMUse[disp_cfg_plane_location] = dml_odm_use_policy_combine_2to1; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h index d764773938f4..55659b22d87f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.h @@ -36,6 +36,6 @@ void dml2_translate_socbb_params(const struct dc *in_dc, struct soc_bounding_box void dml2_translate_soc_states(const struct dc *in_dc, struct soc_states_st *out, int num_states); void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_state *context, struct dml_display_cfg_st *dml_dispcfg); void dml2_update_pipe_ctx_dchub_regs(struct _vcs_dpi_dml_display_rq_regs_st *rq_regs, struct _vcs_dpi_dml_display_dlg_regs_st *disp_dlg_regs, struct _vcs_dpi_dml_display_ttu_regs_st *disp_ttu_regs, struct pipe_ctx *out); -bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe); +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe, unsigned int dp2_mst_stream_count); #endif //__DML2_TRANSLATION_HELPER_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c index 92238ff333a4..9e8ff3a9718e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_utils.c @@ -153,7 +153,7 @@ unsigned int dml2_util_get_maximum_odm_combine_for_output(bool force_odm_4to1, e } } -bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) +bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx, unsigned int dp2_mst_stream_count) { if (pipe_ctx == NULL || pipe_ctx->stream == NULL) return false; @@ -162,7 +162,7 @@ bool is_dp2p0_output_encoder(const struct pipe_ctx *pipe_ctx) ASSERT(pipe_ctx->stream_res.hpo_dp_stream_enc ? pipe_ctx->link_res.hpo_dp_link_enc != NULL : true); /* Count MST hubs once by treating only 1st remote sink in topology as an encoder */ - if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0]) { + if (pipe_ctx->stream->link && pipe_ctx->stream->link->remote_sinks[0] && dp2_mst_stream_count > 1) { return (pipe_ctx->stream_res.hpo_dp_stream_enc && pipe_ctx->link_res.hpo_dp_link_enc && dc_is_dp_signal(pipe_ctx->stream->signal) && @@ -181,7 +181,7 @@ bool is_dtbclk_required(const struct dc *dc, struct dc_state *context) for (i = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; - if (is_dp2p0_output_encoder(&context->res_ctx.pipe_ctx[i])) + if (is_dp2p0_output_encoder(&context->res_ctx.pipe_ctx[i], context->bw_ctx.dml2->v20.scratch.dp2_mst_stream_count)) return true; } return false; @@ -421,7 +421,7 @@ unsigned int dml2_calc_max_scaled_time( void dml2_extract_writeback_wm(struct dc_state *context, struct display_mode_lib_st *dml_core_ctx) { - int i, j = 0;; + int i, j = 0; struct mcif_arb_params *wb_arb_params = NULL; struct dcn_bw_writeback *bw_writeback = NULL; enum mmhubbub_wbif_mode wbif_mode = PACKED_444_FP16; /*for now*/ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index d5dcc8b77281..866b0abcff1b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -575,7 +575,7 @@ static bool dml2_validate_and_build_resource(const struct dc *in_dc, struct dc_s unsigned int lowest_state_idx = 0; out_clks.p_state_supported = true; - out_clks.dispclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dispclk_mhz * 1000; + out_clks.dispclk_khz = 0; /* No requirement, and lowest index will generally be maximum dispclk. */ out_clks.dcfclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dcfclk_mhz * 1000; out_clks.fclk_khz = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].fabricclk_mhz * 1000; out_clks.uclk_mts = (unsigned int)dml2->v20.dml_core_ctx.states.state_array[lowest_state_idx].dram_speed_mts; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h index 023325e8f6e2..0f944fcfd5a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h @@ -236,6 +236,7 @@ struct dml2_configuration_options { bool use_clock_dc_limits; bool gpuvm_enable; + bool force_tdlut_enable; struct dml2_soc_bb *bb_from_dmub; }; diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c index f2a2d53e9689..f8f6019d8304 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c @@ -684,9 +684,6 @@ void dpp1_set_degamma( BREAK_TO_DEBUGGER(); break; } - - REG_SEQ_SUBMIT(); - REG_SEQ_WAIT_DONE(); } void dpp1_degamma_ram_select( diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c index e16274fee31d..8473c694bfdc 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c @@ -59,6 +59,31 @@ void dpp35_dppclk_control( DISPCLK_R_GATE_DISABLE, 0); } +void dpp35_program_bias_and_scale_fcnv( + struct dpp *dpp_base, + struct dc_bias_and_scale *params) +{ + struct dcn20_dpp *dpp = TO_DCN20_DPP(dpp_base); + + if (!params->bias_and_scale_valid) { + REG_SET(FCNV_FP_BIAS_R, 0, FCNV_FP_BIAS_R, 0); + REG_SET(FCNV_FP_BIAS_G, 0, FCNV_FP_BIAS_G, 0); + REG_SET(FCNV_FP_BIAS_B, 0, FCNV_FP_BIAS_B, 0); + + REG_SET(FCNV_FP_SCALE_R, 0, FCNV_FP_SCALE_R, 0x1F000); + REG_SET(FCNV_FP_SCALE_G, 0, FCNV_FP_SCALE_G, 0x1F000); + REG_SET(FCNV_FP_SCALE_B, 0, FCNV_FP_SCALE_B, 0x1F000); + } else { + REG_SET(FCNV_FP_BIAS_R, 0, FCNV_FP_BIAS_R, params->bias_red); + REG_SET(FCNV_FP_BIAS_G, 0, FCNV_FP_BIAS_G, params->bias_green); + REG_SET(FCNV_FP_BIAS_B, 0, FCNV_FP_BIAS_B, params->bias_blue); + + REG_SET(FCNV_FP_SCALE_R, 0, FCNV_FP_SCALE_R, params->scale_red); + REG_SET(FCNV_FP_SCALE_G, 0, FCNV_FP_SCALE_G, params->scale_green); + REG_SET(FCNV_FP_SCALE_B, 0, FCNV_FP_SCALE_B, params->scale_blue); + } +} + static struct dpp_funcs dcn35_dpp_funcs = { .dpp_program_gamcor_lut = dpp3_program_gamcor_lut, .dpp_read_state = dpp30_read_state, @@ -81,7 +106,7 @@ static struct dpp_funcs dcn35_dpp_funcs = { .dpp_program_shaper_lut = NULL, // CM SHAPER block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) .dpp_program_3dlut = NULL, // CM 3DLUT block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) - .dpp_program_bias_and_scale = NULL, + .dpp_program_bias_and_scale = dpp35_program_bias_and_scale_fcnv, .dpp_cnv_set_alpha_keyer = dpp2_cnv_set_alpha_keyer, .set_cursor_attributes = dpp3_set_cursor_attributes, .set_cursor_position = dpp1_set_cursor_position, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h index 135872d88219..3ca339a16e5b 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h @@ -61,4 +61,7 @@ bool dpp35_construct(struct dcn3_dpp *dpp3, struct dc_context *ctx, void dpp35_set_fgcg(struct dcn3_dpp *dpp, bool enable); +void dpp35_program_bias_and_scale_fcnv(struct dpp *dpp_base, + struct dc_bias_and_scale *bias_and_scale); + #endif // __DCN35_DPP_H diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c index 7cae18fd7be9..97bf26fa3573 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c @@ -30,6 +30,7 @@ #include "basics/conversion.h" #include "dcn30/dcn30_cm_common.h" #include "dcn32/dcn32_dpp.h" +#include "dcn35/dcn35_dpp.h" #define REG(reg)\ dpp->tf_regs->reg @@ -240,7 +241,7 @@ static struct dpp_funcs dcn401_dpp_funcs = { .dpp_program_shaper_lut = NULL, // CM SHAPER block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) .dpp_program_3dlut = NULL, // CM 3DLUT block is removed in DCN3.2 DPP, (it is in MPCC, programmable before or after BLND) - .dpp_program_bias_and_scale = NULL, + .dpp_program_bias_and_scale = dpp35_program_bias_and_scale_fcnv, .dpp_cnv_set_alpha_keyer = dpp2_cnv_set_alpha_keyer, .set_cursor_attributes = dpp401_set_cursor_attributes, .set_cursor_position = dpp401_set_cursor_position, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index d0f8c9ff5232..3b6ca7974e18 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -120,11 +120,10 @@ void dpp401_set_cursor_attributes( enum dc_cursor_color_format color_format = cursor_attributes->color_format; int cur_rom_en = 0; + // DCN4 should always do Cursor degamma for Cursor Color modes if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA || color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { - if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) { - cur_rom_en = 1; - } + cur_rom_en = 1; } REG_UPDATE_3(CURSOR0_CONTROL, @@ -246,16 +245,6 @@ void dpp401_set_cursor_matrix( enum dc_color_space color_space, struct dc_csc_transform cursor_csc_color_matrix) { - struct dpp_input_csc_matrix cursor_tbl_entry; - unsigned int i; - - if (cursor_csc_color_matrix.enable_adjustment == true) { - for (i = 0; i < 12; i++) - cursor_tbl_entry.regval[i] = cursor_csc_color_matrix.matrix[i]; - - cursor_tbl_entry.color_space = color_space; - dpp401_program_cursor_csc(dpp_base, color_space, &cursor_tbl_entry); - } else { - dpp401_program_cursor_csc(dpp_base, color_space, NULL); - } + //Since we don't have cursor matrix information, force bypass mode by passing in unknown color space + dpp401_program_cursor_csc(dpp_base, COLOR_SPACE_UNKNOWN, NULL); } diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c index 6acb6699f146..61678b0a5a1e 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c @@ -27,7 +27,7 @@ static void dsc401_disconnect(struct display_stream_compressor *dsc); static void dsc401_wait_disconnect_pending_clear(struct display_stream_compressor *dsc); static void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz); -const struct dsc_funcs dcn401_dsc_funcs = { +static const struct dsc_funcs dcn401_dsc_funcs = { .dsc_get_enc_caps = dsc401_get_enc_caps, .dsc_read_state = dsc401_read_state, .dsc_validate_stream = dsc401_validate_stream, diff --git a/drivers/gpu/drm/amd/display/dc/dwb/Makefile b/drivers/gpu/drm/amd/display/dc/dwb/Makefile index 16f7a454fed9..3952ba4cd508 100644 --- a/drivers/gpu/drm/amd/display/dc/dwb/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dwb/Makefile @@ -25,6 +25,15 @@ ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### +# DCN30 +############################################################################### +DWB_DCN30 = dcn30_dwb.o dcn30_dwb_cm.o + +AMD_DAL_DWB_DCN30 = $(addprefix $(AMDDALPATH)/dc/dwb/dcn30/,$(DWB_DCN30)) + +AMD_DISPLAY_FILES += $(AMD_DAL_DWB_DCN30) + +############################################################################### # DCN35 ############################################################################### DWB_DCN35 = dcn35_dwb.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h index bd98b327a6c7..bd98b327a6c7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.h +++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c index fae98cf52020..fae98cf52020 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.c +++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h index 0f3f7c5fbaec..0f3f7c5fbaec 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h +++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb_cm.c index 03a50c32fcfe..03a50c32fcfe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb_cm.c diff --git a/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c index b23a809999ed..d5e8294f5a16 100644 --- a/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c +++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c @@ -21,7 +21,6 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ - #include "reg_helper.h" #include "dcn35_dwb.h" diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c index 46415cab23ab..928abca18a18 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c @@ -86,7 +86,13 @@ static const struct ddc_registers ddc_data_regs_dcn[] = { ddc_data_regs_dcn2(2), ddc_data_regs_dcn2(3), ddc_data_regs_dcn2(4), -// ddc_data_regs_dcn2(5), + { + // add a dummy entry for cases no such port + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, + .ddc_setup = 0, + .phy_aux_cntl = 0, + .dc_gpio_aux_ctrl_5 = 0 + }, { // add a dummy entry for cases no such port {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, @@ -107,7 +113,13 @@ static const struct ddc_registers ddc_clk_regs_dcn[] = { ddc_clk_regs_dcn2(2), ddc_clk_regs_dcn2(3), ddc_clk_regs_dcn2(4), -// ddc_clk_regs_dcn2(5), + { + // add a dummy entry for cases no such port + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, + .ddc_setup = 0, + .phy_aux_cntl = 0, + .dc_gpio_aux_ctrl_5 = 0 + }, { // add a dummy entry for cases no such port {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, diff --git a/drivers/gpu/drm/amd/display/dc/hpo/Makefile b/drivers/gpu/drm/amd/display/dc/hpo/Makefile index c248bd86b477..7f2c9ee0dff1 100644 --- a/drivers/gpu/drm/amd/display/dc/hpo/Makefile +++ b/drivers/gpu/drm/amd/display/dc/hpo/Makefile @@ -25,6 +25,21 @@ ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### +# DCN30 +############################################################################### + +AMD_DAL_HPO_DCN30 = $(addprefix $(AMDDALPATH)/dc/hpo/dcn30/,$(HPO_DCN30)) + +AMD_DISPLAY_FILES += $(AMD_DAL_HPO_DCN30) +############################################################################### +# DCN31 +############################################################################### +HPO_DCN31 = dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o + +AMD_DAL_HPO_DCN31 = $(addprefix $(AMDDALPATH)/dc/hpo/dcn31/,$(HPO_DCN31)) + +AMD_DISPLAY_FILES += $(AMD_DAL_HPO_DCN31) +############################################################################### # DCN32 ############################################################################### HPO_DCN32 = dcn32_hpo_dp_link_encoder.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.c index 03b4ac2f1991..03b4ac2f1991 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.h index 51f5781325e8..51f5781325e8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c index 678db949cfe3..678db949cfe3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.h index 82c3b3ac1f0d..82c3b3ac1f0d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.h diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c index 181041d6d177..37d26fa0b6fb 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c @@ -75,108 +75,108 @@ bool hubbub401_program_urgent_watermarks( /* Repeat for water mark set A and B */ /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.urgent > hubbub2->watermarks.dcn4.a.urgent) { - hubbub2->watermarks.dcn4.a.urgent = watermarks->dcn4.a.urgent; + if (safe_to_lower || watermarks->dcn4x.a.urgent > hubbub2->watermarks.dcn4x.a.urgent) { + hubbub2->watermarks.dcn4x.a.urgent = watermarks->dcn4x.a.urgent; REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, 0, - DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, watermarks->dcn4.a.urgent); + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, watermarks->dcn4x.a.urgent); DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.a.urgent, watermarks->dcn4.a.urgent); - } else if (watermarks->dcn4.a.urgent < hubbub2->watermarks.dcn4.a.urgent) + watermarks->dcn4x.a.urgent, watermarks->dcn4x.a.urgent); + } else if (watermarks->dcn4x.a.urgent < hubbub2->watermarks.dcn4x.a.urgent) wm_pending = true; /* determine the transfer time for a quantity of data for a particular requestor.*/ - if (safe_to_lower || watermarks->dcn4.a.frac_urg_bw_flip - > hubbub2->watermarks.dcn4.a.frac_urg_bw_flip) { - hubbub2->watermarks.dcn4.a.frac_urg_bw_flip = watermarks->dcn4.a.frac_urg_bw_flip; + if (safe_to_lower || watermarks->dcn4x.a.frac_urg_bw_flip + > hubbub2->watermarks.dcn4x.a.frac_urg_bw_flip) { + hubbub2->watermarks.dcn4x.a.frac_urg_bw_flip = watermarks->dcn4x.a.frac_urg_bw_flip; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, 0, - DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, watermarks->dcn4.a.frac_urg_bw_flip); - } else if (watermarks->dcn4.a.frac_urg_bw_flip - < hubbub2->watermarks.dcn4.a.frac_urg_bw_flip) + DCHUBBUB_ARB_FRAC_URG_BW_FLIP_A, watermarks->dcn4x.a.frac_urg_bw_flip); + } else if (watermarks->dcn4x.a.frac_urg_bw_flip + < hubbub2->watermarks.dcn4x.a.frac_urg_bw_flip) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.frac_urg_bw_nom - > hubbub2->watermarks.dcn4.a.frac_urg_bw_nom) { - hubbub2->watermarks.dcn4.a.frac_urg_bw_nom = watermarks->dcn4.a.frac_urg_bw_nom; + if (safe_to_lower || watermarks->dcn4x.a.frac_urg_bw_nom + > hubbub2->watermarks.dcn4x.a.frac_urg_bw_nom) { + hubbub2->watermarks.dcn4x.a.frac_urg_bw_nom = watermarks->dcn4x.a.frac_urg_bw_nom; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, 0, - DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, watermarks->dcn4.a.frac_urg_bw_nom); - } else if (watermarks->dcn4.a.frac_urg_bw_nom - < hubbub2->watermarks.dcn4.a.frac_urg_bw_nom) + DCHUBBUB_ARB_FRAC_URG_BW_NOM_A, watermarks->dcn4x.a.frac_urg_bw_nom); + } else if (watermarks->dcn4x.a.frac_urg_bw_nom + < hubbub2->watermarks.dcn4x.a.frac_urg_bw_nom) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.frac_urg_bw_mall - > hubbub2->watermarks.dcn4.a.frac_urg_bw_mall) { - hubbub2->watermarks.dcn4.a.frac_urg_bw_mall = watermarks->dcn4.a.frac_urg_bw_mall; + if (safe_to_lower || watermarks->dcn4x.a.frac_urg_bw_mall + > hubbub2->watermarks.dcn4x.a.frac_urg_bw_mall) { + hubbub2->watermarks.dcn4x.a.frac_urg_bw_mall = watermarks->dcn4x.a.frac_urg_bw_mall; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, 0, - DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, watermarks->dcn4.a.frac_urg_bw_mall); - } else if (watermarks->dcn4.a.frac_urg_bw_mall < hubbub2->watermarks.dcn4.a.frac_urg_bw_mall) + DCHUBBUB_ARB_FRAC_URG_BW_MALL_A, watermarks->dcn4x.a.frac_urg_bw_mall); + } else if (watermarks->dcn4x.a.frac_urg_bw_mall < hubbub2->watermarks.dcn4x.a.frac_urg_bw_mall) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4.a.refcyc_per_trip_to_mem) { - hubbub2->watermarks.dcn4.a.refcyc_per_trip_to_mem = watermarks->dcn4.a.refcyc_per_trip_to_mem; + if (safe_to_lower || watermarks->dcn4x.a.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4x.a.refcyc_per_trip_to_mem) { + hubbub2->watermarks.dcn4x.a.refcyc_per_trip_to_mem = watermarks->dcn4x.a.refcyc_per_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, 0, - DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, watermarks->dcn4.a.refcyc_per_trip_to_mem); - } else if (watermarks->dcn4.a.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4.a.refcyc_per_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, watermarks->dcn4x.a.refcyc_per_trip_to_mem); + } else if (watermarks->dcn4x.a.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4x.a.refcyc_per_trip_to_mem) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4.a.refcyc_per_meta_trip_to_mem) { - hubbub2->watermarks.dcn4.a.refcyc_per_meta_trip_to_mem = watermarks->dcn4.a.refcyc_per_meta_trip_to_mem; + if (safe_to_lower || watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4x.a.refcyc_per_meta_trip_to_mem) { + hubbub2->watermarks.dcn4x.a.refcyc_per_meta_trip_to_mem = watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, 0, - DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, watermarks->dcn4.a.refcyc_per_meta_trip_to_mem); - } else if (watermarks->dcn4.a.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4.a.refcyc_per_meta_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_META_TRIP_A, watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem); + } else if (watermarks->dcn4x.a.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4x.a.refcyc_per_meta_trip_to_mem) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.urgent > hubbub2->watermarks.dcn4.b.urgent) { - hubbub2->watermarks.dcn4.b.urgent = watermarks->dcn4.b.urgent; + if (safe_to_lower || watermarks->dcn4x.b.urgent > hubbub2->watermarks.dcn4x.b.urgent) { + hubbub2->watermarks.dcn4x.b.urgent = watermarks->dcn4x.b.urgent; REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, 0, - DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, watermarks->dcn4.b.urgent); + DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, watermarks->dcn4x.b.urgent); DC_LOG_BANDWIDTH_CALCS("URGENCY_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.b.urgent, watermarks->dcn4.b.urgent); - } else if (watermarks->dcn4.b.urgent < hubbub2->watermarks.dcn4.b.urgent) + watermarks->dcn4x.b.urgent, watermarks->dcn4x.b.urgent); + } else if (watermarks->dcn4x.b.urgent < hubbub2->watermarks.dcn4x.b.urgent) wm_pending = true; /* determine the transfer time for a quantity of data for a particular requestor.*/ - if (safe_to_lower || watermarks->dcn4.b.frac_urg_bw_flip - > hubbub2->watermarks.dcn4.b.frac_urg_bw_flip) { - hubbub2->watermarks.dcn4.b.frac_urg_bw_flip = watermarks->dcn4.b.frac_urg_bw_flip; + if (safe_to_lower || watermarks->dcn4x.b.frac_urg_bw_flip + > hubbub2->watermarks.dcn4x.b.frac_urg_bw_flip) { + hubbub2->watermarks.dcn4x.b.frac_urg_bw_flip = watermarks->dcn4x.b.frac_urg_bw_flip; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, 0, - DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, watermarks->dcn4.b.frac_urg_bw_flip); - } else if (watermarks->dcn4.b.frac_urg_bw_flip - < hubbub2->watermarks.dcn4.b.frac_urg_bw_flip) + DCHUBBUB_ARB_FRAC_URG_BW_FLIP_B, watermarks->dcn4x.b.frac_urg_bw_flip); + } else if (watermarks->dcn4x.b.frac_urg_bw_flip + < hubbub2->watermarks.dcn4x.b.frac_urg_bw_flip) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.frac_urg_bw_nom - > hubbub2->watermarks.dcn4.b.frac_urg_bw_nom) { - hubbub2->watermarks.dcn4.b.frac_urg_bw_nom = watermarks->dcn4.b.frac_urg_bw_nom; + if (safe_to_lower || watermarks->dcn4x.b.frac_urg_bw_nom + > hubbub2->watermarks.dcn4x.b.frac_urg_bw_nom) { + hubbub2->watermarks.dcn4x.b.frac_urg_bw_nom = watermarks->dcn4x.b.frac_urg_bw_nom; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, 0, - DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, watermarks->dcn4.b.frac_urg_bw_nom); - } else if (watermarks->dcn4.b.frac_urg_bw_nom - < hubbub2->watermarks.dcn4.b.frac_urg_bw_nom) + DCHUBBUB_ARB_FRAC_URG_BW_NOM_B, watermarks->dcn4x.b.frac_urg_bw_nom); + } else if (watermarks->dcn4x.b.frac_urg_bw_nom + < hubbub2->watermarks.dcn4x.b.frac_urg_bw_nom) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.frac_urg_bw_mall - > hubbub2->watermarks.dcn4.b.frac_urg_bw_mall) { - hubbub2->watermarks.dcn4.b.frac_urg_bw_mall = watermarks->dcn4.b.frac_urg_bw_mall; + if (safe_to_lower || watermarks->dcn4x.b.frac_urg_bw_mall + > hubbub2->watermarks.dcn4x.b.frac_urg_bw_mall) { + hubbub2->watermarks.dcn4x.b.frac_urg_bw_mall = watermarks->dcn4x.b.frac_urg_bw_mall; REG_SET(DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, 0, - DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, watermarks->dcn4.b.frac_urg_bw_mall); - } else if (watermarks->dcn4.b.frac_urg_bw_mall < hubbub2->watermarks.dcn4.b.frac_urg_bw_mall) + DCHUBBUB_ARB_FRAC_URG_BW_MALL_B, watermarks->dcn4x.b.frac_urg_bw_mall); + } else if (watermarks->dcn4x.b.frac_urg_bw_mall < hubbub2->watermarks.dcn4x.b.frac_urg_bw_mall) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4.b.refcyc_per_trip_to_mem) { - hubbub2->watermarks.dcn4.b.refcyc_per_trip_to_mem = watermarks->dcn4.b.refcyc_per_trip_to_mem; + if (safe_to_lower || watermarks->dcn4x.b.refcyc_per_trip_to_mem > hubbub2->watermarks.dcn4x.b.refcyc_per_trip_to_mem) { + hubbub2->watermarks.dcn4x.b.refcyc_per_trip_to_mem = watermarks->dcn4x.b.refcyc_per_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, 0, - DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, watermarks->dcn4.b.refcyc_per_trip_to_mem); - } else if (watermarks->dcn4.b.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4.b.refcyc_per_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, watermarks->dcn4x.b.refcyc_per_trip_to_mem); + } else if (watermarks->dcn4x.b.refcyc_per_trip_to_mem < hubbub2->watermarks.dcn4x.b.refcyc_per_trip_to_mem) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4.b.refcyc_per_meta_trip_to_mem) { - hubbub2->watermarks.dcn4.b.refcyc_per_meta_trip_to_mem = watermarks->dcn4.b.refcyc_per_meta_trip_to_mem; + if (safe_to_lower || watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem > hubbub2->watermarks.dcn4x.b.refcyc_per_meta_trip_to_mem) { + hubbub2->watermarks.dcn4x.b.refcyc_per_meta_trip_to_mem = watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem; REG_SET(DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, 0, - DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, watermarks->dcn4.b.refcyc_per_meta_trip_to_mem); - } else if (watermarks->dcn4.b.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4.b.refcyc_per_meta_trip_to_mem) + DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B, watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem); + } else if (watermarks->dcn4x.b.refcyc_per_meta_trip_to_mem < hubbub2->watermarks.dcn4x.b.refcyc_per_meta_trip_to_mem) wm_pending = true; return wm_pending; @@ -192,89 +192,89 @@ bool hubbub401_program_stutter_watermarks( bool wm_pending = false; /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.sr_enter - > hubbub2->watermarks.dcn4.a.sr_enter) { - hubbub2->watermarks.dcn4.a.sr_enter = - watermarks->dcn4.a.sr_enter; + if (safe_to_lower || watermarks->dcn4x.a.sr_enter + > hubbub2->watermarks.dcn4x.a.sr_enter) { + hubbub2->watermarks.dcn4x.a.sr_enter = + watermarks->dcn4x.a.sr_enter; REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, watermarks->dcn4x.a.sr_enter); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.a.sr_enter, watermarks->dcn4.a.sr_enter); + watermarks->dcn4x.a.sr_enter, watermarks->dcn4x.a.sr_enter); // On dGPU Z states are N/A, so program all other 3 Stutter Enter wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_A, watermarks->dcn4x.a.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_A, watermarks->dcn4x.a.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, watermarks->dcn4.a.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_A, watermarks->dcn4x.a.sr_enter); - } else if (watermarks->dcn4.a.sr_enter - < hubbub2->watermarks.dcn4.a.sr_enter) + } else if (watermarks->dcn4x.a.sr_enter + < hubbub2->watermarks.dcn4x.a.sr_enter) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.a.sr_exit - > hubbub2->watermarks.dcn4.a.sr_exit) { - hubbub2->watermarks.dcn4.a.sr_exit = - watermarks->dcn4.a.sr_exit; + if (safe_to_lower || watermarks->dcn4x.a.sr_exit + > hubbub2->watermarks.dcn4x.a.sr_exit) { + hubbub2->watermarks.dcn4x.a.sr_exit = + watermarks->dcn4x.a.sr_exit; REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, watermarks->dcn4x.a.sr_exit); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.a.sr_exit, watermarks->dcn4.a.sr_exit); + watermarks->dcn4x.a.sr_exit, watermarks->dcn4x.a.sr_exit); // On dGPU Z states are N/A, so program all other 3 Stutter Exit wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_A, watermarks->dcn4x.a.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_A, watermarks->dcn4x.a.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, watermarks->dcn4.a.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_A, watermarks->dcn4x.a.sr_exit); - } else if (watermarks->dcn4.a.sr_exit - < hubbub2->watermarks.dcn4.a.sr_exit) + } else if (watermarks->dcn4x.a.sr_exit + < hubbub2->watermarks.dcn4x.a.sr_exit) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.sr_enter - > hubbub2->watermarks.dcn4.b.sr_enter) { - hubbub2->watermarks.dcn4.b.sr_enter = - watermarks->dcn4.b.sr_enter; + if (safe_to_lower || watermarks->dcn4x.b.sr_enter + > hubbub2->watermarks.dcn4x.b.sr_enter) { + hubbub2->watermarks.dcn4x.b.sr_enter = + watermarks->dcn4x.b.sr_enter; REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, watermarks->dcn4x.b.sr_enter); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.b.sr_enter, watermarks->dcn4.b.sr_enter); + watermarks->dcn4x.b.sr_enter, watermarks->dcn4x.b.sr_enter); // On dGPU Z states are N/A, so program all other 3 Stutter Enter wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK1_B, watermarks->dcn4x.b.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK2_B, watermarks->dcn4x.b.sr_enter); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, watermarks->dcn4.b.sr_enter); + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK3_B, watermarks->dcn4x.b.sr_enter); - } else if (watermarks->dcn4.b.sr_enter - < hubbub2->watermarks.dcn4.b.sr_enter) + } else if (watermarks->dcn4x.b.sr_enter + < hubbub2->watermarks.dcn4x.b.sr_enter) wm_pending = true; - if (safe_to_lower || watermarks->dcn4.b.sr_exit - > hubbub2->watermarks.dcn4.b.sr_exit) { - hubbub2->watermarks.dcn4.b.sr_exit = - watermarks->dcn4.b.sr_exit; + if (safe_to_lower || watermarks->dcn4x.b.sr_exit + > hubbub2->watermarks.dcn4x.b.sr_exit) { + hubbub2->watermarks.dcn4x.b.sr_exit = + watermarks->dcn4x.b.sr_exit; REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, watermarks->dcn4x.b.sr_exit); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n", - watermarks->dcn4.b.sr_exit, watermarks->dcn4.b.sr_exit); + watermarks->dcn4x.b.sr_exit, watermarks->dcn4x.b.sr_exit); // On dGPU Z states are N/A, so program all other 3 Stutter Exit wm A with the same value REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK1_B, watermarks->dcn4x.b.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK2_B, watermarks->dcn4x.b.sr_exit); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, watermarks->dcn4.b.sr_exit); + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK3_B, watermarks->dcn4x.b.sr_exit); - } else if (watermarks->dcn4.b.sr_exit - < hubbub2->watermarks.dcn4.b.sr_exit) + } else if (watermarks->dcn4x.b.sr_exit + < hubbub2->watermarks.dcn4x.b.sr_exit) wm_pending = true; return wm_pending; @@ -292,116 +292,116 @@ bool hubbub401_program_pstate_watermarks( /* Section for UCLK_PSTATE_CHANGE_WATERMARKS */ /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.uclk_pstate - > hubbub2->watermarks.dcn4.a.uclk_pstate) { - hubbub2->watermarks.dcn4.a.uclk_pstate = - watermarks->dcn4.a.uclk_pstate; + if (safe_to_lower || watermarks->dcn4x.a.uclk_pstate + > hubbub2->watermarks.dcn4x.a.uclk_pstate) { + hubbub2->watermarks.dcn4x.a.uclk_pstate = + watermarks->dcn4x.a.uclk_pstate; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, 0, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4.a.uclk_pstate); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4x.a.uclk_pstate); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.uclk_pstate, watermarks->dcn4.a.uclk_pstate); - } else if (watermarks->dcn4.a.uclk_pstate - < hubbub2->watermarks.dcn4.a.uclk_pstate) + watermarks->dcn4x.a.uclk_pstate, watermarks->dcn4x.a.uclk_pstate); + } else if (watermarks->dcn4x.a.uclk_pstate + < hubbub2->watermarks.dcn4x.a.uclk_pstate) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.uclk_pstate - > hubbub2->watermarks.dcn4.b.uclk_pstate) { - hubbub2->watermarks.dcn4.b.uclk_pstate = - watermarks->dcn4.b.uclk_pstate; + if (safe_to_lower || watermarks->dcn4x.b.uclk_pstate + > hubbub2->watermarks.dcn4x.b.uclk_pstate) { + hubbub2->watermarks.dcn4x.b.uclk_pstate = + watermarks->dcn4x.b.uclk_pstate; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, 0, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4.b.uclk_pstate); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4x.b.uclk_pstate); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.uclk_pstate, watermarks->dcn4.b.uclk_pstate); - } else if (watermarks->dcn4.b.uclk_pstate - < hubbub2->watermarks.dcn4.b.uclk_pstate) + watermarks->dcn4x.b.uclk_pstate, watermarks->dcn4x.b.uclk_pstate); + } else if (watermarks->dcn4x.b.uclk_pstate + < hubbub2->watermarks.dcn4x.b.uclk_pstate) wm_pending = true; /* Section for UCLK_PSTATE_CHANGE_WATERMARKS1 (DUMMY_PSTATE/TEMP_READ/PPT) */ - if (safe_to_lower || watermarks->dcn4.a.temp_read_or_ppt - > hubbub2->watermarks.dcn4.a.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.a.temp_read_or_ppt = - watermarks->dcn4.a.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.a.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.a.temp_read_or_ppt = + watermarks->dcn4x.a.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, 0, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4.a.temp_read_or_ppt); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4x.a.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK1_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.temp_read_or_ppt, watermarks->dcn4.a.temp_read_or_ppt); - } else if (watermarks->dcn4.a.temp_read_or_ppt - < hubbub2->watermarks.dcn4.a.temp_read_or_ppt) + watermarks->dcn4x.a.temp_read_or_ppt, watermarks->dcn4x.a.temp_read_or_ppt); + } else if (watermarks->dcn4x.a.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.temp_read_or_ppt - > hubbub2->watermarks.dcn4.b.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.b.temp_read_or_ppt = - watermarks->dcn4.b.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.b.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.b.temp_read_or_ppt = + watermarks->dcn4x.b.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, 0, - DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4.b.temp_read_or_ppt); + DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4x.b.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK1_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.temp_read_or_ppt, watermarks->dcn4.b.temp_read_or_ppt); - } else if (watermarks->dcn4.b.temp_read_or_ppt - < hubbub2->watermarks.dcn4.b.temp_read_or_ppt) + watermarks->dcn4x.b.temp_read_or_ppt, watermarks->dcn4x.b.temp_read_or_ppt); + } else if (watermarks->dcn4x.b.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) wm_pending = true; /* Section for FCLK_PSTATE_CHANGE_WATERMARKS */ /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.fclk_pstate - > hubbub2->watermarks.dcn4.a.fclk_pstate) { - hubbub2->watermarks.dcn4.a.fclk_pstate = - watermarks->dcn4.a.fclk_pstate; + if (safe_to_lower || watermarks->dcn4x.a.fclk_pstate + > hubbub2->watermarks.dcn4x.a.fclk_pstate) { + hubbub2->watermarks.dcn4x.a.fclk_pstate = + watermarks->dcn4x.a.fclk_pstate; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4.a.fclk_pstate); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_A, watermarks->dcn4x.a.fclk_pstate); DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.fclk_pstate, watermarks->dcn4.a.fclk_pstate); - } else if (watermarks->dcn4.a.fclk_pstate - < hubbub2->watermarks.dcn4.a.fclk_pstate) + watermarks->dcn4x.a.fclk_pstate, watermarks->dcn4x.a.fclk_pstate); + } else if (watermarks->dcn4x.a.fclk_pstate + < hubbub2->watermarks.dcn4x.a.fclk_pstate) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.fclk_pstate - > hubbub2->watermarks.dcn4.b.fclk_pstate) { - hubbub2->watermarks.dcn4.b.fclk_pstate = - watermarks->dcn4.b.fclk_pstate; + if (safe_to_lower || watermarks->dcn4x.b.fclk_pstate + > hubbub2->watermarks.dcn4x.b.fclk_pstate) { + hubbub2->watermarks.dcn4x.b.fclk_pstate = + watermarks->dcn4x.b.fclk_pstate; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4.b.fclk_pstate); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B, watermarks->dcn4x.b.fclk_pstate); DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.fclk_pstate, watermarks->dcn4.b.fclk_pstate); - } else if (watermarks->dcn4.b.fclk_pstate - < hubbub2->watermarks.dcn4.b.fclk_pstate) + watermarks->dcn4x.b.fclk_pstate, watermarks->dcn4x.b.fclk_pstate); + } else if (watermarks->dcn4x.b.fclk_pstate + < hubbub2->watermarks.dcn4x.b.fclk_pstate) wm_pending = true; /* Section for FCLK_CHANGE_WATERMARKS1 (DUMMY_PSTATE/TEMP_READ/PPT) */ - if (safe_to_lower || watermarks->dcn4.a.temp_read_or_ppt - > hubbub2->watermarks.dcn4.a.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.a.temp_read_or_ppt = - watermarks->dcn4.a.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.a.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.a.temp_read_or_ppt = + watermarks->dcn4x.a.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4.a.temp_read_or_ppt); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_A, watermarks->dcn4x.a.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK1_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.temp_read_or_ppt, watermarks->dcn4.a.temp_read_or_ppt); - } else if (watermarks->dcn4.a.temp_read_or_ppt - < hubbub2->watermarks.dcn4.a.temp_read_or_ppt) + watermarks->dcn4x.a.temp_read_or_ppt, watermarks->dcn4x.a.temp_read_or_ppt); + } else if (watermarks->dcn4x.a.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.a.temp_read_or_ppt) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.temp_read_or_ppt - > hubbub2->watermarks.dcn4.b.temp_read_or_ppt) { - hubbub2->watermarks.dcn4.b.temp_read_or_ppt = - watermarks->dcn4.b.temp_read_or_ppt; + if (safe_to_lower || watermarks->dcn4x.b.temp_read_or_ppt + > hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) { + hubbub2->watermarks.dcn4x.b.temp_read_or_ppt = + watermarks->dcn4x.b.temp_read_or_ppt; REG_SET(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, 0, - DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4.b.temp_read_or_ppt); + DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK1_B, watermarks->dcn4x.b.temp_read_or_ppt); DC_LOG_BANDWIDTH_CALCS("FCLK_CHANGE_WATERMARK1_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.temp_read_or_ppt, watermarks->dcn4.b.temp_read_or_ppt); - } else if (watermarks->dcn4.b.temp_read_or_ppt - < hubbub2->watermarks.dcn4.b.temp_read_or_ppt) + watermarks->dcn4x.b.temp_read_or_ppt, watermarks->dcn4x.b.temp_read_or_ppt); + } else if (watermarks->dcn4x.b.temp_read_or_ppt + < hubbub2->watermarks.dcn4x.b.temp_read_or_ppt) wm_pending = true; return wm_pending; @@ -418,29 +418,29 @@ bool hubbub401_program_usr_watermarks( bool wm_pending = false; /* clock state A */ - if (safe_to_lower || watermarks->dcn4.a.usr - > hubbub2->watermarks.dcn4.a.usr) { - hubbub2->watermarks.dcn4.a.usr = watermarks->dcn4.a.usr; + if (safe_to_lower || watermarks->dcn4x.a.usr + > hubbub2->watermarks.dcn4x.a.usr) { + hubbub2->watermarks.dcn4x.a.usr = watermarks->dcn4x.a.usr; REG_SET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, 0, - DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, watermarks->dcn4.a.usr); + DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, watermarks->dcn4x.a.usr); DC_LOG_BANDWIDTH_CALCS("USR_RETRAINING_WATERMARK_A calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.a.usr, watermarks->dcn4.a.usr); - } else if (watermarks->dcn4.a.usr - < hubbub2->watermarks.dcn4.a.usr) + watermarks->dcn4x.a.usr, watermarks->dcn4x.a.usr); + } else if (watermarks->dcn4x.a.usr + < hubbub2->watermarks.dcn4x.a.usr) wm_pending = true; /* clock state B */ - if (safe_to_lower || watermarks->dcn4.b.usr - > hubbub2->watermarks.dcn4.b.usr) { - hubbub2->watermarks.dcn4.b.usr = watermarks->dcn4.b.usr; + if (safe_to_lower || watermarks->dcn4x.b.usr + > hubbub2->watermarks.dcn4x.b.usr) { + hubbub2->watermarks.dcn4x.b.usr = watermarks->dcn4x.b.usr; REG_SET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, 0, - DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, watermarks->dcn4.b.usr); + DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, watermarks->dcn4x.b.usr); DC_LOG_BANDWIDTH_CALCS("USR_RETRAINING_WATERMARK_B calculated =%d\n" "HW register value = 0x%x\n\n", - watermarks->dcn4.b.usr, watermarks->dcn4.b.usr); - } else if (watermarks->dcn4.b.usr - < hubbub2->watermarks.dcn4.b.usr) + watermarks->dcn4x.b.usr, watermarks->dcn4x.b.usr); + } else if (watermarks->dcn4x.b.usr + < hubbub2->watermarks.dcn4x.b.usr) wm_pending = true; return wm_pending; @@ -1170,6 +1170,28 @@ static void dcn401_program_compbuf_segments(struct hubbub *hubbub, unsigned comp } } +static void dcn401_wait_for_det_update(struct hubbub *hubbub, int hubp_inst) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + + switch (hubp_inst) { + case 0: + REG_WAIT(DCHUBBUB_DET0_CTRL, DET0_SIZE_CURRENT, hubbub2->det0_size, 1, 100000); /* 1 vupdate at 10hz */ + break; + case 1: + REG_WAIT(DCHUBBUB_DET1_CTRL, DET1_SIZE_CURRENT, hubbub2->det1_size, 1, 100000); + break; + case 2: + REG_WAIT(DCHUBBUB_DET2_CTRL, DET2_SIZE_CURRENT, hubbub2->det2_size, 1, 100000); + break; + case 3: + REG_WAIT(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, hubbub2->det3_size, 1, 100000); + break; + default: + break; + } +} + static const struct hubbub_funcs hubbub4_01_funcs = { .update_dchub = hubbub2_update_dchub, .init_dchub_sys_ctx = hubbub3_init_dchub_sys_ctx, @@ -1192,6 +1214,7 @@ static const struct hubbub_funcs hubbub4_01_funcs = { .set_request_limit = hubbub32_set_request_limit, .program_det_segments = dcn401_program_det_segments, .program_compbuf_segments = dcn401_program_compbuf_segments, + .wait_for_det_update = dcn401_wait_for_det_update, }; void hubbub401_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c index bf399819ca80..22ac2b7e49ae 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c @@ -749,7 +749,8 @@ bool hubp1_is_flip_pending(struct hubp *hubp) if (flip_pending) return true; - if (earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) + if (hubp && + earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) return true; return false; diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c index 6bba020ad6fb..0637e4c552d8 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c @@ -927,7 +927,8 @@ bool hubp2_is_flip_pending(struct hubp *hubp) if (flip_pending) return true; - if (earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) + if (hubp && + earliest_inuse_address.grph.addr.quad_part != hubp->request_address.grph.addr.quad_part) return true; return false; diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c index 771fcd0d3b99..d1f05b82b3dd 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c @@ -188,7 +188,7 @@ void hubp35_program_surface_config( hubp35_program_pixel_format(hubp, format); } -struct hubp_funcs dcn35_hubp_funcs = { +static struct hubp_funcs dcn35_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, .hubp_program_surface_flip_and_addr = hubp3_program_surface_flip_and_addr, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index eb0da6c6b87c..b1ebf5053b4f 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -725,8 +725,8 @@ void hubp401_cursor_set_position( CURSOR_ENABLE, cur_en); REG_SET_2(CURSOR_POSITION, 0, - CURSOR_X_POSITION, pos->x, - CURSOR_Y_POSITION, pos->y); + CURSOR_X_POSITION, x_pos, + CURSOR_Y_POSITION, y_pos); REG_SET_2(CURSOR_HOT_SPOT, 0, CURSOR_HOT_SPOT_X, pos->x_hotspot, @@ -990,7 +990,6 @@ static struct hubp_funcs dcn401_hubp_funcs = { .hubp_soft_reset = hubp31_soft_reset, .hubp_set_flip_int = hubp401_set_flip_int, .hubp_in_blank = hubp401_in_blank, - .hubp_update_force_pstate_disallow = hubp32_update_force_pstate_disallow, .phantom_hubp_post_enable = hubp32_phantom_hubp_post_enable, .hubp_update_mall_sel = hubp401_update_mall_sel, .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 1f2eb2f727dc..d52ce58c6a98 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -949,7 +949,7 @@ void dce110_edp_backlight_control( { struct dc_context *ctx = link->ctx; struct bp_transmitter_control cntl = { 0 }; - uint8_t pwrseq_instance; + uint8_t pwrseq_instance = 0; unsigned int pre_T11_delay = OLED_PRE_T11_DELAY; unsigned int post_T7_delay = OLED_POST_T7_DELAY; @@ -1002,7 +1002,8 @@ void dce110_edp_backlight_control( */ /* dc_service_sleep_in_milliseconds(50); */ /*edp 1.2*/ - pwrseq_instance = link->panel_cntl->pwrseq_inst; + if (link->panel_cntl) + pwrseq_instance = link->panel_cntl->pwrseq_inst; if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) { if (!link->dc->config.edp_no_power_sequencing) @@ -1231,20 +1232,21 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx) * has changed or they enter protection state and hang. */ msleep(60); - } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP) { - if (!link->dc->config.edp_no_power_sequencing) { - /* - * Sometimes, DP receiver chip power-controlled externally by an - * Embedded Controller could be treated and used as eDP, - * if it drives mobile display. In this case, - * we shouldn't be doing power-sequencing, hence we can skip - * waiting for T9-ready. - */ - link->dc->link_srv->edp_receiver_ready_T9(link); - } } } + if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && + !link->dc->config.edp_no_power_sequencing) { + /* + * Sometimes, DP receiver chip power-controlled externally by an + * Embedded Controller could be treated and used as eDP, + * if it drives mobile display. In this case, + * we shouldn't be doing power-sequencing, hence we can skip + * waiting for T9-ready. + */ + link->dc->link_srv->edp_receiver_ready_T9(link); + } + } @@ -1549,6 +1551,7 @@ static enum dc_status dce110_enable_stream_timing( 0, 0, 0, + 0, pipe_ctx->stream->signal, true); } @@ -1597,6 +1600,11 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( &audio_output.crtc_info, &pipe_ctx->stream->audio_info, &audio_output.dp_link_info); + + if (dc->config.disable_hbr_audio_dp2) + if (pipe_ctx->stream_res.audio->funcs->az_disable_hbr_audio && + dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) + pipe_ctx->stream_res.audio->funcs->az_disable_hbr_audio(pipe_ctx->stream_res.audio); } /* make sure no pipes syncd to the pipe being enabled */ @@ -1838,6 +1846,7 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) bool can_apply_edp_fast_boot = false; bool can_apply_seamless_boot = false; bool keep_edp_vdd_on = false; + struct dc_bios *dcb = dc->ctx->dc_bios; DC_LOGGER_INIT(); @@ -1914,13 +1923,15 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) hws->funcs.edp_backlight_control(edp_link_with_sink, false); } /*resume from S3, no vbios posting, no need to power down again*/ - clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); + if (dcb && dcb->funcs && !dcb->funcs->is_accelerated_mode(dcb)) + clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr); power_down_all_hw_blocks(dc); disable_vga_and_power_gate_all_controllers(dc); if (edp_link_with_sink && !keep_edp_vdd_on) dc->hwss.edp_power_control(edp_link_with_sink, false); - clk_mgr_optimize_pwr_state(dc, dc->clk_mgr); + if (dcb && dcb->funcs && !dcb->funcs->is_accelerated_mode(dcb)) + clk_mgr_optimize_pwr_state(dc, dc->clk_mgr); } bios_set_scratch_acc_mode_change(dc->ctx->dc_bios, 1); } @@ -2340,19 +2351,6 @@ static void dce110_setup_audio_dto( } } -static bool dce110_is_hpo_enabled(struct dc_state *context) -{ - int i; - - for (i = 0; i < MAX_HPO_DP2_ENCODERS; i++) { - if (context->res_ctx.is_hpo_dp_stream_enc_acquired[i]) { - return true; - } - } - - return false; -} - enum dc_status dce110_apply_ctx_to_hw( struct dc *dc, struct dc_state *context) @@ -2361,8 +2359,8 @@ enum dc_status dce110_apply_ctx_to_hw( struct dc_bios *dcb = dc->ctx->dc_bios; enum dc_status status; int i; - bool was_hpo_enabled = dce110_is_hpo_enabled(dc->current_state); - bool is_hpo_enabled = dce110_is_hpo_enabled(context); + bool was_hpo_acquired = resource_is_hpo_acquired(dc->current_state); + bool is_hpo_acquired = resource_is_hpo_acquired(context); /* reset syncd pipes from disabled pipes */ if (dc->config.use_pipe_ctx_sync_logic) @@ -2405,8 +2403,8 @@ enum dc_status dce110_apply_ctx_to_hw( dce110_setup_audio_dto(dc, context); - if (dc->hwseq->funcs.setup_hpo_hw_control && was_hpo_enabled != is_hpo_enabled) { - dc->hwseq->funcs.setup_hpo_hw_control(dc->hwseq, is_hpo_enabled); + if (dc->hwseq->funcs.setup_hpo_hw_control && was_hpo_acquired != is_hpo_acquired) { + dc->hwseq->funcs.setup_hpo_hw_control(dc->hwseq, is_hpo_acquired); } for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -2438,7 +2436,7 @@ enum dc_status dce110_apply_ctx_to_hw( #ifdef CONFIG_DRM_AMD_DC_FP if (hws->funcs.resync_fifo_dccg_dio) - hws->funcs.resync_fifo_dccg_dio(hws, dc, context); + hws->funcs.resync_fifo_dccg_dio(hws, dc, context, i); #endif } @@ -3312,7 +3310,6 @@ static const struct hw_sequencer_funcs dce110_funcs = { static const struct hwseq_private_funcs dce110_private_funcs = { .init_pipes = init_pipes, - .update_plane_addr = update_plane_addr, .set_input_transfer_func = dce110_set_input_transfer_func, .set_output_transfer_func = dce110_set_output_transfer_func, .power_down = dce110_power_down, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index e06fc370267b..01dffed4d30b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1005,6 +1005,7 @@ enum dc_status dcn10_enable_stream_timing( pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout, pipe_ctx->stream->signal, true); @@ -1402,6 +1403,8 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } @@ -1552,7 +1555,7 @@ void dcn10_init_hw(struct dc *dc) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); /* Align bw context with hw config when system resume. */ - if (dc->clk_mgr->clks.dispclk_khz != 0 && dc->clk_mgr->clks.dppclk_khz != 0) { + if (dc->clk_mgr && dc->clk_mgr->clks.dispclk_khz != 0 && dc->clk_mgr->clks.dppclk_khz != 0) { dc->current_state->bw_ctx.bw.dcn.clk.dispclk_khz = dc->clk_mgr->clks.dispclk_khz; dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz = dc->clk_mgr->clks.dppclk_khz; } @@ -1672,7 +1675,7 @@ void dcn10_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); } @@ -1695,10 +1698,10 @@ void dcn10_power_down_on_boot(struct dc *dc) if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwseq->funcs.edp_backlight_control && - dc->hwss.power_down && + dc->hwseq->funcs.power_down && dc->hwss.edp_power_control) { dc->hwseq->funcs.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { @@ -1706,8 +1709,8 @@ void dcn10_power_down_on_boot(struct dc *dc) if (link->link_enc && link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down) { + dc->hwseq->funcs.power_down(dc); break; } @@ -2583,8 +2586,11 @@ static bool dcn10_is_rear_mpo_fix_required(struct pipe_ctx *pipe_ctx, enum dc_co while (top->top_pipe) top = top->top_pipe; // Traverse to top pipe_ctx - if (top->plane_state && top->plane_state->layer_index == 0) - return true; // Front MPO plane not hidden + if (top->plane_state && top->plane_state->layer_index == 0 && !top->plane_state->global_alpha) + // Global alpha used by top plane for PIP overlay + // Pre-multiplied/per-pixel alpha used by MPO + // Check top plane's global alpha to ensure layer_index > 0 not caused by PIP + return true; // MPO in use and front plane not hidden } } return false; @@ -2912,7 +2918,7 @@ static void dcn10_update_dchubp_dpp( hubp->power_gated = false; - hws->funcs.update_plane_addr(dc, pipe_ctx); + dc->hwss.update_plane_addr(dc, pipe_ctx); if (is_pipe_tree_visible(pipe_ctx)) hubp->funcs->set_blank(hubp, false); @@ -2995,7 +3001,8 @@ void dcn10_program_pipe( calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width); + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout); pipe_ctx->stream_res.tg->funcs->set_vtg_params( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true); @@ -3587,7 +3594,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = temp_x + viewport_width; + pos_cpy.x = 2 * viewport_width - temp_x; } } } else { @@ -3680,7 +3687,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = 2 * viewport_width - temp_x; + pos_cpy.x = temp_x + viewport_width; } } } else { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c index a5bdac79a744..5e51e1761707 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c @@ -78,7 +78,6 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .get_clock = dcn10_get_clock, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dce110_set_backlight_level, .set_abm_immediate_disable = dce110_set_abm_immediate_disable, .set_pipe = dce110_set_pipe, @@ -92,7 +91,6 @@ static const struct hw_sequencer_funcs dcn10_funcs = { static const struct hwseq_private_funcs dcn10_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn10_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .program_pipe = dcn10_program_pipe, .update_mpcc = dcn10_update_mpcc, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 2532ad410cb5..a80c08582932 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -909,6 +909,7 @@ enum dc_status dcn20_enable_stream_timing( pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout, pipe_ctx->stream->signal, true); @@ -1044,7 +1045,8 @@ bool dcn20_set_output_transfer_func(struct dc *dc, struct pipe_ctx *pipe_ctx, /* * if above if is not executed then 'params' equal to 0 and set in bypass */ - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); return true; } @@ -1698,7 +1700,7 @@ static void dcn20_update_dchubp_dpp( plane_state->update_flags.bits.input_csc_change || plane_state->update_flags.bits.color_space_change || plane_state->update_flags.bits.coeff_reduction_change) { - struct dc_bias_and_scale bns_params = {0}; + struct dc_bias_and_scale bns_params = plane_state->bias_and_scale; // program the input csc dpp->funcs->dpp_setup(dpp, @@ -1715,7 +1717,6 @@ static void dcn20_update_dchubp_dpp( } if (dpp->funcs->dpp_program_bias_and_scale) { //TODO :for CNVC set scale and bias registers if necessary - build_prescale_params(&bns_params, plane_state); dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params); } } @@ -1825,7 +1826,7 @@ static void dcn20_update_dchubp_dpp( params.subvp_save_surf_addr.subvp_index = pipe_ctx->subvp_index; hwss_subvp_save_surf_addr(¶ms); } - hws->funcs.update_plane_addr(dc, pipe_ctx); + dc->hwss.update_plane_addr(dc, pipe_ctx); } if (pipe_ctx->update_flags.bits.enable) @@ -1886,7 +1887,8 @@ static void dcn20_program_pipe( calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width); + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout); if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE); @@ -1921,22 +1923,28 @@ static void dcn20_program_pipe( dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size); } - if (pipe_ctx->update_flags.raw || pipe_ctx->plane_state->update_flags.raw || pipe_ctx->stream->update_flags.raw) + if (pipe_ctx->update_flags.raw || + (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) || + pipe_ctx->stream->update_flags.raw) dcn20_update_dchubp_dpp(dc, pipe_ctx, context); - if (pipe_ctx->update_flags.bits.enable - || pipe_ctx->plane_state->update_flags.bits.hdr_mult) + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || + pipe_ctx->plane_state->update_flags.bits.hdr_mult)) hws->funcs.set_hdr_multiplier(pipe_ctx); if (hws->funcs.populate_mcm_luts) { - hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, - pipe_ctx->plane_state->lut_bank_a); - pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; + if (pipe_ctx->plane_state) { + hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts, + pipe_ctx->plane_state->lut_bank_a); + pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a; + } } - if (pipe_ctx->update_flags.bits.enable || - pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || + + if (pipe_ctx->plane_state && + (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change || pipe_ctx->plane_state->update_flags.bits.gamma_change || - pipe_ctx->plane_state->update_flags.bits.lut_3d) + pipe_ctx->plane_state->update_flags.bits.lut_3d || + pipe_ctx->update_flags.bits.enable)) hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state); /* dcn10_translate_regamma_to_hw_format takes 750us to finish @@ -1946,7 +1954,8 @@ static void dcn20_program_pipe( if (pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.plane_changed || pipe_ctx->stream->update_flags.bits.out_tf || - pipe_ctx->plane_state->update_flags.bits.output_tf_change) + (pipe_ctx->plane_state && + pipe_ctx->plane_state->update_flags.bits.output_tf_change)) hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream); /* If the pipe has been enabled or has a different opp, we @@ -1970,7 +1979,7 @@ static void dcn20_program_pipe( } /* Set ABM pipe after other pipe configurations done */ - if (pipe_ctx->plane_state->visible) { + if ((pipe_ctx->plane_state && pipe_ctx->plane_state->visible)) { if (pipe_ctx->stream_res.abm) { dc->hwss.set_pipe(pipe_ctx); pipe_ctx->stream_res.abm->funcs->set_abm_level(pipe_ctx->stream_res.abm, @@ -2186,9 +2195,9 @@ static void post_unlock_reset_opp(struct dc *dc, * yet power gated. */ dsc->funcs->dsc_wait_disconnect_pending_clear(dsc); + dsc->funcs->dsc_disable(dsc); if (dccg->funcs->set_ref_dscclk) dccg->funcs->set_ref_dscclk(dccg, dsc->inst); - dsc->funcs->dsc_disable(dsc); } } } @@ -2283,6 +2292,9 @@ void dcn20_post_unlock_program_front_end( } } + if (!hwseq) + return; + /* P-State support transitions: * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally) @@ -2290,7 +2302,7 @@ void dcn20_post_unlock_program_front_end( * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes */ - if (hwseq && hwseq->funcs.update_force_pstate) + if (hwseq->funcs.update_force_pstate) dc->hwseq->funcs.update_force_pstate(dc, context); /* Only program the MALL registers after all the main and phantom pipes @@ -2459,7 +2471,8 @@ bool dcn20_update_bandwidth( calculate_vready_offset_for_group(pipe_ctx), pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, - pipe_ctx->pipe_dlg_param.vupdate_width); + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout); pipe_ctx->stream_res.tg->funcs->set_vtg_params( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false); @@ -2529,6 +2542,9 @@ bool dcn20_wait_for_blank_complete( { int counter; + if (!opp) + return false; + for (counter = 0; counter < 1000; counter++) { if (!opp->funcs->dpg_is_pending(opp)) break; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c index ef6488165b8f..32707b344f0b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c @@ -105,7 +105,6 @@ static const struct hw_sequencer_funcs dcn20_funcs = { static const struct hwseq_private_funcs dcn20_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn20_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c index a13bf6c9386e..78351408e864 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c @@ -96,7 +96,6 @@ static const struct hw_sequencer_funcs dcn201_funcs = { static const struct hwseq_private_funcs dcn201_private_funcs = { .init_pipes = NULL, - .update_plane_addr = dcn201_update_plane_addr, .plane_atomic_disconnect = dcn201_plane_atomic_disconnect, .program_pipe = dcn10_program_pipe, .update_mpcc = dcn201_update_mpcc, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c index 3dfac372d165..e044e9e0a3a1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c @@ -93,7 +93,6 @@ static const struct hw_sequencer_funcs dcn21_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -109,7 +108,6 @@ static const struct hw_sequencer_funcs dcn21_funcs = { static const struct hwseq_private_funcs dcn21_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn20_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c index eaeeade31ed7..42c52284a868 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c @@ -398,7 +398,11 @@ bool dcn30_set_output_transfer_func(struct dc *dc, } } - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + else + DC_LOG_ERROR("%s: set_output_gamma function pointer is NULL.\n", __func__); + return ret; } @@ -625,7 +629,7 @@ void dcn30_init_hw(struct dc *dc) uint32_t backlight = MAX_BACKLIGHT_LEVEL; uint32_t user_level = MAX_BACKLIGHT_LEVEL; - if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); // Initialize the dccg @@ -731,10 +735,10 @@ void dcn30_init_hw(struct dc *dc) if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwss.edp_backlight_control && - dc->hwss.power_down && + hws->funcs.power_down && dc->hwss.edp_power_control) { dc->hwss.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + hws->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { @@ -742,8 +746,8 @@ void dcn30_init_hw(struct dc *dc) if (link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + hws->funcs.power_down) { + hws->funcs.power_down(dc); break; } @@ -786,11 +790,12 @@ void dcn30_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); //if softmax is enabled then hardmax will be set by a different call - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->set_hard_max_memclk && + !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); if (dc->res_pool->hubbub->funcs->force_pstate_change_control) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c index 4b32497c09d0..2a8dc40d2847 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c @@ -113,7 +113,6 @@ static const struct hw_sequencer_funcs dcn30_funcs = { static const struct hwseq_private_funcs dcn30_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c index 97e33eb7ac5a..93e49d87a67c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c @@ -111,7 +111,6 @@ static const struct hw_sequencer_funcs dcn301_funcs = { static const struct hwseq_private_funcs dcn301_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index 746c522adf84..3d4b31bd9946 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -256,10 +256,10 @@ void dcn31_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); if (dc->res_pool->hubbub->funcs->force_pstate_change_control) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c index 9cb7afe0e731..56f3c70d4b55 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c @@ -98,7 +98,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -112,11 +111,11 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .optimize_pwr_state = dcn21_optimize_pwr_state, .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, + .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, }; static const struct hwseq_private_funcs dcn31_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index 388404cdeeaa..4e93eeedfc1b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -355,14 +355,18 @@ void dcn314_calculate_pix_rate_divider( } } -void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context) +void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx) { unsigned int i; struct pipe_ctx *pipe = NULL; bool otg_disabled[MAX_PIPES] = {false}; for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) { + pipe = &context->res_ctx.pipe_ctx[i]; + } else { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + } if (pipe->top_pipe || pipe->prev_odm_pipe) continue; @@ -377,7 +381,10 @@ void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc hws->ctx->dc->res_pool->dccg->funcs->trigger_dio_fifo_resync(hws->ctx->dc->res_pool->dccg); for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) + pipe = &context->res_ctx.pipe_ctx[i]; + else + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (otg_disabled[i]) { int opp_inst[MAX_PIPES] = { pipe->stream_res.opp->inst }; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h index fb4f90f61b22..2305ad282f21 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h @@ -41,7 +41,7 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig void dcn314_calculate_pix_rate_divider(struct dc *dc, struct dc_state *context, const struct dc_stream_state *stream); -void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context); +void dcn314_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx); void dcn314_dpp_root_clock_control(struct dce_hwseq *hws, unsigned int dpp_inst, bool clock_on); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c index 7a8db4b81471..68e6de6b5758 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c @@ -100,7 +100,6 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -115,11 +114,11 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn10_update_visual_confirm_color, .calculate_pix_rate_divider = dcn314_calculate_pix_rate_divider, + .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, }; static const struct hwseq_private_funcs dcn314_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn30_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 05d8f81daa06..a36e11606f90 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -582,7 +582,9 @@ bool dcn32_set_output_transfer_func(struct dc *dc, } } - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + return ret; } @@ -779,7 +781,7 @@ void dcn32_init_hw(struct dc *dc) uint32_t backlight = MAX_BACKLIGHT_LEVEL; uint32_t user_level = MAX_BACKLIGHT_LEVEL; - if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); // Initialize the dccg @@ -901,10 +903,10 @@ void dcn32_init_hw(struct dc *dc) if (edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwss.edp_backlight_control && - dc->hwss.power_down && + hws->funcs.power_down && dc->hwss.edp_power_control) { dc->hwss.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + hws->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } } @@ -914,8 +916,8 @@ void dcn32_init_hw(struct dc *dc) if (link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + hws->funcs.power_down) { + hws->funcs.power_down(dc); break; } @@ -958,10 +960,11 @@ void dcn32_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->set_hard_max_memclk && + !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); if (dc->res_pool->hubbub->funcs->force_pstate_change_control) @@ -982,8 +985,19 @@ void dcn32_init_hw(struct dc *dc) dc->caps.dmub_caps.gecc_enable = dc->ctx->dmub_srv->dmub->feature_caps.gecc_enable; dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; - if (dc->ctx->dmub_srv->dmub->fw_version < + /* for DCN401 testing only */ + dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; + if (dc->caps.dmub_caps.fams_ver == 2) { + /* FAMS2 is enabled */ + dc->debug.fams2_config.bits.enable &= true; + } else if (dc->ctx->dmub_srv->dmub->fw_version < DMUB_FW_VERSION(7, 0, 35)) { + /* FAMS2 is disabled */ + dc->debug.fams2_config.bits.enable = false; + if (dc->debug.using_dml2 && dc->res_pool->funcs->update_bw_bounding_box) { + /* update bounding box if FAMS2 disabled */ + dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); + } dc->debug.force_disable_subvp = true; dc->debug.disable_fpo_optimizations = true; } @@ -1029,24 +1043,20 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0); dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, dsc->inst); dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, dsc->inst, true); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc; ASSERT(odm_dsc); + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst); odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg); odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst, true); } - dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt; - dsc_cfg.pic_width *= opp_cnt; - optc_dsc_mode = dsc_optc_cfg.is_pixel_format_444 ? OPTC_DSC_ENABLED_444 : OPTC_DSC_ENABLED_NATIVE_SUBSAMPLED; - /* Enable DSC in OPTC */ DC_LOG_DSC("Setting optc DSC config for tg instance %d:", pipe_ctx->stream_res.tg->inst); pipe_ctx->stream_res.tg->funcs->set_dsc_config(pipe_ctx->stream_res.tg, @@ -1060,13 +1070,9 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) OPTC_DSC_DISABLED, 0, 0); /* only disconnect DSC block, DSC is disabled when OPP head pipe is reset */ - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, pipe_ctx->stream_res.dsc->inst, false); - dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc); + dsc->funcs->dsc_disconnect(pipe_ctx->stream_res.dsc); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { ASSERT(odm_pipe->stream_res.dsc); - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_pipe->stream_res.dsc->inst, false); odm_pipe->stream_res.dsc->funcs->dsc_disconnect(odm_pipe->stream_res.dsc); } } @@ -1137,10 +1143,7 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * if (!pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe && current_pipe_ctx->next_odm_pipe->stream_res.dsc) { struct display_stream_compressor *dsc = current_pipe_ctx->next_odm_pipe->stream_res.dsc; - struct dccg *dccg = dc->res_pool->dccg; - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, dsc->inst, false); /* disconnect DSC block from stream */ dsc->funcs->dsc_disconnect(dsc); } @@ -1212,20 +1215,27 @@ void dcn32_calculate_pix_rate_divider( } } -void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context) +void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx) { unsigned int i; struct pipe_ctx *pipe = NULL; bool otg_disabled[MAX_PIPES] = {false}; + struct dc_state *dc_state = NULL; for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) { + pipe = &context->res_ctx.pipe_ctx[i]; + dc_state = context; + } else { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + dc_state = dc->current_state; + } if (!resource_is_pipe_type(pipe, OTG_MASTER)) continue; if ((pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal)) - && dc_state_get_pipe_subvp_type(dc->current_state, pipe) != SUBVP_PHANTOM) { + && dc_state_get_pipe_subvp_type(dc_state, pipe) != SUBVP_PHANTOM) { pipe->stream_res.tg->funcs->disable_crtc(pipe->stream_res.tg); reset_sync_context_for_pipe(dc, context, i); otg_disabled[i] = true; @@ -1235,7 +1245,10 @@ void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_ hws->ctx->dc->res_pool->dccg->funcs->trigger_dio_fifo_resync(hws->ctx->dc->res_pool->dccg); for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (i <= current_pipe_idx) + pipe = &context->res_ctx.pipe_ctx[i]; + else + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; if (otg_disabled[i]) { int opp_inst[MAX_PIPES] = { pipe->stream_res.opp->inst }; @@ -1583,7 +1596,7 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context) #ifdef CONFIG_DRM_AMD_DC_FP if (hws->funcs.resync_fifo_dccg_dio) - hws->funcs.resync_fifo_dccg_dio(hws, dc, context); + hws->funcs.resync_fifo_dccg_dio(hws, dc, context, i); #endif } } @@ -1717,6 +1730,28 @@ void dcn32_blank_phantom(struct dc *dc, hws->funcs.wait_for_blank_complete(opp); } +/* phantom stream id's can change often, but can be identical between contexts. +* This function checks for the condition the streams are identical to avoid +* redundant pipe transitions. +*/ +static bool is_subvp_phantom_topology_transition_seamless( + const struct dc_state *cur_ctx, + const struct dc_state *new_ctx, + const struct pipe_ctx *cur_pipe, + const struct pipe_ctx *new_pipe) +{ + enum mall_stream_type cur_pipe_type = dc_state_get_pipe_subvp_type(cur_ctx, cur_pipe); + enum mall_stream_type new_pipe_type = dc_state_get_pipe_subvp_type(new_ctx, new_pipe); + + const struct dc_stream_state *cur_paired_stream = dc_state_get_paired_subvp_stream(cur_ctx, cur_pipe->stream); + const struct dc_stream_state *new_paired_stream = dc_state_get_paired_subvp_stream(new_ctx, new_pipe->stream); + + return cur_pipe_type == SUBVP_PHANTOM && + cur_pipe_type == new_pipe_type && + cur_paired_stream && new_paired_stream && + cur_paired_stream->stream_id == new_paired_stream->stream_id; +} + bool dcn32_is_pipe_topology_transition_seamless(struct dc *dc, const struct dc_state *cur_ctx, const struct dc_state *new_ctx) @@ -1735,7 +1770,8 @@ bool dcn32_is_pipe_topology_transition_seamless(struct dc *dc, continue; else if (resource_is_pipe_type(cur_pipe, OTG_MASTER)) { if (resource_is_pipe_type(new_pipe, OTG_MASTER)) - if (cur_pipe->stream->stream_id == new_pipe->stream->stream_id) + if (cur_pipe->stream->stream_id == new_pipe->stream->stream_id || + is_subvp_phantom_topology_transition_seamless(cur_ctx, new_ctx, cur_pipe, new_pipe)) /* OTG master with the same stream is seamless */ continue; } else if (resource_is_pipe_type(cur_pipe, OPP_HEAD)) { @@ -1821,3 +1857,13 @@ void dcn32_interdependent_update_lock(struct dc *dc, dc->hwss.pipe_control_lock(dc, pipe, false); } } + +void dcn32_program_outstanding_updates(struct dc *dc, + struct dc_state *context) +{ + struct hubbub *hubbub = dc->res_pool->hubbub; + + /* update compbuf if required */ + if (hubbub->funcs->program_compbuf_size) + hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, true); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h index db562e45d6ff..cac4a08b92a4 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h @@ -75,7 +75,7 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable); unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div); -void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context); +void dcn32_resync_fifo_dccg_dio(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context, unsigned int current_pipe_idx); void dcn32_subvp_pipe_control_lock(struct dc *dc, struct dc_state *context, @@ -133,4 +133,8 @@ void dcn32_prepare_bandwidth(struct dc *dc, void dcn32_interdependent_update_lock(struct dc *dc, struct dc_state *context, bool lock); + +void dcn32_program_outstanding_updates(struct dc *dc, + struct dc_state *context); + #endif /* __DC_HWSS_DCN32_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index 5c50458b12cb..3422b564ae98 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -120,11 +120,11 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .blank_phantom = dcn32_blank_phantom, .is_pipe_topology_transition_seamless = dcn32_is_pipe_topology_transition_seamless, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, + .program_outstanding_updates = dcn32_program_outstanding_updates, }; static const struct hwseq_private_funcs dcn32_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, @@ -163,7 +163,6 @@ static const struct hwseq_private_funcs dcn32_private_funcs = { .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe, - .populate_mcm_luts = dcn401_populate_mcm_luts, }; void dcn32_hw_sequencer_init_functions(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index e4f7078c1026..fbbb20b9dbee 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -235,7 +235,7 @@ void dcn35_init_hw(struct dc *dc) if (hws->funcs.enable_power_gating_plane) hws->funcs.enable_power_gating_plane(dc->hwseq, true); */ - if (res_pool->hubbub->funcs->dchubbub_init) + if (res_pool->hubbub && res_pool->hubbub->funcs->dchubbub_init) res_pool->hubbub->funcs->dchubbub_init(dc->res_pool->hubbub); /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which @@ -328,10 +328,10 @@ void dcn35_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) + if (dc->clk_mgr && dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); @@ -629,10 +629,10 @@ void dcn35_power_down_on_boot(struct dc *dc) if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwseq->funcs.edp_backlight_control && - dc->hwss.power_down && + dc->hwseq->funcs.power_down && dc->hwss.edp_power_control) { dc->hwseq->funcs.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { @@ -640,8 +640,8 @@ void dcn35_power_down_on_boot(struct dc *dc) if (link->link_enc && link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + dc->hwseq->funcs.power_down) { + dc->hwseq->funcs.power_down(dc); break; } @@ -771,6 +771,8 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } @@ -1022,9 +1024,6 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (!hpo_frl_stream_enc_acquired && !hpo_dp_stream_enc_acquired) update_state->pg_res_update[PG_HPO] = true; - if (hpo_frl_stream_enc_acquired) - update_state->pg_pipe_res_update[PG_HDMISTREAM][0] = true; - update_state->pg_res_update[PG_DWB] = true; for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -1039,7 +1038,7 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (pipe_ctx->plane_res.hubp) update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->plane_res.hubp->inst] = false; - if (pipe_ctx->plane_res.dpp) + if (pipe_ctx->plane_res.dpp && pipe_ctx->plane_res.hubp) update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->plane_res.hubp->inst] = false; if (pipe_ctx->plane_res.dpp || pipe_ctx->stream_res.opp) @@ -1461,10 +1460,9 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx, for (i = 0; i < num_pipes; i++) { if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { - struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing; - struct dc *dc = pipe_ctx[i]->stream->ctx->dc; - - if (dc->debug.static_screen_wait_frames) { + if (pipe_ctx[i]->stream && pipe_ctx[i]->stream->ctx->dc->debug.static_screen_wait_frames) { + struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing; + struct dc *dc = pipe_ctx[i]->stream->ctx->dc; unsigned int frame_rate = timing->pix_clk_100hz / (timing->h_total * timing->v_total); if (frame_rate >= 120 && dc->caps.ips_support && diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index 428912f37129..2bbf1fef94fd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -101,7 +101,6 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -124,11 +123,11 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .root_clock_control = dcn35_root_clock_control, .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, + .program_outstanding_updates = dcn32_program_outstanding_updates, }; static const struct hwseq_private_funcs dcn35_private_funcs = { .init_pipes = dcn35_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index 55e791552bca..d00822e8daa5 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -100,7 +100,6 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .power_down = dce110_power_down, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -123,11 +122,12 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .root_clock_control = dcn35_root_clock_control, .set_long_vtotal = dcn35_set_long_vblank, .calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider, + .program_outstanding_updates = dcn32_program_outstanding_updates, + .setup_hpo_hw_control = dcn35_setup_hpo_hw_control, }; static const struct hwseq_private_funcs dcn351_private_funcs = { .init_pipes = dcn35_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 2c50c0f745a0..0b743669f23b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -221,8 +221,9 @@ void dcn401_init_hw(struct dc *dc) int edp_num; uint32_t backlight = MAX_BACKLIGHT_LEVEL; uint32_t user_level = MAX_BACKLIGHT_LEVEL; + int current_dchub_ref_freq = 0; - if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) { + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->init_clocks) { dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); // mark dcmode limits present if any clock has distinct AC and DC values from SMU @@ -264,6 +265,8 @@ void dcn401_init_hw(struct dc *dc) dc->ctx->dc_bios->fw_info.pll_info.crystal_frequency, &res_pool->ref_clocks.dccg_ref_clock_inKhz); + current_dchub_ref_freq = res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000; + (res_pool->hubbub->funcs->get_dchub_ref_freq)(res_pool->hubbub, res_pool->ref_clocks.dccg_ref_clock_inKhz, &res_pool->ref_clocks.dchub_ref_clock_inKhz); @@ -354,10 +357,10 @@ void dcn401_init_hw(struct dc *dc) if (edp_link->link_enc->funcs->is_dig_enabled && edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && dc->hwss.edp_backlight_control && - dc->hwss.power_down && + hws->funcs.power_down && dc->hwss.edp_power_control) { dc->hwss.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); + hws->funcs.power_down(dc); dc->hwss.edp_power_control(edp_link, false); } } @@ -367,8 +370,8 @@ void dcn401_init_hw(struct dc *dc) if (link->link_enc->funcs->is_dig_enabled && link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); + hws->funcs.power_down) { + hws->funcs.power_down(dc); break; } @@ -413,12 +416,9 @@ void dcn401_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); - if (dc->clk_mgr->funcs->notify_wm_ranges) + if (dc->clk_mgr && dc->clk_mgr->funcs && dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); - if (dc->clk_mgr->funcs->set_hard_max_memclk && !dc->clk_mgr->dc_mode_softmax_enabled) - dc->clk_mgr->funcs->set_hard_max_memclk(dc->clk_mgr); - if (dc->res_pool->hubbub->funcs->force_pstate_change_control) dc->res_pool->hubbub->funcs->force_pstate_change_control( dc->res_pool->hubbub, false, false); @@ -436,9 +436,12 @@ void dcn401_init_hw(struct dc *dc) dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver > 0; dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; dc->debug.fams2_config.bits.enable &= dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver == 2; - if (!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box) { - /* update bounding box if FAMS2 disabled */ - dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); + if ((!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box) + || res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000 != current_dchub_ref_freq) { + /* update bounding box if FAMS2 disabled, or if dchub clk has changed */ + if (dc->clk_mgr) + dc->res_pool->funcs->update_bw_bounding_box(dc, + dc->clk_mgr->bw_params); } } } @@ -498,6 +501,7 @@ void dcn401_populate_mcm_luts(struct dc *dc, enum MCM_LUT_XABLE lut3d_xable = MCM_LUT_DISABLE; enum MCM_LUT_XABLE lut1d_xable = MCM_LUT_DISABLE; bool is_17x17x17 = true; + bool rval; dcn401_get_mcm_lut_xable_from_pipe_ctx(dc, pipe_ctx, &shaper_xable, &lut3d_xable, &lut1d_xable); @@ -507,11 +511,10 @@ void dcn401_populate_mcm_luts(struct dc *dc, if (mcm_luts.lut1d_func->type == TF_TYPE_HWPWL) m_lut_params.pwl = &mcm_luts.lut1d_func->pwl; else if (mcm_luts.lut1d_func->type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format( - dc->ctx, + rval = cm3_helper_translate_curve_to_hw_format( mcm_luts.lut1d_func, &dpp_base->regamma_params, false); - m_lut_params.pwl = &dpp_base->regamma_params; + m_lut_params.pwl = rval ? &dpp_base->regamma_params : NULL; } if (m_lut_params.pwl) { if (mpc->funcs->populate_lut) @@ -528,11 +531,10 @@ void dcn401_populate_mcm_luts(struct dc *dc, m_lut_params.pwl = &mcm_luts.shaper->pwl; else if (mcm_luts.shaper->type == TF_TYPE_DISTRIBUTED_POINTS) { ASSERT(false); - cm_helper_translate_curve_to_hw_format( - dc->ctx, + rval = cm3_helper_translate_curve_to_hw_format( mcm_luts.shaper, &dpp_base->regamma_params, true); - m_lut_params.pwl = &dpp_base->regamma_params; + m_lut_params.pwl = rval ? &dpp_base->regamma_params : NULL; } if (m_lut_params.pwl) { if (mpc->funcs->populate_lut) @@ -668,47 +670,40 @@ bool dcn401_set_mcm_luts(struct pipe_ctx *pipe_ctx, struct dpp *dpp_base = pipe_ctx->plane_res.dpp; int mpcc_id = pipe_ctx->plane_res.hubp->inst; struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; - bool result = true; + bool result; const struct pwl_params *lut_params = NULL; + bool rval; mpc->funcs->set_movable_cm_location(mpc, MPCC_MOVABLE_CM_LOCATION_BEFORE, mpcc_id); pipe_ctx->plane_state->mcm_location = MPCC_MOVABLE_CM_LOCATION_BEFORE; // 1D LUT - if (plane_state->mcm_shaper_3dlut_setting == DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL) { - if (plane_state->blend_tf.type == TF_TYPE_HWPWL) - lut_params = &plane_state->blend_tf.pwl; - else if (plane_state->blend_tf.type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - &plane_state->blend_tf, - &dpp_base->regamma_params, false); - lut_params = &dpp_base->regamma_params; - } - result = mpc->funcs->program_1dlut(mpc, lut_params, mpcc_id); - lut_params = NULL; + if (plane_state->blend_tf.type == TF_TYPE_HWPWL) + lut_params = &plane_state->blend_tf.pwl; + else if (plane_state->blend_tf.type == TF_TYPE_DISTRIBUTED_POINTS) { + rval = cm3_helper_translate_curve_to_hw_format(&plane_state->blend_tf, + &dpp_base->regamma_params, false); + lut_params = rval ? &dpp_base->regamma_params : NULL; } + result = mpc->funcs->program_1dlut(mpc, lut_params, mpcc_id); + lut_params = NULL; // Shaper - if (plane_state->mcm_shaper_3dlut_setting == DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL) { - if (plane_state->in_shaper_func.type == TF_TYPE_HWPWL) - lut_params = &plane_state->in_shaper_func.pwl; - else if (plane_state->in_shaper_func.type == TF_TYPE_DISTRIBUTED_POINTS) { - // TODO: dpp_base replace - ASSERT(false); - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - &plane_state->in_shaper_func, - &dpp_base->shaper_params, true); - lut_params = &dpp_base->shaper_params; - } - - result = mpc->funcs->program_shaper(mpc, lut_params, mpcc_id); + if (plane_state->in_shaper_func.type == TF_TYPE_HWPWL) + lut_params = &plane_state->in_shaper_func.pwl; + else if (plane_state->in_shaper_func.type == TF_TYPE_DISTRIBUTED_POINTS) { + // TODO: dpp_base replace + rval = cm3_helper_translate_curve_to_hw_format(&plane_state->in_shaper_func, + &dpp_base->shaper_params, true); + lut_params = rval ? &dpp_base->shaper_params : NULL; } + result &= mpc->funcs->program_shaper(mpc, lut_params, mpcc_id); // 3D - if (plane_state->mcm_shaper_3dlut_setting == DC_CM2_SHAPER_3DLUT_SETTING_BYPASS_ALL) { + if (mpc->funcs->program_3dlut) { if (plane_state->lut3d_func.state.bits.initialized == 1) - result = mpc->funcs->program_3dlut(mpc, &plane_state->lut3d_func.lut_3d, mpcc_id); + result &= mpc->funcs->program_3dlut(mpc, &plane_state->lut3d_func.lut_3d, mpcc_id); else - result = mpc->funcs->program_3dlut(mpc, NULL, mpcc_id); + result &= mpc->funcs->program_3dlut(mpc, NULL, mpcc_id); } return result; @@ -742,7 +737,9 @@ bool dcn401_set_output_transfer_func(struct dc *dc, } } - mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + if (mpc->funcs->set_output_gamma) + mpc->funcs->set_output_gamma(mpc, mpcc_id, params); + return ret; } @@ -871,6 +868,7 @@ enum dc_status dcn401_enable_stream_timing( pipe_ctx->pipe_dlg_param.vstartup_start, pipe_ctx->pipe_dlg_param.vupdate_offset, pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->pipe_dlg_param.pstate_keepout, pipe_ctx->stream->signal, true); @@ -1115,10 +1113,10 @@ void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx) .mirror = pipe_ctx->plane_state->horizontal_mirror, .stream = pipe_ctx->stream }; + struct rect odm_slice_src = { 0 }; bool odm_combine_on = (pipe_ctx->next_odm_pipe != NULL) || (pipe_ctx->prev_odm_pipe != NULL); int prev_odm_width = 0; - int prev_odm_offset = 0; struct pipe_ctx *prev_odm_pipe = NULL; bool mpc_combine_on = false; int bottom_pipe_x_pos = 0; @@ -1183,12 +1181,12 @@ void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx) prev_odm_pipe = pipe_ctx->prev_odm_pipe; while (prev_odm_pipe != NULL) { - prev_odm_width += prev_odm_pipe->plane_res.scl_data.recout.width; - prev_odm_offset += prev_odm_pipe->plane_res.scl_data.recout.x; + odm_slice_src = resource_get_odm_slice_src_rect(prev_odm_pipe); + prev_odm_width += odm_slice_src.width; prev_odm_pipe = prev_odm_pipe->prev_odm_pipe; } - x_pos -= (prev_odm_width + prev_odm_offset); + x_pos -= (prev_odm_width); } /* If the position is negative then we need to add to the hotspot @@ -1311,8 +1309,10 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable) for (i = 0; i < dc->current_state->stream_count; i++) { /* MALL SS messaging is not supported with PSR at this time */ if (dc->current_state->streams[i] != NULL && - dc->current_state->streams[i]->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) + dc->current_state->streams[i]->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED) { + DC_LOG_MALL("MALL SS not supported with PSR at this time\n"); return false; + } } memset(&cmd, 0, sizeof(cmd)); @@ -1322,8 +1322,9 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable) if (enable) { if (dcn401_check_no_memory_request_for_cab(dc)) { /* 1. Check no memory request case for CAB. - * If no memory request case, send CAB_ACTION NO_DF_REQ DMUB message + * If no memory request case, send CAB_ACTION NO_DCN_REQ DMUB message */ + DC_LOG_MALL("sending CAB action NO_DCN_REQ\n"); cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_DCN_REQ; } else { /* 2. Check if all surfaces can fit in CAB. @@ -1351,13 +1352,16 @@ bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable) if (ways <= dc->caps.cache_num_ways && !mall_ss_unsupported) { cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB; cmd.cab.cab_alloc_ways = ways; + DC_LOG_MALL("cab allocation: %d ways. CAB action: DCN_SS_FIT_IN_CAB\n", ways); } else { cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_NOT_FIT_IN_CAB; + DC_LOG_MALL("frame does not fit in CAB: %d ways required. CAB action: DCN_SS_NOT_FIT_IN_CAB\n", ways); } } } else { /* Disable CAB */ cmd.cab.header.sub_type = DMUB_CMD__CAB_NO_IDLE_OPTIMIZATION; + DC_LOG_MALL("idle optimization disabled\n"); } dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); @@ -1395,10 +1399,10 @@ void dcn401_prepare_bandwidth(struct dc *dc, { struct hubbub *hubbub = dc->res_pool->hubbub; bool p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support; - unsigned int compbuf_size_kb = 0; + unsigned int compbuf_size = 0; - /* Any transition into or out of a FAMS config should disable MCLK switching first to avoid hangs */ - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) { + /* Any transition into P-State support should disable MCLK switching first to avoid hangs */ + if (p_state_change_support) { dc->optimized_required = true; context->bw_ctx.bw.dcn.clk.p_state_change_support = false; } @@ -1425,10 +1429,10 @@ void dcn401_prepare_bandwidth(struct dc *dc, /* decrease compbuf size */ if (hubbub->funcs->program_compbuf_segments) { - compbuf_size_kb = context->bw_ctx.bw.dcn.arb_regs.compbuf_size; - dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size); + compbuf_size = context->bw_ctx.bw.dcn.arb_regs.compbuf_size; + dc->wm_optimized_required |= (compbuf_size != dc->current_state->bw_ctx.bw.dcn.arb_regs.compbuf_size); - hubbub->funcs->program_compbuf_segments(hubbub, compbuf_size_kb, false); + hubbub->funcs->program_compbuf_segments(hubbub, compbuf_size, false); } if (dc->debug.fams2_config.bits.enable) { @@ -1437,7 +1441,7 @@ void dcn401_prepare_bandwidth(struct dc *dc, dcn401_fams2_global_control_lock(dc, context, false); } - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) { + if (p_state_change_support != context->bw_ctx.bw.dcn.clk.p_state_change_support) { /* After disabling P-State, restore the original value to ensure we get the correct P-State * on the next optimize. */ context->bw_ctx.bw.dcn.clk.p_state_change_support = p_state_change_support; @@ -1530,7 +1534,7 @@ void dcn401_fams2_update_config(struct dc *dc, struct dc_state *context, bool en if (!dc->ctx || !dc->ctx->dmub_srv || !dc->debug.fams2_config.bits.enable) return; - fams2_required = context->bw_ctx.bw.dcn.fams2_stream_count > 0; + fams2_required = context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable; dc_dmub_srv_fams2_update_config(dc, context, enable && fams2_required); } @@ -1542,7 +1546,6 @@ static void update_dsc_for_odm_change(struct dc *dc, struct dc_state *context, struct pipe_ctx *old_pipe; struct pipe_ctx *new_pipe; struct pipe_ctx *old_opp_heads[MAX_PIPES]; - struct dccg *dccg = dc->res_pool->dccg; struct pipe_ctx *old_otg_master; int old_opp_head_count = 0; @@ -1568,12 +1571,9 @@ static void update_dsc_for_odm_change(struct dc *dc, struct dc_state *context, for (i = 0; i < old_opp_head_count; i++) { old_pipe = old_opp_heads[i]; new_pipe = &context->res_ctx.pipe_ctx[old_pipe->pipe_idx]; - if (old_pipe->stream_res.dsc && !new_pipe->stream_res.dsc) { - dccg->funcs->set_dto_dscclk(dccg, - old_pipe->stream_res.dsc->inst, false); + if (old_pipe->stream_res.dsc && !new_pipe->stream_res.dsc) old_pipe->stream_res.dsc->funcs->dsc_disconnect( old_pipe->stream_res.dsc); - } } } } @@ -1659,7 +1659,7 @@ void dcn401_hardware_release(struct dc *dc) */ if (dc->current_state) { if ((!dc->clk_mgr->clks.p_state_change_support || - dc->current_state->bw_ctx.bw.dcn.fams2_stream_count > 0) && + dc->current_state->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) && dc->res_pool->hubbub->funcs->force_pstate_change_control) dc->res_pool->hubbub->funcs->force_pstate_change_control( dc->res_pool->hubbub, true, true); @@ -1669,3 +1669,104 @@ void dcn401_hardware_release(struct dc *dc) } } +void dcn401_wait_for_det_buffer_update(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master) +{ + struct pipe_ctx *opp_heads[MAX_PIPES]; + struct pipe_ctx *dpp_pipes[MAX_PIPES]; + struct hubbub *hubbub = dc->res_pool->hubbub; + int dpp_count = 0; + + if (!otg_master->stream) + return; + + int slice_count = resource_get_opp_heads_for_otg_master(otg_master, + &context->res_ctx, opp_heads); + + for (int slice_idx = 0; slice_idx < slice_count; slice_idx++) { + if (opp_heads[slice_idx]->plane_state) { + dpp_count = resource_get_dpp_pipes_for_opp_head( + opp_heads[slice_idx], + &context->res_ctx, + dpp_pipes); + for (int dpp_idx = 0; dpp_idx < dpp_count; dpp_idx++) { + struct pipe_ctx *dpp_pipe = dpp_pipes[dpp_idx]; + if (dpp_pipe && hubbub && + dpp_pipe->plane_res.hubp && + hubbub->funcs->wait_for_det_update) + hubbub->funcs->wait_for_det_update(hubbub, dpp_pipe->plane_res.hubp->inst); + } + } + } +} + +void dcn401_interdependent_update_lock(struct dc *dc, + struct dc_state *context, bool lock) +{ + unsigned int i = 0; + struct pipe_ctx *pipe = NULL; + struct timing_generator *tg = NULL; + bool pipe_unlocked[MAX_PIPES] = {0}; + + if (lock) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + tg = pipe->stream_res.tg; + + if (!resource_is_pipe_type(pipe, OTG_MASTER) || + !tg->funcs->is_tg_enabled(tg) || + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) + continue; + dc->hwss.pipe_control_lock(dc, pipe, true); + } + } else { + /* Unlock pipes based on the change in DET allocation instead of pipe index + * Prevents over allocation of DET during unlock process + * e.g. 2 pipe config with different streams with a max of 20 DET segments + * Before: After: + * - Pipe0: 10 DET segments - Pipe0: 12 DET segments + * - Pipe1: 10 DET segments - Pipe1: 8 DET segments + * If Pipe0 gets updated first, 22 DET segments will be allocated + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + tg = pipe->stream_res.tg; + int current_pipe_idx = i; + + if (!resource_is_pipe_type(pipe, OTG_MASTER) || + !tg->funcs->is_tg_enabled(tg) || + dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) { + pipe_unlocked[i] = true; + continue; + } + + // If the same stream exists in old context, ensure the OTG_MASTER pipes for the same stream get compared + struct pipe_ctx *old_otg_master = resource_get_otg_master_for_stream(&dc->current_state->res_ctx, pipe->stream); + + if (old_otg_master) + current_pipe_idx = old_otg_master->pipe_idx; + if (resource_calculate_det_for_stream(context, pipe) < + resource_calculate_det_for_stream(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[current_pipe_idx])) { + dc->hwss.pipe_control_lock(dc, pipe, false); + pipe_unlocked[i] = true; + dcn401_wait_for_det_buffer_update(dc, context, pipe); + } + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (pipe_unlocked[i]) + continue; + pipe = &context->res_ctx.pipe_ctx[i]; + dc->hwss.pipe_control_lock(dc, pipe, false); + } + } +} + +void dcn401_program_outstanding_updates(struct dc *dc, + struct dc_state *context) +{ + struct hubbub *hubbub = dc->res_pool->hubbub; + + /* update compbuf if required */ + if (hubbub->funcs->program_compbuf_segments) + hubbub->funcs->program_compbuf_segments(hubbub, context->bw_ctx.bw.dcn.arb_regs.compbuf_size, true); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 8e9c1c17aa66..a27e62081685 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -81,4 +81,7 @@ void dcn401_hardware_release(struct dc *dc); void dcn401_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master); void adjust_hotspot_between_slices_for_2x_magnify(uint32_t cursor_width, struct dc_cursor_position *pos_cpy); +void dcn401_wait_for_det_buffer_update(struct dc *dc, struct dc_state *context, struct pipe_ctx *otg_master); +void dcn401_interdependent_update_lock(struct dc *dc, struct dc_state *context, bool lock); +void dcn401_program_outstanding_updates(struct dc *dc, struct dc_state *context); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 6a768702c7bd..a2ca07235c83 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -38,7 +38,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .disable_audio_stream = dce110_disable_audio_stream, .disable_plane = dcn20_disable_plane, .pipe_control_lock = dcn20_pipe_control_lock, - .interdependent_update_lock = dcn32_interdependent_update_lock, + .interdependent_update_lock = dcn401_interdependent_update_lock, .cursor_lock = dcn10_cursor_lock, .prepare_bandwidth = dcn401_prepare_bandwidth, .optimize_bandwidth = dcn401_optimize_bandwidth, @@ -99,12 +99,11 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .fams2_global_control_lock = dcn401_fams2_global_control_lock, .fams2_update_config = dcn401_fams2_update_config, .fams2_global_control_lock_fast = dcn401_fams2_global_control_lock_fast, - .power_down = dce110_power_down, + .program_outstanding_updates = dcn401_program_outstanding_updates, }; static const struct hwseq_private_funcs dcn401_private_funcs = { .init_pipes = dcn10_init_pipes, - .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, .set_input_transfer_func = dcn32_set_input_transfer_func, @@ -115,8 +114,6 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .reset_hw_ctx_wrap = dcn20_reset_hw_ctx_wrap, .enable_stream_timing = dcn401_enable_stream_timing, .edp_backlight_control = dce110_edp_backlight_control, - .disable_stream_gating = dcn20_disable_stream_gating, - .enable_stream_gating = dcn20_enable_stream_gating, .setup_vupdate_interrupt = dcn20_setup_vupdate_interrupt, .did_underflow_occur = dcn10_did_underflow_occur, .init_blank = dcn32_init_blank, @@ -136,12 +133,11 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .dccg_init = dcn20_dccg_init, .set_mcm_luts = dcn401_set_mcm_luts, .program_mall_pipe_config = dcn32_program_mall_pipe_config, - .update_force_pstate = dcn32_update_force_pstate, .update_mall_sel = dcn32_update_mall_sel, .calculate_dccg_k1_k2_values = NULL, .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe, - .populate_mcm_luts = dcn401_populate_mcm_luts, + .populate_mcm_luts = NULL, }; void dcn401_hw_sequencer_init_functions(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index d05be65a2256..ac9205625623 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -240,7 +240,6 @@ struct hw_sequencer_funcs { void (*program_triplebuffer)(const struct dc *dc, struct pipe_ctx *pipe_ctx, bool enableTripleBuffer); void (*update_pending_status)(struct pipe_ctx *pipe_ctx); - void (*power_down)(struct dc *dc); void (*update_dsc_pg)(struct dc *dc, struct dc_state *context, bool safe_to_disable); /* Pipe Lock Related */ @@ -460,6 +459,9 @@ struct hw_sequencer_funcs { bool enable); void (*fams2_global_control_lock_fast)(union block_sequence_params *params); void (*set_long_vtotal)(struct pipe_ctx **pipe_ctx, int num_pipes, uint32_t v_total_min, uint32_t v_total_max); + void (*program_outstanding_updates)(struct dc *dc, + struct dc_state *context); + void (*setup_hpo_hw_control)(const struct dce_hwseq *hws, bool enable); }; void color_space_to_black_color( @@ -520,6 +522,21 @@ void hwss_build_fast_sequence(struct dc *dc, struct dc_stream_status *stream_status, struct dc_state *context); +void hwss_wait_for_all_blank_complete(struct dc *dc, + struct dc_state *context); + +void hwss_wait_for_odm_update_pending_complete(struct dc *dc, + struct dc_state *context); + +void hwss_wait_for_no_pipes_pending(struct dc *dc, + struct dc_state *context); + +void hwss_wait_for_outstanding_hw_updates(struct dc *dc, + struct dc_state *dc_context); + +void hwss_process_outstanding_hw_updates(struct dc *dc, + struct dc_state *dc_context); + void hwss_send_dmcub_cmd(union block_sequence_params *params); void hwss_program_manual_trigger(union block_sequence_params *params); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h index 7ac3f2a09487..0ac675456979 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h @@ -76,8 +76,6 @@ struct hwseq_private_funcs { void (*enable_stream_gating)(struct dc *dc, struct pipe_ctx *pipe_ctx); void (*init_pipes)(struct dc *dc, struct dc_state *context); void (*reset_hw_ctx_wrap)(struct dc *dc, struct dc_state *context); - void (*update_plane_addr)(const struct dc *dc, - struct pipe_ctx *pipe_ctx); void (*plane_atomic_disconnect)(struct dc *dc, struct dc_state *state, struct pipe_ctx *pipe_ctx); @@ -170,7 +168,8 @@ struct hwseq_private_funcs { unsigned int *k1_div, unsigned int *k2_div); void (*resync_fifo_dccg_dio)(struct dce_hwseq *hws, struct dc *dc, - struct dc_state *context); + struct dc_state *context, + unsigned int current_pipe_idx); enum dc_status (*apply_single_controller_ctx_to_hw)( struct pipe_ctx *pipe_ctx, struct dc_state *context, diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 4c8e6436c7e1..bfb8b8502d20 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -534,8 +534,8 @@ struct dcn_bw_output { unsigned int legacy_svp_drr_stream_index; bool legacy_svp_drr_stream_index_valid; struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES]; + struct dmub_cmd_fams2_global_config fams2_global_config; struct dmub_fams2_stream_static_state fams2_stream_params[DML2_MAX_PLANES]; - unsigned fams2_stream_count; struct dml2_display_arb_regs arb_regs; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h b/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h index b6203253111c..8c18efc2aa70 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h @@ -46,6 +46,8 @@ struct audio_funcs { const struct audio_info *audio_info, const struct audio_dp_link_info *dp_link_info); + void (*az_disable_hbr_audio)(struct audio *audio); + void (*wall_dto_setup)(struct audio *audio, enum signal_type signal, const struct audio_crtc_info *crtc_info, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index d5fefce3e74b..2d06067ff36d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -29,9 +29,6 @@ #include "dc.h" #include "dm_pp_smu.h" -#define DCN_MINIMUM_DISPCLK_Khz 100000 -#define DCN_MINIMUM_DPPCLK_Khz 100000 - /* Constants */ #define DDR4_DRAM_WIDTH 64 #define WM_A 0 @@ -180,6 +177,7 @@ struct clk_state_registers_and_bypass { uint32_t dispclk; uint32_t dppclk; uint32_t dtbclk; + uint32_t fclk; uint32_t dppclk_bypass; uint32_t dcfclk_bypass; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index 12282f96dfe1..c2dd061892f4 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -191,7 +191,8 @@ enum dentist_divider_range { CLK_SR_DCN401(CLK0_CLK1_DFS_CNTL, CLK01, 0), \ CLK_SR_DCN401(CLK0_CLK2_DFS_CNTL, CLK01, 0), \ CLK_SR_DCN401(CLK0_CLK3_DFS_CNTL, CLK01, 0), \ - CLK_SR_DCN401(CLK0_CLK4_DFS_CNTL, CLK01, 0) + CLK_SR_DCN401(CLK0_CLK4_DFS_CNTL, CLK01, 0), \ + CLK_SR_DCN401(CLK2_CLK2_DFS_CNTL, CLK20, 0) #define CLK_COMMON_MASK_SH_LIST_DCN401(mask_sh) \ CLK_COMMON_MASK_SH_LIST_DCN321(mask_sh) @@ -235,6 +236,7 @@ struct clk_mgr_registers { uint32_t CLK1_CLK2_DFS_CNTL; uint32_t CLK1_CLK3_DFS_CNTL; uint32_t CLK1_CLK4_DFS_CNTL; + uint32_t CLK2_CLK2_DFS_CNTL; uint32_t CLK1_CLK0_CURRENT_CNT; uint32_t CLK1_CLK1_CURRENT_CNT; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index 4fb1aacee894..d619eb229a62 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -211,10 +211,7 @@ struct dccg_funcs { struct dccg *dccg, enum streamclk_source src, uint32_t otg_inst); - void (*set_dto_dscclk)( - struct dccg *dccg, - uint32_t dsc_inst, - bool enable); + void (*set_dto_dscclk)(struct dccg *dccg, uint32_t dsc_inst); void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index dd2b2864876c..67c32401893e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -227,6 +227,7 @@ struct hubbub_funcs { void (*get_mall_en)(struct hubbub *hubbub, unsigned int *mall_in_use); void (*program_det_segments)(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_seg); void (*program_compbuf_segments)(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase); + void (*wait_for_det_update)(struct hubbub *hubbub, int hubp_inst); }; struct hubbub { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index 27bba47186e9..41c76ba9ba56 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -217,12 +217,13 @@ enum optc_dsc_mode { }; struct dc_bias_and_scale { - uint16_t scale_red; - uint16_t bias_red; - uint16_t scale_green; - uint16_t bias_green; - uint16_t scale_blue; - uint16_t bias_blue; + uint32_t scale_red; + uint32_t bias_red; + uint32_t scale_green; + uint32_t bias_green; + uint32_t scale_blue; + uint32_t bias_blue; + bool bias_and_scale_valid; }; enum test_pattern_dyn_range { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h index 5f6c7daa14d9..a8b44f398ce6 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h @@ -63,7 +63,7 @@ union dcn_watermark_set { struct dml2_dchub_watermark_regs b; struct dml2_dchub_watermark_regs c; struct dml2_dchub_watermark_regs d; - } dcn4; //dcn4+ + } dcn4x; //dcn4+ }; struct dce_watermarks { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h index 287bf8a90ff6..03cbcbb36f1c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h @@ -65,6 +65,7 @@ struct optc { int vupdate_offset; int vupdate_width; int vready_offset; + int pstate_keepout; struct dc_crtc_timing orginal_patched_timing; enum signal_type signal; }; @@ -110,6 +111,7 @@ void optc1_program_timing(struct timing_generator *optc, int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios); @@ -127,7 +129,8 @@ void optc1_program_global_sync(struct timing_generator *optc, int vready_offset, int vstartup_start, int vupdate_offset, - int vupdate_width); + int vupdate_width, + int pstate_keepout); bool optc1_disable_crtc(struct timing_generator *optc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index e5e11c84e9e2..fe7f3137f228 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -271,7 +271,9 @@ struct stream_encoder_funcs { struct stream_encoder *enc, unsigned int pix_per_container); void (*enable_fifo)(struct stream_encoder *enc); void (*disable_fifo)(struct stream_encoder *enc); + bool (*is_fifo_enabled)(struct stream_encoder *enc); void (*map_stream_to_link)(struct stream_encoder *enc, uint32_t stream_enc_inst, uint32_t link_enc_inst); + uint32_t (*get_pixels_per_cycle)(struct stream_encoder *enc); }; struct hpo_dp_stream_encoder_state { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 0f453452234c..3d4c8bd42b49 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -172,6 +172,7 @@ struct timing_generator_funcs { int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios ); @@ -256,7 +257,8 @@ struct timing_generator_funcs { int vready_offset, int vstartup_start, int vupdate_offset, - int vupdate_width); + int vupdate_width, + int pstate_keepout); void (*enable_optc_clock)(struct timing_generator *tg, bool enable); void (*program_stereo)(struct timing_generator *tg, const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h index 28da1dddf0a0..45262cba675e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h @@ -245,16 +245,6 @@ struct transform_funcs { void (*set_cursor_attributes)( struct transform *xfm_base, const struct dc_cursor_attributes *attr); - - bool (*transform_program_blnd_lut)( - struct transform *xfm, - const struct pwl_params *params); - bool (*transform_program_shaper_lut)( - struct transform *xfm, - const struct pwl_params *params); - bool (*transform_program_3dlut)( - struct transform *xfm, - struct tetrahedral_params *params); }; const uint16_t *get_filter_2tap_16p(void); diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 96d40d33a1f9..cd1157d225ab 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -639,4 +639,11 @@ struct dscl_prog_data *resource_get_dscl_prog_data(struct pipe_ctx *pipe_ctx); * @dml2_options: struct to hold callbacks */ void resource_init_common_dml2_callbacks(struct dc *dc, struct dml2_configuration_options *dml2_options); + +/* + *Calculate total DET allocated for all pipes for a given OTG_MASTER pipe + */ +int resource_calculate_det_for_stream(struct dc_state *state, struct pipe_ctx *otg_master); + +bool resource_is_hpo_acquired(struct dc_state *context); #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 555c1c484cfd..ff8fe1a94965 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -67,6 +67,8 @@ static void dp_retrain_link_dp_test(struct dc_link *link, { struct pipe_ctx *pipes[MAX_PIPES]; struct dc_state *state = link->dc->current_state; + bool was_hpo_acquired = resource_is_hpo_acquired(link->dc->current_state); + bool is_hpo_acquired; uint8_t count; int i; @@ -83,6 +85,12 @@ static void dp_retrain_link_dp_test(struct dc_link *link, pipes[i]); } + if (link->dc->hwss.setup_hpo_hw_control) { + is_hpo_acquired = resource_is_hpo_acquired(state); + if (was_hpo_acquired != is_hpo_acquired) + link->dc->hwss.setup_hpo_hw_control(link->dc->hwseq, is_hpo_acquired); + } + for (i = count-1; i >= 0; i--) link_set_dpms_on(state, pipes[i]); } @@ -804,8 +812,11 @@ bool dp_set_test_pattern( break; } + if (!pipe_ctx->stream) + return false; + if (pipe_ctx->stream_res.tg->funcs->lock_doublebuffer_enable) { - if (pipe_ctx->stream && should_use_dmub_lock(pipe_ctx->stream->link)) { + if (should_use_dmub_lock(pipe_ctx->stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c index b76737b7b9e4..3e47a6735912 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c @@ -74,7 +74,10 @@ void reset_dio_stream_encoder(struct pipe_ctx *pipe_ctx) struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link); struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; - if (stream_enc && stream_enc->funcs->disable_fifo) + if (!stream_enc) + return; + + if (stream_enc->funcs->disable_fifo) stream_enc->funcs->disable_fifo(stream_enc); if (stream_enc->funcs->set_input_mode) stream_enc->funcs->set_input_mode(stream_enc, 0); diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c index e1257404357b..cec68c5dba13 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c @@ -28,6 +28,8 @@ #include "dccg.h" #include "clk_mgr.h" +#define DC_LOGGER link->ctx->logger + void set_hpo_dp_throttled_vcp_size(struct pipe_ctx *pipe_ctx, struct fixed31_32 throttled_vcp_size) { @@ -108,6 +110,11 @@ void enable_hpo_dp_link_output(struct dc_link *link, enum clock_source_id clock_source, const struct dc_link_settings *link_settings) { + if (!link_res->hpo_dp_link_enc) { + DC_LOG_ERROR("%s: invalid hpo_dp_link_enc\n", __func__); + return; + } + if (link->dc->res_pool->dccg->funcs->set_symclk32_le_root_clock_gating) link->dc->res_pool->dccg->funcs->set_symclk32_le_root_clock_gating( link->dc->res_pool->dccg, @@ -124,6 +131,11 @@ void disable_hpo_dp_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal) { + if (!link_res->hpo_dp_link_enc) { + DC_LOG_ERROR("%s: invalid hpo_dp_link_enc\n", __func__); + return; + } + link_res->hpo_dp_link_enc->funcs->link_disable(link_res->hpo_dp_link_enc); link_res->hpo_dp_link_enc->funcs->disable_link_phy( link_res->hpo_dp_link_enc, signal); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index bba644024780..d21ee9d12d26 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -863,7 +863,6 @@ static bool detect_link_and_local_sink(struct dc_link *link, struct dc_sink *prev_sink = NULL; struct dpcd_caps prev_dpcd_caps; enum dc_connection_type new_connection_type = dc_connection_none; - enum dc_connection_type pre_connection_type = link->type; const uint32_t post_oui_delay = 30; // 30ms DC_LOGGER_INIT(link->ctx->logger); @@ -965,7 +964,6 @@ static bool detect_link_and_local_sink(struct dc_link *link, } if (!detect_dp(link, &sink_caps, reason)) { - link->type = pre_connection_type; if (prev_sink) dc_sink_release(prev_sink); @@ -1191,8 +1189,7 @@ static bool detect_link_and_local_sink(struct dc_link *link, //sink only can use supported link rate table, we are foreced to enable it if (link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN) link->panel_config.ilr.optimize_edp_link_rate = true; - if (edp_is_ilr_optimization_enabled(link)) - link->reported_link_cap.link_rate = get_max_link_rate_from_ilr_table(link); + link->reported_link_cap.link_rate = get_max_edp_link_rate(link); } } else { @@ -1299,8 +1296,7 @@ bool link_detect(struct dc_link *link, enum dc_detect_reason reason) link->dpcd_caps.is_mst_capable) is_delegated_to_mst_top_mgr = discover_dp_mst_topology(link, reason); - if (is_local_sink_detect_success && - pre_link_type == dc_connection_mst_branch && + if (pre_link_type == dc_connection_mst_branch && link->type != dc_connection_mst_branch) is_delegated_to_mst_top_mgr = link_reset_cur_dp_mst_topology(link); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 65607589495f..c4e03482ba9a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -817,17 +817,17 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) ASSERT(dsc_cfg.dc_dsc_cfg.num_slices_h % opp_cnt == 0); dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, dsc->inst); dsc->funcs->dsc_set_config(dsc, &dsc_cfg, &dsc_optc_cfg); dsc->funcs->dsc_enable(dsc, pipe_ctx->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, dsc->inst, true); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { struct display_stream_compressor *odm_dsc = odm_pipe->stream_res.dsc; + if (should_use_dto_dscclk) + dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst); odm_dsc->funcs->dsc_set_config(odm_dsc, &dsc_cfg, &dsc_optc_cfg); odm_dsc->funcs->dsc_enable(odm_dsc, odm_pipe->stream_res.opp->inst); - if (should_use_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_dsc->inst, true); } dsc_cfg.dc_dsc_cfg.num_slices_h *= opp_cnt; dsc_cfg.pic_width *= opp_cnt; @@ -879,19 +879,32 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) } /* disable DSC block */ - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, pipe_ctx->stream_res.dsc->inst, false); - pipe_ctx->stream_res.dsc->funcs->dsc_disconnect(pipe_ctx->stream_res.dsc); - if (dccg->funcs->set_ref_dscclk) - dccg->funcs->set_ref_dscclk(dccg, pipe_ctx->stream_res.dsc->inst); - pipe_ctx->stream_res.dsc->funcs->dsc_disable(pipe_ctx->stream_res.dsc); - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { - if (dccg->funcs->set_dto_dscclk) - dccg->funcs->set_dto_dscclk(dccg, odm_pipe->stream_res.dsc->inst, false); + for (odm_pipe = pipe_ctx; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.dsc->funcs->dsc_disconnect(odm_pipe->stream_res.dsc); + /* + * TODO - dsc_disconnect is a double buffered register. + * by the time we call dsc_disable, dsc may still remain + * connected to OPP. In this case OPTC will no longer + * get correct pixel data because DSCC is off. However + * we also can't wait for the disconnect pending + * complete, because this function can be called + * with/without OTG master lock acquired. When the lock + * is acquired we will never get pending complete until + * we release the lock later. So there is no easy way to + * solve this problem especially when the lock is + * acquired. DSC is a front end hw block it should be + * programmed as part of front end sequence, where the + * commit sequence without lock and update sequence + * with lock are completely separated. However because + * we are programming dsc as part of back end link + * programming sequence, we don't know if front end OPTC + * master lock is acquired. The back end should be + * agnostic to front end lock. DSC programming shouldn't + * belong to this sequence. + */ + odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc); if (dccg->funcs->set_ref_dscclk) dccg->funcs->set_ref_dscclk(dccg, odm_pipe->stream_res.dsc->inst); - odm_pipe->stream_res.dsc->funcs->dsc_disable(odm_pipe->stream_res.dsc); } } } @@ -2345,7 +2358,7 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); - else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && + else if (dc_is_dp_sst_signal(pipe_ctx->stream->signal) && dp_is_128b_132b_signal(pipe_ctx)) update_sst_payload(pipe_ctx, false); @@ -2578,7 +2591,7 @@ void link_set_dpms_on( if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) allocate_mst_payload(pipe_ctx); - else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && + else if (dc_is_dp_sst_signal(pipe_ctx->stream->signal) && dp_is_128b_132b_signal(pipe_ctx)) update_sst_payload(pipe_ctx, true); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 8246006857b3..5e1b5ab9fbc6 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -385,7 +385,7 @@ static void link_destruct(struct dc_link *link) if (link->panel_cntl) link->panel_cntl->funcs->destroy(&link->panel_cntl); - if (link->link_enc) { + if (link->link_enc && !link->is_dig_mapping_flexible) { /* Update link encoder resource tracking variables. These are used for * the dynamic assignment of link encoders to streams. Virtual links * are not assigned encoder resources on creation. @@ -524,6 +524,7 @@ static bool construct_phy(struct dc_link *link, link->connector_signal = SIGNAL_TYPE_DVI_DUAL_LINK; break; case CONNECTOR_ID_DISPLAY_PORT: + case CONNECTOR_ID_MXM: case CONNECTOR_ID_USBC: link->connector_signal = SIGNAL_TYPE_DISPLAY_PORT; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 46bb7a855bc2..34a618a7278b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -212,6 +212,13 @@ static enum dc_link_rate linkRateInKHzToLinkRateMultiplier(uint32_t link_rate_in case 10000000: link_rate = LINK_RATE_UHBR10; // UHBR10 - 10.0 Gbps/Lane break; + case 13500000: + link_rate = LINK_RATE_UHBR13_5; // UHBR13.5 - 13.5 Gbps/Lane + break; + case 20000000: + link_rate = LINK_RATE_UHBR20; // UHBR20 - 20.0 Gbps/Lane + break; + default: link_rate = LINK_RATE_UNKNOWN; break; @@ -541,6 +548,23 @@ static enum dc_link_rate increase_link_rate(struct dc_link *link, } } +static void increase_edp_link_rate(struct dc_link *link, + struct dc_link_settings *current_link_setting) +{ + if (current_link_setting->use_link_rate_set) { + if (current_link_setting->link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { + current_link_setting->link_rate_set++; + current_link_setting->link_rate = + link->dpcd_caps.edp_supported_link_rates[current_link_setting->link_rate_set]; + } else { + current_link_setting->use_link_rate_set = false; + current_link_setting->link_rate = LINK_RATE_UHBR10; + } + } else { + current_link_setting->link_rate = increase_link_rate(link, current_link_setting->link_rate); + } +} + static bool decide_fallback_link_setting_max_bw_policy( struct dc_link *link, const struct dc_link_settings *max, @@ -759,14 +783,7 @@ bool edp_decide_link_settings(struct dc_link *link, increase_lane_count( current_link_setting.lane_count); } else { - if (current_link_setting.link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { - current_link_setting.link_rate_set++; - current_link_setting.link_rate = - link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; - current_link_setting.lane_count = - initial_link_setting.lane_count; - } else - break; + increase_edp_link_rate(link, ¤t_link_setting); } } return false; @@ -818,9 +835,7 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, if (policy) { /* minimize lane */ if (current_link_setting.link_rate < max_link_rate) { - current_link_setting.link_rate = - increase_link_rate(link, - current_link_setting.link_rate); + increase_edp_link_rate(link, ¤t_link_setting); } else { if (current_link_setting.lane_count < link->verified_link_cap.lane_count) { @@ -839,9 +854,7 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, increase_lane_count( current_link_setting.lane_count); } else { - current_link_setting.link_rate = - increase_link_rate(link, - current_link_setting.link_rate); + increase_edp_link_rate(link, ¤t_link_setting); current_link_setting.lane_count = initial_link_setting.lane_count; } @@ -874,18 +887,15 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, } if (policy) { /* minimize lane */ - if (current_link_setting.link_rate_set < - link->dpcd_caps.edp_supported_link_rates_count - && current_link_setting.link_rate < max_link_rate) { - current_link_setting.link_rate_set++; - current_link_setting.link_rate = - link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; + if (current_link_setting.link_rate < max_link_rate) { + increase_edp_link_rate(link, ¤t_link_setting); } else { if (current_link_setting.lane_count < link->verified_link_cap.lane_count) { current_link_setting.lane_count = increase_lane_count( current_link_setting.lane_count); current_link_setting.link_rate_set = initial_link_setting.link_rate_set; + current_link_setting.use_link_rate_set = initial_link_setting.use_link_rate_set; current_link_setting.link_rate = link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; } else @@ -899,13 +909,8 @@ bool decide_edp_link_settings_with_dsc(struct dc_link *link, increase_lane_count( current_link_setting.lane_count); } else { - if (current_link_setting.link_rate_set < link->dpcd_caps.edp_supported_link_rates_count) { - current_link_setting.link_rate_set++; - current_link_setting.link_rate = - link->dpcd_caps.edp_supported_link_rates[current_link_setting.link_rate_set]; - current_link_setting.lane_count = - initial_link_setting.lane_count; - } else + increase_edp_link_rate(link, ¤t_link_setting); + if (current_link_setting.link_rate == LINK_RATE_UNKNOWN) break; } } @@ -1166,6 +1171,8 @@ static void get_active_converter_info( link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps = intersect_frl_link_bw_support( link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps, hdmi_encoded_link_bw); + DC_LOG_DC("%s: pcon frl link bw = %u\n", __func__, + link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps); } if (link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps > 0) @@ -1541,7 +1548,11 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) * Override count to 1 if we receive a known bad count (0 or an invalid value) */ if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == 0)) { - ASSERT(0); + /* If you see this message consistently, either the host platform has FIXED_VS flag + * incorrectly configured or the sink device is returning an invalid count. + */ + DC_LOG_ERROR("lttpr_caps phy_repeater_cnt is 0x%x, forcing it to 0x80.", + link->dpcd_caps.lttpr_caps.phy_repeater_cnt); link->dpcd_caps.lttpr_caps.phy_repeater_cnt = 0x80; DC_LOG_DC("lttpr_caps forced phy_repeater_cnt = %d\n", link->dpcd_caps.lttpr_caps.phy_repeater_cnt); } @@ -2254,7 +2265,7 @@ bool dp_verify_link_cap_with_retries( memset(&link->verified_link_cap, 0, sizeof(struct dc_link_settings)); - if (!link_detect_connection_type(link, &type) || type == dc_connection_none) { + if (link->link_enc && (!link_detect_connection_type(link, &type) || type == dc_connection_none)) { link->verified_link_cap = fail_safe_link_settings; break; } else if (dp_verify_link_cap(link, known_limit_link_setting, &fail_count)) { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index bf820d2b4dc4..3aa05a2be6c0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -305,16 +305,17 @@ bool edp_is_ilr_optimization_enabled(struct dc_link *link) return true; } -enum dc_link_rate get_max_link_rate_from_ilr_table(struct dc_link *link) +enum dc_link_rate get_max_edp_link_rate(struct dc_link *link) { - enum dc_link_rate link_rate = link->reported_link_cap.link_rate; + enum dc_link_rate max_ilr_rate = LINK_RATE_UNKNOWN; + enum dc_link_rate max_non_ilr_rate = dp_get_max_link_cap(link).link_rate; for (int i = 0; i < link->dpcd_caps.edp_supported_link_rates_count; i++) { - if (link_rate < link->dpcd_caps.edp_supported_link_rates[i]) - link_rate = link->dpcd_caps.edp_supported_link_rates[i]; + if (max_ilr_rate < link->dpcd_caps.edp_supported_link_rates[i]) + max_ilr_rate = link->dpcd_caps.edp_supported_link_rates[i]; } - return link_rate; + return (max_ilr_rate > max_non_ilr_rate ? max_ilr_rate : max_non_ilr_rate); } bool edp_is_ilr_optimization_required(struct dc_link *link, @@ -1167,6 +1168,9 @@ static void edp_set_assr_enable(const struct dc *pDC, struct dc_link *link, link_enc_index = link->link_enc->transmitter - TRANSMITTER_UNIPHY_A; if (link_res->hpo_dp_link_enc) { + if (link->wa_flags.disable_assr_for_uhbr) + return; + link_enc_index = link_res->hpo_dp_link_enc->inst; use_hpo_dp_link_enc = true; } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index 8df8ac5bde5b..30dc8c24c008 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -69,7 +69,7 @@ bool edp_wait_for_t12(struct dc_link *link); bool edp_is_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timing *crtc_timing); bool edp_is_ilr_optimization_enabled(struct dc_link *link); -enum dc_link_rate get_max_link_rate_from_ilr_table(struct dc_link *link); +enum dc_link_rate get_max_edp_link_rate(struct dc_link *link); bool edp_backlight_enable_aux(struct dc_link *link, bool enable); void edp_add_delay_for_T9(struct dc_link *link); bool edp_receiver_ready_T9(struct dc_link *link); diff --git a/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile b/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile index 505bc0517e08..eab196c57c6c 100644 --- a/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile +++ b/drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile @@ -25,6 +25,15 @@ ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### +# DCN20 +############################################################################### +MMHUBBUB_DCN20 = dcn20_mmhubbub.o + +AMD_DAL_MMHUBBUB_DCN20 = $(addprefix $(AMDDALPATH)/dc/mmhubbub/dcn20/,$(MMHUBBUB_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MMHUBBUB_DCN20) + +############################################################################### # DCN32 ############################################################################### MMHUBBUB_DCN32 = dcn32_mmhubbub.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c index 259a98e4ee2c..259a98e4ee2c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.c +++ b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.h index 5ab32aa51e13..5ab32aa51e13 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h +++ b/drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.h diff --git a/drivers/gpu/drm/amd/display/dc/mpc/Makefile b/drivers/gpu/drm/amd/display/dc/mpc/Makefile index 7f7458c07e2a..5402c3529f5e 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/mpc/Makefile @@ -25,6 +25,33 @@ ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### +# DCN10 +############################################################################### +MPC_DCN10 = dcn10_mpc.o + +AMD_DAL_MPC_DCN10 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn10/,$(MPC_DCN10)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN10) + +############################################################################### +# DCN20 +############################################################################### +MPC_DCN20 = dcn20_mpc.o + +AMD_DAL_MPC_DCN20 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn20/,$(MPC_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN20) + +############################################################################### +# DCN30 +############################################################################### +MPC_DCN30 = dcn30_mpc.o + +AMD_DAL_MPC_DCN30 = $(addprefix $(AMDDALPATH)/dc/mpc/dcn30/,$(MPC_DCN30)) + +AMD_DISPLAY_FILES += $(AMD_DAL_MPC_DCN30) + +############################################################################### # DCN32 ############################################################################### MPC_DCN32 = dcn32_mpc.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.c index f2f55565e98a..f2f55565e98a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.h index dbfffc6383dc..dbfffc6383dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c index ea73473b970a..ea73473b970a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.h index 496658f420db..496658f420db 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c index 3aeb85ec40b0..fe26fde12eeb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c @@ -25,7 +25,7 @@ #include "reg_helper.h" #include "dcn30_mpc.h" -#include "dcn30_cm_common.h" +#include "dcn30/dcn30_cm_common.h" #include "basics/conversion.h" #include "dcn10/dcn10_cm_common.h" #include "dc.h" diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h index ce93003dae01..ce93003dae01 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h diff --git a/drivers/gpu/drm/amd/display/dc/opp/Makefile b/drivers/gpu/drm/amd/display/dc/opp/Makefile index fbfb3c3ad819..1be76754db30 100644 --- a/drivers/gpu/drm/amd/display/dc/opp/Makefile +++ b/drivers/gpu/drm/amd/display/dc/opp/Makefile @@ -25,6 +25,22 @@ ifdef CONFIG_DRM_AMD_DC_FP ############################################################################### +# DCN10 +############################################################################### +OPP_DCN10 = dcn10_opp.o + +AMD_DAL_OPP_DCN10 = $(addprefix $(AMDDALPATH)/dc/opp/dcn10/,$(OPP_DCN10)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPP_DCN10) +############################################################################### +# DCN20 +############################################################################### +OPP_DCN20 = dcn20_opp.o + +AMD_DAL_OPP_DCN20 = $(addprefix $(AMDDALPATH)/dc/opp/dcn20/,$(OPP_DCN20)) + +AMD_DISPLAY_FILES += $(AMD_DAL_OPP_DCN20) +############################################################################### # DCN35 ############################################################################### OPP_DCN35 = dcn35_opp.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c index 71e9288d60ed..71e9288d60ed 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c +++ b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h index c87de68a509e..c87de68a509e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h +++ b/drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c index f5fe0cac7cb0..f5fe0cac7cb0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.c +++ b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h index 34936e6c49f3..34936e6c49f3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_opp.h +++ b/drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c index 94427875bcdd..097d06023e64 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c @@ -65,7 +65,8 @@ void optc1_program_global_sync( int vready_offset, int vstartup_start, int vupdate_offset, - int vupdate_width) + int vupdate_width, + int pstate_keepout) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -73,6 +74,7 @@ void optc1_program_global_sync( optc1->vstartup_start = vstartup_start; optc1->vupdate_offset = vupdate_offset; optc1->vupdate_width = vupdate_width; + optc1->pstate_keepout = pstate_keepout; if (optc1->vstartup_start == 0) { BREAK_TO_DEBUGGER(); @@ -146,6 +148,7 @@ void optc1_setup_vertical_interrupt2( * @vstartup_start: Vstartup period. * @vupdate_offset: Vupdate starting position. * @vupdate_width: Vupdate duration. + * @pstate_keepout: determines low power mode timing during refresh * @signal: DC signal types. * @use_vbios: to program timings from BIOS command table. * @@ -157,6 +160,7 @@ void optc1_program_timing( int vstartup_start, int vupdate_offset, int vupdate_width, + int pstate_keepout, const enum signal_type signal, bool use_vbios) { @@ -177,6 +181,7 @@ void optc1_program_timing( optc1->vstartup_start = vstartup_start; optc1->vupdate_offset = vupdate_offset; optc1->vupdate_width = vupdate_width; + optc1->pstate_keepout = pstate_keepout; patched_crtc_timing = *dc_crtc_timing; apply_front_porch_workaround(&patched_crtc_timing); optc1->orginal_patched_timing = patched_crtc_timing; @@ -282,7 +287,8 @@ void optc1_program_timing( vready_offset, vstartup_start, vupdate_offset, - vupdate_width); + vupdate_width, + pstate_keepout); optc->funcs->set_vtg_params(optc, dc_crtc_timing, true); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h index 369a13244e5e..b7a57f98553d 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h @@ -201,6 +201,7 @@ struct dcn_optc_registers { uint32_t OTG_CRC1_WINDOWB_Y_CONTROL_READBACK; uint32_t OPTC_CLOCK_CONTROL; uint32_t OPTC_WIDTH_CONTROL2; + uint32_t OTG_PSTATE_REGISTER; }; #define TG_COMMON_MASK_SH_LIST_DCN(mask_sh)\ @@ -590,7 +591,11 @@ struct dcn_optc_registers { type OTG_V_COUNT_STOP_TIMER; #define TG_REG_FIELD_LIST_DCN401(type) \ - type OPTC_SEGMENT_WIDTH_LAST; + type OPTC_SEGMENT_WIDTH_LAST;\ + type OTG_PSTATE_KEEPOUT_START;\ + type OTG_PSTATE_EXTEND;\ + type OTG_UNBLANK;\ + type OTG_PSTATE_ALLOW_WIDTH_MIN; struct dcn_optc_shift { diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c index 6bbbf313b2bb..4b6446ed4ce4 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c @@ -149,7 +149,9 @@ static bool optc31_disable_crtc(struct timing_generator *optc) return true; } - +/* + * Immediate_Disable_Crtc - this is to temp disable Timing generator without reset ODM. + */ bool optc31_immediate_disable_crtc(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -162,10 +164,12 @@ bool optc31_immediate_disable_crtc(struct timing_generator *optc) VTG0_ENABLE, 0); /* CRTC disabled, so disable clock. */ - REG_WAIT(OTG_CLOCK_CONTROL, + if (optc->ctx->dce_environment != DCE_ENV_DIAG) + REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); + /* clear the false state */ optc1_clear_optc_underflow(optc); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c index 9f5c2efa7560..a5d6a7dca554 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c @@ -396,13 +396,47 @@ void optc401_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, i } } +static void optc401_program_global_sync( + struct timing_generator *optc, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + int pstate_keepout) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + optc1->vready_offset = vready_offset; + optc1->vstartup_start = vstartup_start; + optc1->vupdate_offset = vupdate_offset; + optc1->vupdate_width = vupdate_width; + optc1->pstate_keepout = pstate_keepout; + + if (optc1->vstartup_start == 0) { + BREAK_TO_DEBUGGER(); + return; + } + + REG_SET(OTG_VSTARTUP_PARAM, 0, + VSTARTUP_START, optc1->vstartup_start); + + REG_SET_2(OTG_VUPDATE_PARAM, 0, + VUPDATE_OFFSET, optc1->vupdate_offset, + VUPDATE_WIDTH, optc1->vupdate_width); + + REG_SET(OTG_VREADY_PARAM, 0, + VREADY_OFFSET, optc1->vready_offset); + + REG_UPDATE(OTG_PSTATE_REGISTER, OTG_PSTATE_KEEPOUT_START, pstate_keepout); +} + static struct timing_generator_funcs dcn401_tg_funcs = { .validate_timing = optc1_validate_timing, .program_timing = optc1_program_timing, .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, - .program_global_sync = optc1_program_global_sync, + .program_global_sync = optc401_program_global_sync, .enable_crtc = optc401_enable_crtc, .disable_crtc = optc401_disable_crtc, .phantom_crtc_post_enable = optc401_phantom_crtc_post_enable, diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h index 3114ecef332a..bb13a645802d 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h @@ -155,7 +155,11 @@ SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE, mask_sh),\ SF(OTG0_OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_MODE_MANUAL, mask_sh),\ SF(OTG0_OTG_DOUBLE_BUFFER_CONTROL, OTG_DRR_TIMING_DBUF_UPDATE_MODE, mask_sh),\ - SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) + SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_KEEPOUT_START, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_EXTEND, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_UNBLANK, mask_sh),\ + SF(OTG0_OTG_PSTATE_REGISTER, OTG_PSTATE_ALLOW_WIDTH_MIN, mask_sh) void dcn401_timing_generator_init(struct optc *optc1); diff --git a/drivers/gpu/drm/amd/display/dc/resource/Makefile b/drivers/gpu/drm/amd/display/dc/resource/Makefile index 4860bb2531a1..09320344d8e9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/Makefile +++ b/drivers/gpu/drm/amd/display/dc/resource/Makefile @@ -198,8 +198,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_RESOURCE_DCN351) ############################################################################### -############################################################################### - RESOURCE_DCN401 = dcn401_resource.o AMD_DAL_RESOURCE_DCN401 = $(addprefix $(AMDDALPATH)/dc/resource/dcn401/,$(RESOURCE_DCN401)) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c index fe518fd27b08..91da5cf85b69 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c @@ -1163,6 +1163,7 @@ static struct pipe_ctx *dce110_acquire_underlay( 0, 0, 0, + 0, pipe_ctx->stream->signal, false); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c index 88afb2a30eef..162856c523e4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c @@ -1067,7 +1067,10 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) struct dm_pp_clock_levels clks = {0}; int memory_type_multiplier = MEMORY_TYPE_MULTIPLIER_CZ; - if (dc->bw_vbios && dc->bw_vbios->memory_type == bw_def_hbm) + if (!dc->bw_vbios) + return; + + if (dc->bw_vbios->memory_type == bw_def_hbm) memory_type_multiplier = MEMORY_TYPE_HBM; /*do system clock TODO PPLIB: after PPLIB implement, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 5e7cfa8e8ec9..eea2b3b307cd 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -2040,6 +2040,7 @@ bool dcn20_fast_validate_bw( { bool out = false; int split[MAX_PIPES] = { 0 }; + bool merge[MAX_PIPES] = { false }; int pipe_cnt, i, pipe_idx, vlevel; ASSERT(pipes); @@ -2064,7 +2065,7 @@ bool dcn20_fast_validate_bw( if (vlevel > context->bw_ctx.dml.soc.num_states) goto validate_fail; - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL); + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); /*initialize pipe_just_split_from to invalid idx*/ for (i = 0; i < MAX_PIPES; i++) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index 131d98025bd4..fc54483b9104 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -1007,8 +1007,10 @@ static struct pipe_ctx *dcn201_acquire_free_pipe_for_layer( struct pipe_ctx *head_pipe = resource_get_otg_master_for_stream(res_ctx, opp_head_pipe->stream); struct pipe_ctx *idle_pipe = resource_find_free_secondary_pipe_legacy(res_ctx, pool, head_pipe); - if (!head_pipe) + if (!head_pipe) { ASSERT(0); + return NULL; + } if (!idle_pipe) return NULL; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 8663cbc3d1cf..347e6aaea582 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -774,6 +774,7 @@ bool dcn21_fast_validate_bw(struct dc *dc, { bool out = false; int split[MAX_PIPES] = { 0 }; + bool merge[MAX_PIPES] = { false }; int pipe_cnt, i, pipe_idx, vlevel; ASSERT(pipes); @@ -816,7 +817,7 @@ bool dcn21_fast_validate_bw(struct dc *dc, goto validate_fail; } - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, NULL); + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 5d1801dce273..ac8cb20e2e3b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1948,6 +1948,7 @@ static bool dcn31_resource_construct( /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 969658313fd6..a124ad9bd108 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1651,6 +1651,9 @@ static void dcn32_enable_phantom_plane(struct dc *dc, else phantom_plane = dc_state_create_phantom_plane(dc, context, curr_pipe->plane_state); + if (!phantom_plane) + continue; + memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, sizeof(phantom_plane->scaling_quality)); @@ -1717,6 +1720,9 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context, // be a valid candidate for SubVP (i.e. has a plane, stream, doesn't // already have phantom pipe assigned, etc.) by previous checks. phantom_stream = dcn32_enable_phantom_stream(dc, context, pipes, pipe_cnt, index); + if (!phantom_stream) + return; + dcn32_enable_phantom_plane(dc, context, phantom_stream, index); for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -2220,6 +2226,7 @@ static bool dcn32_resource_construct( dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { @@ -2671,8 +2678,10 @@ static struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer( struct resource_context *old_ctx = &stream->ctx->dc->current_state->res_ctx; int head_index; - if (!head_pipe) + if (!head_pipe) { ASSERT(0); + return NULL; + } /* * Modified from dcn20_acquire_idle_pipe_for_layer diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h index fee67fbab8e2..7901792afb7b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h @@ -505,6 +505,8 @@ unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned SRI_ARR(CM_POST_CSC_B_C11_C12, CM, id), \ SRI_ARR(CM_POST_CSC_B_C33_C34, CM, id), \ SRI_ARR(CM_MEM_PWR_CTRL, CM, id), SRI_ARR(CM_CONTROL, CM, id), \ + SRI_ARR(CM_TEST_DEBUG_INDEX, CM, id), \ + SRI_ARR(CM_TEST_DEBUG_DATA, CM, id), \ SRI_ARR(FORMAT_CONTROL, CNVC_CFG, id), \ SRI_ARR(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \ SRI_ARR(CURSOR0_CONTROL, CNVC_CUR, id), \ @@ -761,6 +763,7 @@ unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned SRI_ARR(DSCC_RATE_CONTROL_BUFFER1_MAX_FULLNESS_LEVEL, DSCC, id), \ SRI_ARR(DSCC_RATE_CONTROL_BUFFER2_MAX_FULLNESS_LEVEL, DSCC, id), \ SRI_ARR(DSCC_RATE_CONTROL_BUFFER3_MAX_FULLNESS_LEVEL, DSCC, id), \ + SRI_ARR(DSCC_TEST_DEBUG_BUS_ROTATE, DSCC, id), \ SRI_ARR(DSCCIF_CONFIG0, DSCCIF, id), \ SRI_ARR(DSCCIF_CONFIG1, DSCCIF, id), \ SRI_ARR(DSCRM_DSC_FORWARD_CONFIG, DSCRM, id) @@ -1185,6 +1188,8 @@ unsigned int dcn32_calculate_mall_ways_from_bytes(const struct dc *dc, unsigned SR(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL), \ SR(DCHUBBUB_ARB_DRAM_STATE_CNTL), SR(DCHUBBUB_ARB_SAT_LEVEL), \ SR(DCHUBBUB_ARB_DF_REQ_OUTSTAND), SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DCHUBBUB_TEST_DEBUG_INDEX), \ + SR(DCHUBBUB_TEST_DEBUG_DATA), \ SR(DCHUBBUB_SOFT_RESET), SR(DCHUBBUB_CRC_CTRL), \ SR(DCN_VM_FB_LOCATION_BASE), SR(DCN_VM_FB_LOCATION_TOP), \ SR(DCN_VM_FB_OFFSET), SR(DCN_VM_AGP_BOT), SR(DCN_VM_AGP_TOP), \ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c index d184105ce2b3..f5a4e97c40ce 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c @@ -218,12 +218,12 @@ bool dcn32_is_center_timing(struct pipe_ctx *pipe) pipe->stream->timing.v_addressable != pipe->stream->src.height) { is_center_timing = true; } - } - if (pipe->plane_state) { - if (pipe->stream->timing.v_addressable != pipe->plane_state->dst_rect.height && - pipe->stream->timing.v_addressable != pipe->plane_state->src_rect.height) { - is_center_timing = true; + if (pipe->plane_state) { + if (pipe->stream->timing.v_addressable != pipe->plane_state->dst_rect.height && + pipe->stream->timing.v_addressable != pipe->plane_state->src_rect.height) { + is_center_timing = true; + } } } @@ -663,7 +663,7 @@ bool dcn32_subvp_drr_admissable(struct dc *dc, struct dc_state *context) subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe); refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + - pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); + pipe->stream->timing.v_total * (unsigned long long)pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); } @@ -724,7 +724,7 @@ bool dcn32_subvp_vblank_admissable(struct dc *dc, struct dc_state *context, int subvp_disallow |= disallow_subvp_in_active_plus_blank(pipe); refresh_rate = (pipe->stream->timing.pix_clk_100hz * (uint64_t)100 + - pipe->stream->timing.v_total * pipe->stream->timing.h_total - (uint64_t)1); + pipe->stream->timing.v_total * (unsigned long long)pipe->stream->timing.h_total - (uint64_t)1); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.v_total); refresh_rate = div_u64(refresh_rate, pipe->stream->timing.h_total); } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 9a3cc0514a36..827a94f84f10 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1778,8 +1778,12 @@ static bool dcn321_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index ddf251901fb3..46ad684fe192 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -786,6 +786,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dmub_reallow_idle = false, .static_screen_wait_frames = 2, .disable_timeout = true, + .min_disp_clk_khz = 50000, }; static const struct dc_panel_config panel_config_defaults = { @@ -1899,6 +1900,7 @@ static bool dcn35_resource_construct( /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; + dc->config.disable_hbr_audio_dp2 = true; /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index a05a2209a44e..02e63b95c36d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -723,6 +723,7 @@ static const struct dc_debug_options debug_defaults_drv = { .min_prefetch_in_strobe_ns = 60000, // 60us .disable_unbounded_requesting = false, .enable_legacy_fast_update = false, + .dcc_meta_propagation_delay_us = 10, .fams2_config = { .bits = { .enable = true, @@ -1187,7 +1188,7 @@ static struct stream_encoder *dcn401_stream_encoder_create( vpg = dcn401_vpg_create(ctx, vpg_inst); afmt = dcn401_afmt_create(ctx, afmt_inst); - if (!enc1 || !vpg || !afmt) { + if (!enc1 || !vpg || !afmt || eng_id >= ARRAY_SIZE(stream_enc_regs)) { kfree(enc1); kfree(vpg); kfree(afmt); @@ -1821,6 +1822,7 @@ static bool dcn401_resource_construct( dc->caps.edp_dsc_support = true; dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; + dc->caps.max_v_total = (1 << 15) - 1; if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) dc->caps.dcc_plane_width_limit = 7680; @@ -2098,6 +2100,7 @@ static bool dcn401_resource_construct( dc->dml2_options.use_native_soc_bb_construction = true; dc->dml2_options.minimize_dispclk_using_odm = true; dc->dml2_options.map_dc_pipes_with_callbacks = true; + dc->dml2_options.force_tdlut_enable = true; resource_init_common_dml2_callbacks(dc, &dc->dml2_options); dc->dml2_options.callbacks.can_support_mclk_switch_using_fw_based_vblank_stretch = &dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 26efeada4f41..514d1ce20df9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -138,7 +138,9 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SRI_ARR(DCHUBP_MALL_CONFIG, HUBP, id), \ SRI_ARR(DCHUBP_VMPG_CONFIG, HUBP, id), \ SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id), \ - HUBP_3DLUT_FL_REG_LIST_DCN401(id) + HUBP_3DLUT_FL_REG_LIST_DCN401(id), \ + SRI_ARR(DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, HUBP, id), \ + SRI_ARR(DCHUBP_MCACHEID_CONFIG, HUBP, id) /* ABM */ #define ABM_DCN401_REG_LIST_RI(id) \ @@ -534,7 +536,8 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SRI_ARR(OPTC_WIDTH_CONTROL, ODM, inst), \ SRI_ARR(OPTC_WIDTH_CONTROL2, ODM, inst), \ SRI_ARR(OPTC_MEMORY_CONFIG, ODM, inst), \ - SRI_ARR(OTG_DRR_CONTROL, OTG, inst) + SRI_ARR(OTG_DRR_CONTROL, OTG, inst), \ + SRI_ARR(OTG_PSTATE_REGISTER, OTG, inst) /* HUBBUB */ #define HUBBUB_REG_LIST_DCN4_01_RI(id) \ diff --git a/drivers/gpu/drm/amd/display/dc/spl/Makefile b/drivers/gpu/drm/amd/display/dc/spl/Makefile index 89cad60b1a10..f8df85ea4d32 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/Makefile +++ b/drivers/gpu/drm/amd/display/dc/spl/Makefile @@ -23,7 +23,7 @@ # Makefile for the 'spl' sub-component of DAL. # It provides the scaling library interface. -SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_isharp_filters.o +SPL = dc_spl.o dc_spl_scl_filters.o dc_spl_scl_filters_old.o dc_spl_isharp_filters.o AMD_DAL_SPL = $(addprefix $(AMDDALPATH)/dc/spl/,$(SPL)) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index e3e20cd86af6..9eccdb38bed4 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -910,6 +910,7 @@ static const uint16_t *spl_dscl_get_filter_coeffs_64p(int taps, struct fixed31_3 return NULL; else { /* should never happen, bug */ + BREAK_TO_DEBUGGER(); return NULL; } } @@ -925,13 +926,14 @@ static void spl_set_filters_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->filter_v_c = spl_dscl_get_filter_coeffs_64p( data->taps.v_taps_c, data->ratios.vert_c); } - +#ifdef CONFIG_DRM_AMD_DC_FP static const uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps) { if ((taps == 3) || (taps == 4) || (taps == 6)) return spl_get_filter_isharp_bs_4tap_64p(); else { /* should never happen, bug */ + BREAK_TO_DEBUGGER(); return NULL; } } @@ -943,7 +945,7 @@ static void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data, dscl_prog_data->filter_blur_scale_v = spl_dscl_get_blur_scale_coeffs_64p( data->taps.v_taps); } - +#endif /* Populate dscl prog data structure from scaler data calculated by SPL */ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_out *spl_out) { @@ -1321,7 +1323,7 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, break; case SHARPNESS_HIGH: dscl_prog_data->isharp_delta = spl_get_filter_isharp_1D_lut_2p0x(); - break; + break; default: BREAK_TO_DEBUGGER(); } @@ -1344,7 +1346,9 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, } // Set the values as per lookup table +#ifdef CONFIG_DRM_AMD_DC_FP spl_set_blur_scale_data(dscl_prog_data, data); +#endif } static bool spl_get_isharp_en(struct adaptive_sharpness adp_sharpness, int vscale_ratio, int hscale_ratio, struct spl_taps taps, @@ -1407,6 +1411,8 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) bool enable_easf_v = false; bool enable_easf_h = false; bool lls_enable_easf = true; + int vratio = 0; + int hratio = 0; const struct spl_scaler_data *data = &spl_out->scl_data; // All SPL calls /* recout calculation */ @@ -1446,8 +1452,8 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) // Save all calculated parameters in dscl_prog_data structure to program hw registers spl_set_dscl_prog_data(spl_in, spl_out); - int vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); - int hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); + vratio = dc_fixpt_ceil(spl_out->scl_data.ratios.vert); + hratio = dc_fixpt_ceil(spl_out->scl_data.ratios.horz); if (!lls_enable_easf || spl_in->disable_easf) { enable_easf_v = false; enable_easf_h = false; diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h index f1fd3eb92f8a..205e59a2a8ee 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h @@ -9,16 +9,8 @@ #define BLACK_OFFSET_RGB_Y 0x0 #define BLACK_OFFSET_CBCR 0x8000 -#ifdef __cplusplus -extern "C" { -#endif - /* SPL interfaces */ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out); -#ifdef __cplusplus -} -#endif - #endif /* __DC_SPL_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c index 8bc838c7c3c5..a5d9a6223d06 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. -#include "dc_spl_types.h" #include "dc_spl_isharp_filters.h" //======================================== diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c index c174b2e8a150..e2baaf584139 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters.c @@ -2,7 +2,6 @@ // // Copyright 2024 Advanced Micro Devices, Inc. -#include "dc_spl_types.h" #include "dc_spl_scl_filters.h" //========================================= // <num_taps> = 2 diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c new file mode 100644 index 000000000000..bb0e1b80ec3c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_scl_filters_old.c @@ -0,0 +1,25 @@ +/* + * Copyright 2012-16 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 201201d3f55b..36d10b0f2eed 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -2,31 +2,14 @@ // // Copyright 2024 Advanced Micro Devices, Inc. -#include "os_types.h" -#include "dc_hw_types.h" +#include "os_types.h" // swap #ifndef ASSERT -#define ASSERT(_bool) (void *)0 +#define ASSERT(_bool) ((void *)0) #endif #include "include/fixed31_32.h" // fixed31_32 and related functions #ifndef __DC_SPL_TYPES_H__ #define __DC_SPL_TYPES_H__ -enum lb_memory_config { - /* Enable all 3 pieces of memory */ - LB_MEMORY_CONFIG_0 = 0, - - /* Enable only the first piece of memory */ - LB_MEMORY_CONFIG_1 = 1, - - /* Enable only the second piece of memory */ - LB_MEMORY_CONFIG_2 = 2, - - /* Only applicable in 4:2:0 mode, enable all 3 pieces of memory and the - * last piece of chroma memory used for the luma storage - */ - LB_MEMORY_CONFIG_3 = 3 -}; - struct spl_size { uint32_t width; uint32_t height; @@ -88,6 +71,22 @@ enum spl_pixel_format { SPL_PIXEL_FORMAT_UNKNOWN }; +enum lb_memory_config { + /* Enable all 3 pieces of memory */ + LB_MEMORY_CONFIG_0 = 0, + + /* Enable only the first piece of memory */ + LB_MEMORY_CONFIG_1 = 1, + + /* Enable only the second piece of memory */ + LB_MEMORY_CONFIG_2 = 2, + + /* Only applicable in 4:2:0 mode, enable all 3 pieces of memory and the + * last piece of chroma memory used for the luma storage + */ + LB_MEMORY_CONFIG_3 = 3 +}; + /* Rotation angle */ enum spl_rotation_angle { SPL_ROTATION_ANGLE_0 = 0, @@ -120,6 +119,13 @@ enum spl_color_space { SPL_COLOR_SPACE_YCBCR709_BLACK, }; +enum chroma_cositing { + CHROMA_COSITING_NONE, + CHROMA_COSITING_LEFT, + CHROMA_COSITING_TOPLEFT, + CHROMA_COSITING_COUNT +}; + // Scratch space for calculating scaler params struct spl_scaler_data { int h_active; diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h b/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h new file mode 100644 index 000000000000..7ebea91c84f6 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_os_types.h @@ -0,0 +1,77 @@ +/* + * Copyright 2012-16 Advanced Micro Devices, Inc. + * Copyright 2019 Raptor Engineering, LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _SPL_OS_TYPES_H_ +#define _SPL_OS_TYPES_H_ + +#include <linux/slab.h> +#include <linux/kgdb.h> +#include <linux/kref.h> +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/mm.h> + +/* + * + * general debug capabilities + * + */ +// TODO: need backport +#define SPL_BREAK_TO_DEBUGGER() ASSERT(0) + +static inline uint64_t spl_div_u64_rem(uint64_t dividend, uint32_t divisor, uint32_t *remainder) +{ + return div_u64_rem(dividend, divisor, remainder); +} + +static inline uint64_t spl_div_u64(uint64_t dividend, uint32_t divisor) +{ + return div_u64(dividend, divisor); +} + +static inline uint64_t spl_div64_u64(uint64_t dividend, uint64_t divisor) +{ + return div64_u64(dividend, divisor); +} + +static inline uint64_t spl_div64_u64_rem(uint64_t dividend, uint64_t divisor, uint64_t *remainder) +{ + return div64_u64_rem(dividend, divisor, remainder); +} + +static inline int64_t spl_div64_s64(int64_t dividend, int64_t divisor) +{ + return div64_s64(dividend, divisor); +} + +#define spl_swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +#ifndef spl_min +#define spl_min(a, b) (((a) < (b)) ? (a):(b)) +#endif + +#endif /* _SPL_OS_TYPES_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 6589bb9aea6b..cd70453aeae0 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -330,6 +330,9 @@ struct dmub_diagnostic_data { uint32_t inbox0_rptr; uint32_t inbox0_wptr; uint32_t inbox0_size; + uint32_t outbox1_rptr; + uint32_t outbox1_wptr; + uint32_t outbox1_size; uint32_t gpint_datain0; struct dmub_srv_debug timeout_info; uint8_t is_dmcub_enabled : 1; diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 5ff0a865705f..c5f99cbff0b6 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -336,6 +336,10 @@ union dmub_psr_debug_flags { */ uint32_t back_to_back_flip : 1; + /** + * Enable visual confirm for IPS + */ + uint32_t enable_ips_visual_confirm : 1; } bitfields; /** @@ -3024,14 +3028,6 @@ struct dmub_cmd_update_dirty_rect_data { * Currently the support is only for 0 or 1 */ uint8_t panel_inst; - /** - * 16-bit value dicated by driver that indicates the coasting vtotal high byte part. - */ - uint16_t coasting_vtotal_high; - /** - * Explicit padding to 4 byte boundary. - */ - uint8_t pad[2]; }; /** diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index 662c34e9495c..d9f31b191c69 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -449,6 +449,10 @@ void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c index e1da270502cc..9600b7f858b0 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c @@ -459,6 +459,10 @@ void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c index 916ed022e96b..746696b6f09a 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c @@ -502,6 +502,10 @@ void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c index cf139e9cc20e..39a8cb6d7523 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c @@ -444,6 +444,10 @@ void dmub_dcn401_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnost diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); diag_data->is_dmcub_enabled = is_dmub_enabled; diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h index d4cf7ead1d87..990fa1f19c22 100644 --- a/drivers/gpu/drm/amd/display/include/fixed31_32.h +++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h @@ -531,4 +531,10 @@ static inline struct fixed31_32 dc_fixpt_truncate(struct fixed31_32 arg, unsigne return arg; } +struct fixed31_32 dc_fixpt_from_ux_dy(unsigned int value, unsigned int integer_bits, unsigned int fractional_bits); +struct fixed31_32 dc_fixpt_from_int_dy(unsigned int int_value, + unsigned int frac_value, + unsigned int integer_bits, + unsigned int fractional_bits); + #endif diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h index 83479951732a..a48d564d1660 100644 --- a/drivers/gpu/drm/amd/display/include/logger_types.h +++ b/drivers/gpu/drm/amd/display/include/logger_types.h @@ -61,6 +61,7 @@ #define DC_LOG_ALL_TF_CHANNELS(...) pr_debug("[GAMMA]:"__VA_ARGS__) #define DC_LOG_DSC(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__) #define DC_LOG_SMU(...) pr_debug("[SMU_MSG]:"__VA_ARGS__) +#define DC_LOG_MALL(...) pr_debug("[MALL]:"__VA_ARGS__) #define DC_LOG_DWB(...) drm_dbg((DC_LOGGER)->dev, __VA_ARGS__) #define DC_LOG_DP2(...) drm_dbg_dp((DC_LOGGER)->dev, __VA_ARGS__) #define DC_LOG_AUTO_DPM_TEST(...) pr_debug("[AutoDPMTest]: "__VA_ARGS__) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index 1e495e884484..8bc377560787 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -432,18 +432,18 @@ static enum mod_hdcp_status authenticated_dp(struct mod_hdcp *hdcp, goto out; } - if (!mod_hdcp_execute_and_set(mod_hdcp_read_bstatus, + mod_hdcp_execute_and_set(mod_hdcp_read_bstatus, &input->bstatus_read, &status, - hdcp, "bstatus_read")) - goto out; - if (!mod_hdcp_execute_and_set(check_link_integrity_dp, + hdcp, "bstatus_read"); + + mod_hdcp_execute_and_set(check_link_integrity_dp, &input->link_integrity_check, &status, - hdcp, "link_integrity_check")) - goto out; - if (!mod_hdcp_execute_and_set(check_no_reauthentication_request_dp, + hdcp, "link_integrity_check"); + + mod_hdcp_execute_and_set(check_no_reauthentication_request_dp, &input->reauth_request_check, &status, - hdcp, "reauth_request_check")) - goto out; + hdcp, "reauth_request_check"); + out: return status; } diff --git a/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h index 8ee3149df5b7..2ef1273e65ab 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h @@ -340,8 +340,6 @@ #define UVD_LMI_CTRL__REQ_MODE_MASK 0x00000200L #define UVD_LMI_CTRL__REQ_MODE__SHIFT 0x00000009 #define UVD_LMI_CTRL__RFU_MASK 0xf8000000L -#define UVD_LMI_CTRL__RFU_MASK 0xfc000000L -#define UVD_LMI_CTRL__RFU__SHIFT 0x0000001a #define UVD_LMI_CTRL__RFU__SHIFT 0x0000001b #define UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK 0x00200000L #define UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN__SHIFT 0x00000015 diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index 46bf19c9c5c4..710e328fad48 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -258,6 +258,48 @@ struct gc_info_v1_2 { uint32_t gc_gl2c_per_gpu; }; +struct gc_info_v1_3 { + struct gpu_info_header header; + uint32_t gc_num_se; + uint32_t gc_num_wgp0_per_sa; + uint32_t gc_num_wgp1_per_sa; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_gl2c; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_sa_per_se; + uint32_t gc_num_packer_per_sc; + uint32_t gc_num_gl2a; + uint32_t gc_num_tcp_per_sa; + uint32_t gc_num_sdp_interface; + uint32_t gc_num_tcps; + uint32_t gc_num_tcp_per_wpg; + uint32_t gc_tcp_l1_size; + uint32_t gc_num_sqc_per_wgp; + uint32_t gc_l1_instruction_cache_size_per_sqc; + uint32_t gc_l1_data_cache_size_per_sqc; + uint32_t gc_gl1c_per_sa; + uint32_t gc_gl1c_size_per_instance; + uint32_t gc_gl2c_per_gpu; + uint32_t gc_tcp_size_per_cu; + uint32_t gc_tcp_cache_line_size; + uint32_t gc_instruction_cache_size_per_sqc; + uint32_t gc_instruction_cache_line_size; + uint32_t gc_scalar_data_cache_size_per_sqc; + uint32_t gc_scalar_data_cache_line_size; + uint32_t gc_tcc_size; + uint32_t gc_tcc_cache_line_size; +}; + struct gc_info_v2_0 { struct gpu_info_header header; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 6d094cf3587d..7744ca3ef4b1 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -318,6 +318,12 @@ struct kfd2kgd_calls { void (*program_trap_handler_settings)(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst); + uint64_t (*hqd_get_pq_addr)(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst); + uint64_t (*hqd_reset)(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id, + uint32_t inst, unsigned int utimeout); }; #endif /* KGD_KFD_INTERFACE_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 4b20e2274313..19a48d98830a 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -218,6 +218,7 @@ enum pp_mp1_state { PP_MP1_STATE_SHUTDOWN, PP_MP1_STATE_UNLOAD, PP_MP1_STATE_RESET, + PP_MP1_STATE_FLR, }; enum pp_df_cstate { diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index 4cf2c9f30b3d..101e2fe962c6 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -97,6 +97,7 @@ enum MES_QUEUE_TYPE { MES_QUEUE_TYPE_SDMA, MES_QUEUE_TYPE_MAX, + MES_QUEUE_TYPE_SCHQ = MES_QUEUE_TYPE_MAX, }; struct MES_API_STATUS { @@ -242,8 +243,12 @@ union MESAPI_SET_HW_RESOURCES { uint32_t send_write_data : 1; uint32_t os_tdr_timeout_override : 1; uint32_t use_rs64mem_for_proc_gang_ctx : 1; + uint32_t halt_on_misaligned_access : 1; + uint32_t use_add_queue_unmap_flag_addr : 1; + uint32_t enable_mes_sch_stb_log : 1; + uint32_t limit_single_process : 1; uint32_t unmapped_doorbell_handling: 2; - uint32_t reserved : 15; + uint32_t reserved : 11; }; uint32_t uint32_all; }; diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 8b7d6ed7e2ed..9dc82f4d7c93 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -168,7 +168,11 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, int ret = 0; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - if (pp_funcs && pp_funcs->set_mp1_state) { + if (mp1_state == PP_MP1_STATE_FLR) { + /* VF lost access to SMU */ + if (amdgpu_sriov_vf(adev)) + adev->pm.dpm_enabled = false; + } else if (pp_funcs && pp_funcs->set_mp1_state) { mutex_lock(&adev->pm.mutex); ret = pp_funcs->set_mp1_state( diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c index ca1c7ae8d146..f06b29e33ba4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c @@ -1183,6 +1183,8 @@ static int init_overdrive_limits(struct pp_hwmgr *hwmgr, fw_info = smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, FirmwareInfo), &size, &frev, &crev); + PP_ASSERT_WITH_CODE(fw_info != NULL, + "Missing firmware info!", return -EINVAL); if ((fw_info->ucTableFormatRevision == 1) && (le16_to_cpu(fw_info->usStructureSize) >= sizeof(ATOM_FIRMWARE_INFO_V1_4))) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index 6e717ddbb029..9ace863792d4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -2934,9 +2934,7 @@ static int vega10_stop_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap) } } - vega10_enable_smc_features(hwmgr, false, feature_mask); - - return 0; + return vega10_enable_smc_features(hwmgr, false, feature_mask); } /** diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h index de2e442281ff..87ca5ceb1ece 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h @@ -92,7 +92,6 @@ //Resets #define PPSMC_MSG_PrepareMp1ForUnload 0x2E -#define PPSMC_MSG_Mode1Reset 0x2F //Set SystemVirtual DramAddrHigh #define PPSMC_MSG_SetSystemVirtualDramAddrHigh 0x30 @@ -119,11 +118,12 @@ //STB to dram log #define PPSMC_MSG_DumpSTBtoDram 0x3D -#define PPSMC_MSG_STBtoDramLogSetDramAddrHigh 0x3E -#define PPSMC_MSG_STBtoDramLogSetDramAddrLow 0x3F +#define PPSMC_MSG_STBtoDramLogSetDramAddress 0x3E +#define PPSMC_MSG_DummyUndefined 0x3F #define PPSMC_MSG_STBtoDramLogSetDramSize 0x40 #define PPSMC_MSG_SetOBMTraceBufferLogging 0x41 +#define PPSMC_MSG_UseProfilingMode 0x42 #define PPSMC_MSG_AllowGfxDcs 0x43 #define PPSMC_MSG_DisallowGfxDcs 0x44 #define PPSMC_MSG_EnableAudioStutterWA 0x45 @@ -135,6 +135,16 @@ #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4B #define PPSMC_MSG_SetPriorityDeltaGain 0x4C #define PPSMC_MSG_AllowIHHostInterrupt 0x4D +#define PPSMC_MSG_EnableShadowDpm 0x4E #define PPSMC_MSG_Mode3Reset 0x4F -#define PPSMC_Message_Count 0x50 +#define PPSMC_MSG_SetDriverDramAddr 0x50 +#define PPSMC_MSG_SetToolsDramAddr 0x51 +#define PPSMC_MSG_TransferTableSmu2DramWithAddr 0x52 +#define PPSMC_MSG_TransferTableDram2SmuWithAddr 0x53 +#define PPSMC_MSG_GetAllRunningSmuFeatures 0x54 +#define PPSMC_MSG_GetSvi3Voltage 0x55 +#define PPSMC_MSG_UpdatePolicy 0x56 +#define PPSMC_MSG_ExtPwrConnSupport 0x57 +#define PPSMC_MSG_PreloadSwPstateForUclkOverDrive 0x58 +#define PPSMC_Message_Count 0x59 #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h index 4a3fde89aed7..75c921e87360 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h @@ -27,7 +27,8 @@ #pragma pack(push, 1) -#define SMU_14_0_2_TABLE_FORMAT_REVISION 3 +#define SMU_14_0_2_TABLE_FORMAT_REVISION 23 +#define SMU_14_0_2_CUSTOM_TABLE_FORMAT_REVISION 1 // POWERPLAYTABLE::ulPlatformCaps #define SMU_14_0_2_PP_PLATFORM_CAP_POWERPLAY 0x1 // This cap indicates whether CCC need to show Powerplay page. @@ -43,6 +44,7 @@ #define SMU_14_0_2_PP_THERMALCONTROLLER_NONE 0 #define SMU_14_0_2_PP_OVERDRIVE_VERSION 0x1 // TODO: FIX OverDrive Version TBD +#define SMU_14_0_2_PP_CUSTOM_OVERDRIVE_VERSION 0x1 #define SMU_14_0_2_PP_POWERSAVINGCLOCK_VERSION 0x01 // Power Saving Clock Table Version 1.00 enum SMU_14_0_2_OD_SW_FEATURE_CAP @@ -107,6 +109,7 @@ enum SMU_14_0_2_PWRMODE_SETTING SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_BALANCE, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_TURBO, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_RAGE, + SMU_14_0_2_PMSETTING_COUNT }; #define SMU_14_0_2_MAX_PMSETTING 32 // Maximum Number of PowerMode Settings @@ -127,17 +130,24 @@ struct smu_14_0_2_overdrive_table int16_t pm_setting[SMU_14_0_2_MAX_PMSETTING]; // Optimized power mode feature settings }; +enum smu_14_0_3_pptable_source { + PPTABLE_SOURCE_IFWI = 0, + PPTABLE_SOURCE_DRIVER_HARDCODED = 1, + PPTABLE_SOURCE_PPGEN_REGISTRY = 2, + PPTABLE_SOURCE_MAX = PPTABLE_SOURCE_PPGEN_REGISTRY, +}; + struct smu_14_0_2_powerplay_table { struct atom_common_table_header header; // header.format_revision = 3 (HAS TO MATCH SMU_14_0_2_TABLE_FORMAT_REVISION), header.content_revision = ? structuresize is calculated by PPGen. uint8_t table_revision; // PPGen use only: table_revision = 3 - uint8_t padding; // Padding 1 byte to align table_size offset to 6 bytes (pmfw_start_offset, for PMFW to know the starting offset of PPTable_t). + uint8_t pptable_source; // PPGen UI dropdown box uint16_t pmfw_pptable_start_offset; // The start offset of the pmfw portion. i.e. start of PPTable_t (start of SkuTable_t) uint16_t pmfw_pptable_size; // The total size of pmfw_pptable, i.e PPTable_t. - uint16_t pmfw_pfe_table_start_offset; // The start offset of the PFE_Settings_t within pmfw_pptable. - uint16_t pmfw_pfe_table_size; // The size of PFE_Settings_t. - uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t within pmfw_pptable. - uint16_t pmfw_board_table_size; // The size of BoardTable_t. + uint16_t pmfw_sku_table_start_offset; // DO NOT CHANGE ORDER; The absolute start offset of the SkuTable_t (within smu_14_0_3_powerplay_table). + uint16_t pmfw_sku_table_size; // DO NOT CHANGE ORDER; The size of SkuTable_t. + uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t + uint16_t pmfw_board_table_size; // The size of BoardTable_t. uint16_t pmfw_custom_sku_table_start_offset; // The start offset of the CustomSkuTable_t within pmfw_pptable. uint16_t pmfw_custom_sku_table_size; // The size of the CustomSkuTable_t. uint32_t golden_pp_id; // PPGen use only: PP Table ID on the Golden Data Base @@ -159,6 +169,36 @@ struct smu_14_0_2_powerplay_table PPTable_t smc_pptable; // PPTable_t in driver_if.h -- as requested by PMFW, this offset should start at a 32-byte boundary, and the table_size above should remain at offset=6 bytes }; +enum SMU_14_0_2_CUSTOM_OD_SW_FEATURE_CAP { + SMU_14_0_2_CUSTOM_ODCAP_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODCAP_COUNT +}; + +enum SMU_14_0_2_CUSTOM_OD_FEATURE_SETTING_ID { + SMU_14_0_2_CUSTOM_ODSETTING_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODSETTING_COUNT, +}; + +struct smu_14_0_2_custom_overdrive_table { + uint8_t revision; + uint8_t reserve[3]; + uint8_t cap[SMU_14_0_2_CUSTOM_ODCAP_COUNT]; + int32_t max[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int32_t min[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int16_t pm_setting[SMU_14_0_2_PMSETTING_COUNT]; +}; + +struct smu_14_0_3_custom_powerplay_table { + uint8_t custom_table_revision; + uint16_t custom_table_size; + uint16_t custom_sku_table_offset; + uint32_t custom_platform_caps; + uint16_t software_shutdown_temp; + struct smu_14_0_2_custom_overdrive_table custom_overdrive_table; + uint32_t reserve[8]; + CustomSkuTable_t custom_sku_table_pmfw; +}; + #pragma pack(pop) #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 076620fa3ef5..16af1a329621 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -1989,7 +1989,7 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 2, - "MEMLK", + "MEMCLK", activity_monitor.Mem_FPS, activity_monitor.Mem_MinFreqStep, activity_monitor.Mem_MinActiveFreqType, @@ -2051,7 +2051,7 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u activity_monitor.Soc_PD_Data_error_coeff = input[8]; activity_monitor.Soc_PD_Data_error_rate_coeff = input[9]; break; - case 2: /* Memlk */ + case 2: /* Memclk */ activity_monitor.Mem_FPS = input[1]; activity_monitor.Mem_MinFreqStep = input[2]; activity_monitor.Mem_MinActiveFreqType = input[3]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 0d3e1a121b67..9c3c48297cba 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1691,7 +1691,7 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 2, - "MEMLK", + "MEMCLK", activity_monitor->Mem_FPS, activity_monitor->Mem_MinFreqStep, activity_monitor->Mem_MinActiveFreqType, @@ -1756,7 +1756,7 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long * activity_monitor->Fclk_PD_Data_error_coeff = input[8]; activity_monitor->Fclk_PD_Data_error_rate_coeff = input[9]; break; - case 2: /* Memlk */ + case 2: /* Memclk */ activity_monitor->Mem_FPS = input[1]; activity_monitor->Mem_MinFreqStep = input[2]; activity_monitor->Mem_MinActiveFreqType = input[3]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 78c3f94bb3ff..9974c9f8135e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -121,6 +121,7 @@ struct mca_ras_info { #define P2S_TABLE_ID_A 0x50325341 #define P2S_TABLE_ID_X 0x50325358 +#define P2S_TABLE_ID_3 0x50325303 // clang-format off static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = { @@ -271,14 +272,18 @@ static int smu_v13_0_6_init_microcode(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; uint32_t p2s_table_id = P2S_TABLE_ID_A; int ret = 0, i, p2stable_count; + int var = (adev->pdev->device & 0xF); char ucode_prefix[15]; /* No need to load P2S tables in IOV mode */ if (amdgpu_sriov_vf(adev)) return 0; - if (!(adev->flags & AMD_IS_APU)) + if (!(adev->flags & AMD_IS_APU)) { p2s_table_id = P2S_TABLE_ID_X; + if (var == 0x5) + p2s_table_id = P2S_TABLE_ID_3; + } amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 09973615f210..865e916fc425 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -452,17 +452,26 @@ int smu_v14_0_init_smc_tables(struct smu_context *smu) ret = -ENOMEM; goto err3_out; } + + smu_table->user_overdrive_table = + kzalloc(tables[SMU_TABLE_OVERDRIVE].size, GFP_KERNEL); + if (!smu_table->user_overdrive_table) { + ret = -ENOMEM; + goto err4_out; + } } smu_table->combo_pptable = kzalloc(tables[SMU_TABLE_COMBO_PPTABLE].size, GFP_KERNEL); if (!smu_table->combo_pptable) { ret = -ENOMEM; - goto err4_out; + goto err5_out; } return 0; +err5_out: + kfree(smu_table->user_overdrive_table); err4_out: kfree(smu_table->boot_overdrive_table); err3_out: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index e1a27903c80a..a31fae5feedf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -68,6 +68,18 @@ #define DEBUGSMC_MSG_Mode1Reset 2 #define LINK_SPEED_MAX 3 +#define PP_OD_FEATURE_GFXCLK_FMIN 0 +#define PP_OD_FEATURE_GFXCLK_FMAX 1 +#define PP_OD_FEATURE_UCLK_FMIN 2 +#define PP_OD_FEATURE_UCLK_FMAX 3 +#define PP_OD_FEATURE_GFX_VF_CURVE 4 +#define PP_OD_FEATURE_FAN_CURVE_TEMP 5 +#define PP_OD_FEATURE_FAN_CURVE_PWM 6 +#define PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT 7 +#define PP_OD_FEATURE_FAN_ACOUSTIC_TARGET 8 +#define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 +#define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 + static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -115,7 +127,6 @@ static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0), MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0), MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 0), - MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), @@ -204,6 +215,7 @@ static struct cmn2asic_mapping smu_v14_0_2_table_map[SMU_TABLE_COUNT] = { [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE}, TAB_MAP(I2C_COMMANDS), TAB_MAP(ECCINFO), + TAB_MAP(OVERDRIVE), }; static struct cmn2asic_mapping smu_v14_0_2_pwr_src_map[SMU_POWER_SOURCE_COUNT] = { @@ -1029,16 +1041,97 @@ static int smu_v14_0_2_get_current_clk_freq_by_table(struct smu_context *smu, value); } +static bool smu_v14_0_2_is_od_feature_supported(struct smu_context *smu, + int od_feature_bit) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + const OverDriveLimits_t * const overdrive_upperlimits = + &pptable->SkuTable.OverDriveLimitsBasicMax; + + return overdrive_upperlimits->FeatureCtrlMask & (1U << od_feature_bit); +} + +static void smu_v14_0_2_get_od_setting_limits(struct smu_context *smu, + int od_feature_bit, + int32_t *min, + int32_t *max) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + const OverDriveLimits_t * const overdrive_upperlimits = + &pptable->SkuTable.OverDriveLimitsBasicMax; + const OverDriveLimits_t * const overdrive_lowerlimits = + &pptable->SkuTable.OverDriveLimitsBasicMin; + int32_t od_min_setting, od_max_setting; + + switch (od_feature_bit) { + case PP_OD_FEATURE_GFXCLK_FMIN: + od_min_setting = overdrive_lowerlimits->GfxclkFmin; + od_max_setting = overdrive_upperlimits->GfxclkFmin; + break; + case PP_OD_FEATURE_GFXCLK_FMAX: + od_min_setting = overdrive_lowerlimits->GfxclkFmax; + od_max_setting = overdrive_upperlimits->GfxclkFmax; + break; + case PP_OD_FEATURE_UCLK_FMIN: + od_min_setting = overdrive_lowerlimits->UclkFmin; + od_max_setting = overdrive_upperlimits->UclkFmin; + break; + case PP_OD_FEATURE_UCLK_FMAX: + od_min_setting = overdrive_lowerlimits->UclkFmax; + od_max_setting = overdrive_upperlimits->UclkFmax; + break; + case PP_OD_FEATURE_GFX_VF_CURVE: + od_min_setting = overdrive_lowerlimits->VoltageOffsetPerZoneBoundary[0]; + od_max_setting = overdrive_upperlimits->VoltageOffsetPerZoneBoundary[0]; + break; + case PP_OD_FEATURE_FAN_CURVE_TEMP: + od_min_setting = overdrive_lowerlimits->FanLinearTempPoints[0]; + od_max_setting = overdrive_upperlimits->FanLinearTempPoints[0]; + break; + case PP_OD_FEATURE_FAN_CURVE_PWM: + od_min_setting = overdrive_lowerlimits->FanLinearPwmPoints[0]; + od_max_setting = overdrive_upperlimits->FanLinearPwmPoints[0]; + break; + case PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT: + od_min_setting = overdrive_lowerlimits->AcousticLimitRpmThreshold; + od_max_setting = overdrive_upperlimits->AcousticLimitRpmThreshold; + break; + case PP_OD_FEATURE_FAN_ACOUSTIC_TARGET: + od_min_setting = overdrive_lowerlimits->AcousticTargetRpmThreshold; + od_max_setting = overdrive_upperlimits->AcousticTargetRpmThreshold; + break; + case PP_OD_FEATURE_FAN_TARGET_TEMPERATURE: + od_min_setting = overdrive_lowerlimits->FanTargetTemperature; + od_max_setting = overdrive_upperlimits->FanTargetTemperature; + break; + case PP_OD_FEATURE_FAN_MINIMUM_PWM: + od_min_setting = overdrive_lowerlimits->FanMinimumPwm; + od_max_setting = overdrive_upperlimits->FanMinimumPwm; + break; + default: + od_min_setting = od_max_setting = INT_MAX; + break; + } + + if (min) + *min = od_min_setting; + if (max) + *max = od_max_setting; +} + static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { struct smu_dpm_context *smu_dpm = &smu->smu_dpm; struct smu_14_0_dpm_context *dpm_context = smu_dpm->dpm_context; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)smu->smu_table.overdrive_table; struct smu_14_0_dpm_table *single_dpm_table; struct smu_14_0_pcie_table *pcie_table; uint32_t gen_speed, lane_width; int i, curr_freq, size = 0; + int32_t min_value, max_value; int ret = 0; smu_cmn_get_sysfs_buf(&buf, &size); @@ -1159,6 +1252,183 @@ static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, "*" : ""); break; + case SMU_OD_SCLK: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_GFXCLK_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_SCLK:\n"); + size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", + od_table->OverDriveTable.GfxclkFmin, + od_table->OverDriveTable.GfxclkFmax); + break; + + case SMU_OD_MCLK: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_UCLK_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_MCLK:\n"); + size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMHz\n", + od_table->OverDriveTable.UclkFmin, + od_table->OverDriveTable.UclkFmax); + break; + + case SMU_OD_VDDGFX_OFFSET: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_GFX_VF_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_VDDGFX_OFFSET:\n"); + size += sysfs_emit_at(buf, size, "%dmV\n", + od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[0]); + break; + + case SMU_OD_FAN_CURVE: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_FAN_CURVE:\n"); + for (i = 0; i < NUM_OD_FAN_MAX_POINTS - 1; i++) + size += sysfs_emit_at(buf, size, "%d: %dC %d%%\n", + i, + (int)od_table->OverDriveTable.FanLinearTempPoints[i], + (int)od_table->OverDriveTable.FanLinearPwmPoints[i]); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_TEMP, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "FAN_CURVE(hotspot temp): %uC %uC\n", + min_value, max_value); + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_PWM, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "FAN_CURVE(fan speed): %u%% %u%%\n", + min_value, max_value); + + break; + + case SMU_OD_ACOUSTIC_LIMIT: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_ACOUSTIC_LIMIT:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.AcousticLimitRpmThreshold); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ACOUSTIC_LIMIT: %u %u\n", + min_value, max_value); + break; + + case SMU_OD_ACOUSTIC_TARGET: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "OD_ACOUSTIC_TARGET:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.AcousticTargetRpmThreshold); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_TARGET, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ACOUSTIC_TARGET: %u %u\n", + min_value, max_value); + break; + + case SMU_OD_FAN_TARGET_TEMPERATURE: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_TARGET_TEMPERATURE:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanTargetTemperature); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_TARGET_TEMPERATURE, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "TARGET_TEMPERATURE: %u %u\n", + min_value, max_value); + break; + + case SMU_OD_FAN_MINIMUM_PWM: + if (!smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_MINIMUM_PWM:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanMinimumPwm); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_MINIMUM_PWM, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "MINIMUM_PWM: %u %u\n", + min_value, max_value); + break; + + case SMU_OD_RANGE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && + !smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && + !smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) + break; + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + + if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT)) { + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMIN, + &min_value, + NULL); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMAX, + NULL, + &max_value); + size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", + min_value, max_value); + } + + if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT)) { + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMIN, + &min_value, + NULL); + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMAX, + NULL, + &max_value); + size += sysfs_emit_at(buf, size, "MCLK: %7uMhz %10uMhz\n", + min_value, max_value); + } + + if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) { + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFX_VF_CURVE, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "VDDGFX_OFFSET: %7dmv %10dmv\n", + min_value, max_value); + } + break; + default: break; } @@ -1400,7 +1670,27 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu, uint32_t *max_power_limit, uint32_t *min_power_limit) { - // TODO + struct smu_table_context *table_context = &smu->smu_table; + PPTable_t *pptable = table_context->driver_pptable; + CustomSkuTable_t *skutable = &pptable->CustomSkuTable; + uint32_t power_limit; + uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + + if (smu_v14_0_get_current_power_limit(smu, &power_limit)) + power_limit = smu->adev->pm.ac_power ? + skutable->SocketPowerLimitAc[PPT_THROTTLER_PPT0] : + skutable->SocketPowerLimitDc[PPT_THROTTLER_PPT0]; + + if (current_power_limit) + *current_power_limit = power_limit; + if (default_power_limit) + *default_power_limit = power_limit; + + if (max_power_limit) + *max_power_limit = msg_limit; + + if (min_power_limit) + *min_power_limit = 0; return 0; } @@ -1824,50 +2114,6 @@ static void smu_v14_0_2_set_smu_mailbox_registers(struct smu_context *smu) smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, regMP1_SMN_C2PMSG_54); } -static int smu_v14_0_2_smu_send_bad_mem_page_num(struct smu_context *smu, - uint32_t size) -{ - int ret = 0; - - /* message SMU to update the bad page number on SMUBUS */ - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetNumBadMemoryPagesRetired, - size, NULL); - if (ret) - dev_err(smu->adev->dev, - "[%s] failed to message SMU to update bad memory pages number\n", - __func__); - - return ret; -} - -static int smu_v14_0_2_send_bad_mem_channel_flag(struct smu_context *smu, - uint32_t size) -{ - int ret = 0; - - /* message SMU to update the bad channel info on SMUBUS */ - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, - size, NULL); - if (ret) - dev_err(smu->adev->dev, - "[%s] failed to message SMU to update bad memory pages channel info\n", - __func__); - - return ret; -} - -static ssize_t smu_v14_0_2_get_ecc_info(struct smu_context *smu, - void *table) -{ - int ret = 0; - - // TODO - - return ret; -} - static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, void **table) { @@ -1950,6 +2196,594 @@ static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v1_3); } +static void smu_v14_0_2_dump_od_table(struct smu_context *smu, + OverDriveTableExternal_t *od_table) +{ + struct amdgpu_device *adev = smu->adev; + + dev_dbg(adev->dev, "OD: Gfxclk: (%d, %d)\n", od_table->OverDriveTable.GfxclkFmin, + od_table->OverDriveTable.GfxclkFmax); + dev_dbg(adev->dev, "OD: Uclk: (%d, %d)\n", od_table->OverDriveTable.UclkFmin, + od_table->OverDriveTable.UclkFmax); +} + +static int smu_v14_0_2_upload_overdrive_table(struct smu_context *smu, + OverDriveTableExternal_t *od_table) +{ + int ret; + ret = smu_cmn_update_table(smu, + SMU_TABLE_OVERDRIVE, + 0, + (void *)od_table, + true); + if (ret) + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + + return ret; +} + +static void smu_v14_0_2_set_supported_od_feature_mask(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + if (smu_v14_0_2_is_od_feature_supported(smu, + PP_OD_FEATURE_FAN_CURVE_BIT)) + adev->pm.od_feature_mask |= OD_OPS_SUPPORT_FAN_CURVE_RETRIEVE | + OD_OPS_SUPPORT_FAN_CURVE_SET | + OD_OPS_SUPPORT_ACOUSTIC_LIMIT_THRESHOLD_RETRIEVE | + OD_OPS_SUPPORT_ACOUSTIC_LIMIT_THRESHOLD_SET | + OD_OPS_SUPPORT_ACOUSTIC_TARGET_THRESHOLD_RETRIEVE | + OD_OPS_SUPPORT_ACOUSTIC_TARGET_THRESHOLD_SET | + OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_RETRIEVE | + OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET | + OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | + OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET; +} + +static int smu_v14_0_2_get_overdrive_table(struct smu_context *smu, + OverDriveTableExternal_t *od_table) +{ + int ret; + ret = smu_cmn_update_table(smu, + SMU_TABLE_OVERDRIVE, + 0, + (void *)od_table, + false); + if (ret) + dev_err(smu->adev->dev, "Failed to get overdrive table!\n"); + + return ret; +} + +static int smu_v14_0_2_set_default_od_settings(struct smu_context *smu) +{ + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)smu->smu_table.overdrive_table; + OverDriveTableExternal_t *boot_od_table = + (OverDriveTableExternal_t *)smu->smu_table.boot_overdrive_table; + OverDriveTableExternal_t *user_od_table = + (OverDriveTableExternal_t *)smu->smu_table.user_overdrive_table; + OverDriveTableExternal_t user_od_table_bak; + int ret; + int i; + + ret = smu_v14_0_2_get_overdrive_table(smu, boot_od_table); + if (ret) + return ret; + + smu_v14_0_2_dump_od_table(smu, boot_od_table); + + memcpy(od_table, + boot_od_table, + sizeof(OverDriveTableExternal_t)); + + /* + * For S3/S4/Runpm resume, we need to setup those overdrive tables again, + * but we have to preserve user defined values in "user_od_table". + */ + if (!smu->adev->in_suspend) { + memcpy(user_od_table, + boot_od_table, + sizeof(OverDriveTableExternal_t)); + smu->user_dpm_profile.user_od = false; + } else if (smu->user_dpm_profile.user_od) { + memcpy(&user_od_table_bak, + user_od_table, + sizeof(OverDriveTableExternal_t)); + memcpy(user_od_table, + boot_od_table, + sizeof(OverDriveTableExternal_t)); + user_od_table->OverDriveTable.GfxclkFmin = + user_od_table_bak.OverDriveTable.GfxclkFmin; + user_od_table->OverDriveTable.GfxclkFmax = + user_od_table_bak.OverDriveTable.GfxclkFmax; + user_od_table->OverDriveTable.UclkFmin = + user_od_table_bak.OverDriveTable.UclkFmin; + user_od_table->OverDriveTable.UclkFmax = + user_od_table_bak.OverDriveTable.UclkFmax; + for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++) + user_od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i] = + user_od_table_bak.OverDriveTable.VoltageOffsetPerZoneBoundary[i]; + for (i = 0; i < NUM_OD_FAN_MAX_POINTS - 1; i++) { + user_od_table->OverDriveTable.FanLinearTempPoints[i] = + user_od_table_bak.OverDriveTable.FanLinearTempPoints[i]; + user_od_table->OverDriveTable.FanLinearPwmPoints[i] = + user_od_table_bak.OverDriveTable.FanLinearPwmPoints[i]; + } + user_od_table->OverDriveTable.AcousticLimitRpmThreshold = + user_od_table_bak.OverDriveTable.AcousticLimitRpmThreshold; + user_od_table->OverDriveTable.AcousticTargetRpmThreshold = + user_od_table_bak.OverDriveTable.AcousticTargetRpmThreshold; + user_od_table->OverDriveTable.FanTargetTemperature = + user_od_table_bak.OverDriveTable.FanTargetTemperature; + user_od_table->OverDriveTable.FanMinimumPwm = + user_od_table_bak.OverDriveTable.FanMinimumPwm; + } + + smu_v14_0_2_set_supported_od_feature_mask(smu); + + return 0; +} + +static int smu_v14_0_2_restore_user_od_settings(struct smu_context *smu) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = table_context->overdrive_table; + OverDriveTableExternal_t *user_od_table = table_context->user_overdrive_table; + int res; + + user_od_table->OverDriveTable.FeatureCtrlMask = BIT(PP_OD_FEATURE_GFXCLK_BIT) | + BIT(PP_OD_FEATURE_UCLK_BIT) | + BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT) | + BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + res = smu_v14_0_2_upload_overdrive_table(smu, user_od_table); + user_od_table->OverDriveTable.FeatureCtrlMask = 0; + if (res == 0) + memcpy(od_table, user_od_table, sizeof(OverDriveTableExternal_t)); + + return res; +} + +static int smu_v14_0_2_od_restore_table_single(struct smu_context *smu, long input) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *boot_overdrive_table = + (OverDriveTableExternal_t *)table_context->boot_overdrive_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + struct amdgpu_device *adev = smu->adev; + int i; + + switch (input) { + case PP_OD_EDIT_FAN_CURVE: + for (i = 0; i < NUM_OD_FAN_MAX_POINTS; i++) { + od_table->OverDriveTable.FanLinearTempPoints[i] = + boot_overdrive_table->OverDriveTable.FanLinearTempPoints[i]; + od_table->OverDriveTable.FanLinearPwmPoints[i] = + boot_overdrive_table->OverDriveTable.FanLinearPwmPoints[i]; + } + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_ACOUSTIC_LIMIT: + od_table->OverDriveTable.AcousticLimitRpmThreshold = + boot_overdrive_table->OverDriveTable.AcousticLimitRpmThreshold; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_ACOUSTIC_TARGET: + od_table->OverDriveTable.AcousticTargetRpmThreshold = + boot_overdrive_table->OverDriveTable.AcousticTargetRpmThreshold; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_FAN_TARGET_TEMPERATURE: + od_table->OverDriveTable.FanTargetTemperature = + boot_overdrive_table->OverDriveTable.FanTargetTemperature; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + case PP_OD_EDIT_FAN_MINIMUM_PWM: + od_table->OverDriveTable.FanMinimumPwm = + boot_overdrive_table->OverDriveTable.FanMinimumPwm; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + default: + dev_info(adev->dev, "Invalid table index: %ld\n", input); + return -EINVAL; + } + + return 0; +} + +static int smu_v14_0_2_od_edit_dpm_table(struct smu_context *smu, + enum PP_OD_DPM_TABLE_COMMAND type, + long input[], + uint32_t size) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + struct amdgpu_device *adev = smu->adev; + uint32_t offset_of_voltageoffset; + int32_t minimum, maximum; + uint32_t feature_ctrlmask; + int i, ret = 0; + + switch (type) { + case PP_OD_EDIT_SCLK_VDDC_TABLE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT)) { + dev_warn(adev->dev, "GFXCLK_LIMITS setting not supported!\n"); + return -ENOTSUPP; + } + + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + dev_info(adev->dev, "invalid number of input parameters %d\n", size); + return -EINVAL; + } + + switch (input[i]) { + case 0: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMIN, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "GfxclkFmin (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.GfxclkFmin = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; + break; + + case 1: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMAX, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "GfxclkFmax (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.GfxclkFmax = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; + break; + + default: + dev_info(adev->dev, "Invalid SCLK_VDDC_TABLE index: %ld\n", input[i]); + dev_info(adev->dev, "Supported indices: [0:min,1:max]\n"); + return -EINVAL; + } + } + + if (od_table->OverDriveTable.GfxclkFmin > od_table->OverDriveTable.GfxclkFmax) { + dev_err(adev->dev, + "Invalid setting: GfxclkFmin(%u) is bigger than GfxclkFmax(%u)\n", + (uint32_t)od_table->OverDriveTable.GfxclkFmin, + (uint32_t)od_table->OverDriveTable.GfxclkFmax); + return -EINVAL; + } + break; + + case PP_OD_EDIT_MCLK_VDDC_TABLE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT)) { + dev_warn(adev->dev, "UCLK_LIMITS setting not supported!\n"); + return -ENOTSUPP; + } + + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + dev_info(adev->dev, "invalid number of input parameters %d\n", size); + return -EINVAL; + } + + switch (input[i]) { + case 0: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMIN, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "UclkFmin (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.UclkFmin = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_UCLK_BIT; + break; + + case 1: + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_UCLK_FMAX, + &minimum, + &maximum); + if (input[i + 1] < minimum || + input[i + 1] > maximum) { + dev_info(adev->dev, "UclkFmax (%ld) must be within [%u, %u]!\n", + input[i + 1], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.UclkFmax = input[i + 1]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_UCLK_BIT; + break; + + default: + dev_info(adev->dev, "Invalid MCLK_VDDC_TABLE index: %ld\n", input[i]); + dev_info(adev->dev, "Supported indices: [0:min,1:max]\n"); + return -EINVAL; + } + } + + if (od_table->OverDriveTable.UclkFmin > od_table->OverDriveTable.UclkFmax) { + dev_err(adev->dev, + "Invalid setting: UclkFmin(%u) is bigger than UclkFmax(%u)\n", + (uint32_t)od_table->OverDriveTable.UclkFmin, + (uint32_t)od_table->OverDriveTable.UclkFmax); + return -EINVAL; + } + break; + + case PP_OD_EDIT_VDDGFX_OFFSET: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFX_VF_CURVE_BIT)) { + dev_warn(adev->dev, "Gfx offset setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFX_VF_CURVE, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "Voltage offset (%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + for (i = 0; i < PP_NUM_OD_VF_CURVE_POINTS; i++) + od_table->OverDriveTable.VoltageOffsetPerZoneBoundary[i] = input[0]; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_GFX_VF_CURVE_BIT); + break; + + case PP_OD_EDIT_FAN_CURVE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + if (input[0] >= NUM_OD_FAN_MAX_POINTS - 1 || + input[0] < 0) + return -EINVAL; + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_TEMP, + &minimum, + &maximum); + if (input[1] < minimum || + input[1] > maximum) { + dev_info(adev->dev, "Fan curve temp setting(%ld) must be within [%d, %d]!\n", + input[1], minimum, maximum); + return -EINVAL; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_CURVE_PWM, + &minimum, + &maximum); + if (input[2] < minimum || + input[2] > maximum) { + dev_info(adev->dev, "Fan curve pwm setting(%ld) must be within [%d, %d]!\n", + input[2], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanLinearTempPoints[input[0]] = input[1]; + od_table->OverDriveTable.FanLinearPwmPoints[input[0]] = input[2]; + od_table->OverDriveTable.FanMode = FAN_MODE_MANUAL_LINEAR; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_ACOUSTIC_LIMIT: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_LIMIT, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "acoustic limit threshold setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.AcousticLimitRpmThreshold = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_ACOUSTIC_TARGET: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ACOUSTIC_TARGET, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "acoustic target threshold setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.AcousticTargetRpmThreshold = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_FAN_TARGET_TEMPERATURE: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_TARGET_TEMPERATURE, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "fan target temperature setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanTargetTemperature = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_EDIT_FAN_MINIMUM_PWM: + if (!smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_FAN_CURVE_BIT)) { + dev_warn(adev->dev, "Fan curve setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_MINIMUM_PWM, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "fan minimum pwm setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanMinimumPwm = input[0]; + od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); + break; + + case PP_OD_RESTORE_DEFAULT_TABLE: + if (size == 1) { + ret = smu_v14_0_2_od_restore_table_single(smu, input[0]); + if (ret) + return ret; + } else { + feature_ctrlmask = od_table->OverDriveTable.FeatureCtrlMask; + memcpy(od_table, + table_context->boot_overdrive_table, + sizeof(OverDriveTableExternal_t)); + od_table->OverDriveTable.FeatureCtrlMask = feature_ctrlmask; + } + fallthrough; + case PP_OD_COMMIT_DPM_TABLE: + /* + * The member below instructs PMFW the settings focused in + * this single operation. + * `uint32_t FeatureCtrlMask;` + * It does not contain actual informations about user's custom + * settings. Thus we do not cache it. + */ + offset_of_voltageoffset = offsetof(OverDriveTable_t, VoltageOffsetPerZoneBoundary); + if (memcmp((u8 *)od_table + offset_of_voltageoffset, + table_context->user_overdrive_table + offset_of_voltageoffset, + sizeof(OverDriveTableExternal_t) - offset_of_voltageoffset)) { + smu_v14_0_2_dump_od_table(smu, od_table); + + ret = smu_v14_0_2_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + + od_table->OverDriveTable.FeatureCtrlMask = 0; + memcpy(table_context->user_overdrive_table + offset_of_voltageoffset, + (u8 *)od_table + offset_of_voltageoffset, + sizeof(OverDriveTableExternal_t) - offset_of_voltageoffset); + + if (!memcmp(table_context->user_overdrive_table, + table_context->boot_overdrive_table, + sizeof(OverDriveTableExternal_t))) + smu->user_dpm_profile.user_od = false; + else + smu->user_dpm_profile.user_od = true; + } + break; + + default: + return -ENOSYS; + } + + return ret; +} + +static int smu_v14_0_2_set_power_limit(struct smu_context *smu, + enum smu_ppt_limit_type limit_type, + uint32_t limit) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC]; + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTableExternal_t *od_table = + (OverDriveTableExternal_t *)table_context->overdrive_table; + int ret = 0; + + if (limit_type != SMU_DEFAULT_PPT_LIMIT) + return -EINVAL; + + if (limit <= msg_limit) { + if (smu->current_power_limit > msg_limit) { + od_table->OverDriveTable.Ppt = 0; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v14_0_2_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + } + return smu_v14_0_set_power_limit(smu, limit_type, limit); + } else if (smu->od_enabled) { + ret = smu_v14_0_set_power_limit(smu, limit_type, msg_limit); + if (ret) + return ret; + + od_table->OverDriveTable.Ppt = (limit * 100) / msg_limit - 100; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_PPT_BIT; + + ret = smu_v14_0_2_upload_overdrive_table(smu, od_table); + if (ret) { + dev_err(smu->adev->dev, "Failed to upload overdrive table!\n"); + return ret; + } + + smu->current_power_limit = limit; + } else { + return -EINVAL; + } + + return 0; +} + static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .get_allowed_feature_mask = smu_v14_0_2_get_allowed_feature_mask, .set_default_dpm_table = smu_v14_0_2_set_default_dpm_table, @@ -1988,13 +2822,16 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .notify_memory_pool_location = smu_v14_0_notify_memory_pool_location, .get_gpu_metrics = smu_v14_0_2_get_gpu_metrics, .set_soft_freq_limited_range = smu_v14_0_set_soft_freq_limited_range, + .set_default_od_settings = smu_v14_0_2_set_default_od_settings, + .restore_user_od_settings = smu_v14_0_2_restore_user_od_settings, + .od_edit_dpm_table = smu_v14_0_2_od_edit_dpm_table, .init_pptable_microcode = smu_v14_0_init_pptable_microcode, .populate_umd_state_clk = smu_v14_0_2_populate_umd_state_clk, .set_performance_level = smu_v14_0_set_performance_level, .gfx_off_control = smu_v14_0_gfx_off_control, .get_unique_id = smu_v14_0_2_get_unique_id, .get_power_limit = smu_v14_0_2_get_power_limit, - .set_power_limit = smu_v14_0_set_power_limit, + .set_power_limit = smu_v14_0_2_set_power_limit, .set_power_source = smu_v14_0_set_power_source, .get_power_profile_mode = smu_v14_0_2_get_power_profile_mode, .set_power_profile_mode = smu_v14_0_2_set_power_profile_mode, @@ -2015,12 +2852,9 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .enable_gfx_features = smu_v14_0_2_enable_gfx_features, .set_mp1_state = smu_v14_0_2_set_mp1_state, .set_df_cstate = smu_v14_0_2_set_df_cstate, - .send_hbm_bad_pages_num = smu_v14_0_2_smu_send_bad_mem_page_num, - .send_hbm_bad_channel_flag = smu_v14_0_2_send_bad_mem_channel_flag, #if 0 .gpo_control = smu_v14_0_gpo_control, #endif - .get_ecc_info = smu_v14_0_2_get_ecc_info, }; void smu_v14_0_2_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index e8695e390686..a040d7dfced1 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -2343,7 +2343,7 @@ drm_dp_mst_handle_link_address_port(struct drm_dp_mst_branch *mstb, { struct drm_dp_mst_topology_mgr *mgr = mstb->mgr; struct drm_dp_mst_port *port; - int old_ddps = 0, ret; + int ret; u8 new_pdt = DP_PEER_DEVICE_NONE; bool new_mcs = 0; bool created = false, send_link_addr = false, changed = false; @@ -2376,7 +2376,6 @@ drm_dp_mst_handle_link_address_port(struct drm_dp_mst_branch *mstb, */ drm_modeset_lock(&mgr->base.lock, NULL); - old_ddps = port->ddps; changed = port->ddps != port_msg->ddps || (port->ddps && (port->ldps != port_msg->legacy_device_plug_status || @@ -2411,15 +2410,13 @@ drm_dp_mst_handle_link_address_port(struct drm_dp_mst_branch *mstb, * Reprobe PBN caps on both hotplug, and when re-probing the link * for our parent mstb */ - if (old_ddps != port->ddps || !created) { - if (port->ddps && !port->input) { - ret = drm_dp_send_enum_path_resources(mgr, mstb, - port); - if (ret == 1) - changed = true; - } else { - port->full_pbn = 0; - } + if (port->ddps && !port->input) { + ret = drm_dp_send_enum_path_resources(mgr, mstb, + port); + if (ret == 1) + changed = true; + } else { + port->full_pbn = 0; } ret = drm_dp_port_set_pdt(port, new_pdt, new_mcs); @@ -2695,6 +2692,11 @@ static void drm_dp_mst_link_probe_work(struct work_struct *work) drm_kms_helper_hotplug_event(dev); } +static void drm_dp_mst_queue_probe_work(struct drm_dp_mst_topology_mgr *mgr) +{ + queue_work(system_long_wq, &mgr->work); +} + static bool drm_dp_validate_guid(struct drm_dp_mst_topology_mgr *mgr, guid_t *guid) { @@ -3683,7 +3685,7 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms /* Write reset payload */ drm_dp_dpcd_write_payload(mgr, 0, 0, 0x3f); - queue_work(system_long_wq, &mgr->work); + drm_dp_mst_queue_probe_work(mgr); ret = 0; } else { @@ -3722,6 +3724,33 @@ drm_dp_mst_topology_mgr_invalidate_mstb(struct drm_dp_mst_branch *mstb) } /** + * drm_dp_mst_topology_queue_probe - Queue a topology probe + * @mgr: manager to probe + * + * Queue a work to probe the MST topology. Driver's should call this only to + * sync the topology's HW->SW state after the MST link's parameters have + * changed in a way the state could've become out-of-sync. This is the case + * for instance when the link rate between the source and first downstream + * branch device has switched between UHBR and non-UHBR rates. Except of those + * cases - for instance when a sink gets plugged/unplugged to a port - the SW + * state will get updated automatically via MST UP message notifications. + */ +void drm_dp_mst_topology_queue_probe(struct drm_dp_mst_topology_mgr *mgr) +{ + mutex_lock(&mgr->lock); + + if (drm_WARN_ON(mgr->dev, !mgr->mst_state || !mgr->mst_primary)) + goto out_unlock; + + drm_dp_mst_topology_mgr_invalidate_mstb(mgr->mst_primary); + drm_dp_mst_queue_probe_work(mgr); + +out_unlock: + mutex_unlock(&mgr->lock); +} +EXPORT_SYMBOL(drm_dp_mst_topology_queue_probe); + +/** * drm_dp_mst_topology_mgr_suspend() - suspend the MST manager * @mgr: manager to suspend * @@ -3810,7 +3839,7 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, * state of our in-memory topology back into sync with reality. So, * restart the probing process as if we're probing a new hub */ - queue_work(system_long_wq, &mgr->work); + drm_dp_mst_queue_probe_work(mgr); mutex_unlock(&mgr->lock); if (sync) { diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 7609c798d73d..7936c2023955 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -1071,23 +1071,16 @@ int drm_atomic_set_property(struct drm_atomic_state *state, } if (async_flip && - prop != config->prop_fb_id && - prop != config->prop_in_fence_fd && - prop != config->prop_fb_damage_clips) { + (plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY || + (prop != config->prop_fb_id && + prop != config->prop_in_fence_fd && + prop != config->prop_fb_damage_clips))) { ret = drm_atomic_plane_get_property(plane, plane_state, prop, &old_val); ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); break; } - if (async_flip && plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY) { - drm_dbg_atomic(prop->dev, - "[OBJECT:%d] Only primary planes can be changed during async flip\n", - obj->id); - ret = -EINVAL; - break; - } - ret = drm_atomic_plane_set_property(plane, plane_state, file_priv, prop, prop_value); diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 6a8e45e9d0ec..103c185bb1c8 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -851,6 +851,7 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * drm_buddy_block_trim - free unused pages * * @mm: DRM buddy manager + * @start: start address to begin the trimming. * @new_size: original size requested * @blocks: Input and output list of allocated blocks. * MUST contain single block as input to be trimmed. @@ -866,11 +867,13 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * 0 on success, error code on failure. */ int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks) { struct drm_buddy_block *parent; struct drm_buddy_block *block; + u64 block_start, block_end; LIST_HEAD(dfs); u64 new_start; int err; @@ -882,6 +885,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm, struct drm_buddy_block, link); + block_start = drm_buddy_block_offset(block); + block_end = block_start + drm_buddy_block_size(mm, block); + if (WARN_ON(!drm_buddy_block_is_allocated(block))) return -EINVAL; @@ -894,6 +900,20 @@ int drm_buddy_block_trim(struct drm_buddy *mm, if (new_size == drm_buddy_block_size(mm, block)) return 0; + new_start = block_start; + if (start) { + new_start = *start; + + if (new_start < block_start) + return -EINVAL; + + if (!IS_ALIGNED(new_start, mm->chunk_size)) + return -EINVAL; + + if (range_overflows(new_start, new_size, block_end)) + return -EINVAL; + } + list_del(&block->link); mark_free(mm, block); mm->avail += drm_buddy_block_size(mm, block); @@ -904,7 +924,6 @@ int drm_buddy_block_trim(struct drm_buddy *mm, parent = block->parent; block->parent = NULL; - new_start = drm_buddy_block_offset(block); list_add(&block->tmp_link, &dfs); err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL); if (err) { @@ -1066,7 +1085,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } while (1); /* Trim the allocated block to the required size */ - if (original_size != size) { + if (!(flags & DRM_BUDDY_TRIM_DISABLE) && + original_size != size) { struct list_head *trim_list; LIST_HEAD(temp); u64 trim_size; @@ -1083,6 +1103,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } drm_buddy_block_trim(mm, + NULL, trim_size, trim_list); diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 31af5cf37a09..cee5eafbfb81 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -880,6 +880,11 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, kfree(modeset->mode); modeset->mode = drm_mode_duplicate(dev, mode); + if (!modeset->mode) { + ret = -ENOMEM; + break; + } + drm_connector_get(connector); modeset->connectors[modeset->num_connectors++] = connector; modeset->x = offset->x; diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index c16c7678237e..0830cae9a4d0 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -208,6 +208,18 @@ static const struct dmi_system_id orientation_data[] = { DMI_MATCH(DMI_BOARD_NAME, "KUN"), }, .driver_data = (void *)&lcd1600x2560_rightside_up, + }, { /* AYN Loki Max */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Max"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, + }, { /* AYN Loki Zero */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Zero"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, }, { /* Chuwi HiBook (CWI514) */ .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"), diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c index cf24dfdeb6b2..0081190201a7 100644 --- a/drivers/gpu/drm/drm_print.c +++ b/drivers/gpu/drm/drm_print.c @@ -100,8 +100,9 @@ void __drm_puts_coredump(struct drm_printer *p, const char *str) copy = iterator->remain; /* Copy out the bit of the string that we need */ - memcpy(iterator->data, - str + (iterator->start - iterator->offset), copy); + if (iterator->data) + memcpy(iterator->data, + str + (iterator->start - iterator->offset), copy); iterator->offset = iterator->start + copy; iterator->remain -= copy; @@ -110,7 +111,8 @@ void __drm_puts_coredump(struct drm_printer *p, const char *str) len = min_t(ssize_t, strlen(str), iterator->remain); - memcpy(iterator->data + pos, str, len); + if (iterator->data) + memcpy(iterator->data + pos, str, len); iterator->offset += len; iterator->remain -= len; @@ -140,8 +142,9 @@ void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf) if ((iterator->offset >= iterator->start) && (len < iterator->remain)) { ssize_t pos = iterator->offset - iterator->start; - snprintf(((char *) iterator->data) + pos, - iterator->remain, "%pV", vaf); + if (iterator->data) + snprintf(((char *) iterator->data) + pos, + iterator->remain, "%pV", vaf); iterator->offset += len; iterator->remain -= len; diff --git a/drivers/gpu/drm/i915/display/dvo_ch7017.c b/drivers/gpu/drm/i915/display/dvo_ch7017.c index d0c3880d7f80..493e730c685b 100644 --- a/drivers/gpu/drm/i915/display/dvo_ch7017.c +++ b/drivers/gpu/drm/i915/display/dvo_ch7017.c @@ -170,13 +170,13 @@ static bool ch7017_read(struct intel_dvo_device *dvo, u8 addr, u8 *val) { struct i2c_msg msgs[] = { { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 1, .buf = &addr, }, { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = I2C_M_RD, .len = 1, .buf = val, @@ -189,7 +189,7 @@ static bool ch7017_write(struct intel_dvo_device *dvo, u8 addr, u8 val) { u8 buf[2] = { addr, val }; struct i2c_msg msg = { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 2, .buf = buf, @@ -197,7 +197,7 @@ static bool ch7017_write(struct intel_dvo_device *dvo, u8 addr, u8 val) return i2c_transfer(dvo->i2c_bus, &msg, 1) == 1; } -/** Probes for a CH7017 on the given bus and slave address. */ +/** Probes for a CH7017 on the given bus and target address. */ static bool ch7017_init(struct intel_dvo_device *dvo, struct i2c_adapter *adapter) { @@ -227,13 +227,13 @@ static bool ch7017_init(struct intel_dvo_device *dvo, break; default: DRM_DEBUG_KMS("ch701x not detected, got %d: from %s " - "slave %d.\n", - val, adapter->name, dvo->slave_addr); + "target %d.\n", + val, adapter->name, dvo->target_addr); goto fail; } DRM_DEBUG_KMS("%s detected on %s, addr %d\n", - str, adapter->name, dvo->slave_addr); + str, adapter->name, dvo->target_addr); return true; fail: diff --git a/drivers/gpu/drm/i915/display/dvo_ch7xxx.c b/drivers/gpu/drm/i915/display/dvo_ch7xxx.c index 2e8e85da5a40..534b8544e0a4 100644 --- a/drivers/gpu/drm/i915/display/dvo_ch7xxx.c +++ b/drivers/gpu/drm/i915/display/dvo_ch7xxx.c @@ -153,13 +153,13 @@ static bool ch7xxx_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) struct i2c_msg msgs[] = { { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 1, .buf = out_buf, }, { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = I2C_M_RD, .len = 1, .buf = in_buf, @@ -176,7 +176,7 @@ static bool ch7xxx_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) if (!ch7xxx->quiet) { DRM_DEBUG_KMS("Unable to read register 0x%02x from %s:%02x.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; } @@ -188,7 +188,7 @@ static bool ch7xxx_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) struct i2c_adapter *adapter = dvo->i2c_bus; u8 out_buf[2]; struct i2c_msg msg = { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 2, .buf = out_buf, @@ -202,7 +202,7 @@ static bool ch7xxx_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) if (!ch7xxx->quiet) { DRM_DEBUG_KMS("Unable to write register 0x%02x to %s:%d.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; @@ -229,8 +229,8 @@ static bool ch7xxx_init(struct intel_dvo_device *dvo, name = ch7xxx_get_id(vendor); if (!name) { - DRM_DEBUG_KMS("ch7xxx not detected; got VID 0x%02x from %s slave %d.\n", - vendor, adapter->name, dvo->slave_addr); + DRM_DEBUG_KMS("ch7xxx not detected; got VID 0x%02x from %s target %d.\n", + vendor, adapter->name, dvo->target_addr); goto out; } @@ -240,8 +240,8 @@ static bool ch7xxx_init(struct intel_dvo_device *dvo, devid = ch7xxx_get_did(device); if (!devid) { - DRM_DEBUG_KMS("ch7xxx not detected; got DID 0x%02x from %s slave %d.\n", - device, adapter->name, dvo->slave_addr); + DRM_DEBUG_KMS("ch7xxx not detected; got DID 0x%02x from %s target %d.\n", + device, adapter->name, dvo->target_addr); goto out; } diff --git a/drivers/gpu/drm/i915/display/dvo_ivch.c b/drivers/gpu/drm/i915/display/dvo_ivch.c index eef72bb3b767..0d5cce6051b1 100644 --- a/drivers/gpu/drm/i915/display/dvo_ivch.c +++ b/drivers/gpu/drm/i915/display/dvo_ivch.c @@ -198,7 +198,7 @@ static bool ivch_read(struct intel_dvo_device *dvo, int addr, u16 *data) struct i2c_msg msgs[] = { { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = I2C_M_RD, .len = 0, }, @@ -209,7 +209,7 @@ static bool ivch_read(struct intel_dvo_device *dvo, int addr, u16 *data) .buf = out_buf, }, { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = I2C_M_RD | I2C_M_NOSTART, .len = 2, .buf = in_buf, @@ -226,7 +226,7 @@ static bool ivch_read(struct intel_dvo_device *dvo, int addr, u16 *data) if (!priv->quiet) { DRM_DEBUG_KMS("Unable to read register 0x%02x from " "%s:%02x.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; } @@ -238,7 +238,7 @@ static bool ivch_write(struct intel_dvo_device *dvo, int addr, u16 data) struct i2c_adapter *adapter = dvo->i2c_bus; u8 out_buf[3]; struct i2c_msg msg = { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 3, .buf = out_buf, @@ -253,13 +253,13 @@ static bool ivch_write(struct intel_dvo_device *dvo, int addr, u16 data) if (!priv->quiet) { DRM_DEBUG_KMS("Unable to write register 0x%02x to %s:%d.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; } -/* Probes the given bus and slave address for an ivch */ +/* Probes the given bus and target address for an ivch */ static bool ivch_init(struct intel_dvo_device *dvo, struct i2c_adapter *adapter) { @@ -283,10 +283,10 @@ static bool ivch_init(struct intel_dvo_device *dvo, * very unique, check that the value in the base address field matches * the address it's responding on. */ - if ((temp & VR00_BASE_ADDRESS_MASK) != dvo->slave_addr) { + if ((temp & VR00_BASE_ADDRESS_MASK) != dvo->target_addr) { DRM_DEBUG_KMS("ivch detect failed due to address mismatch " "(%d vs %d)\n", - (temp & VR00_BASE_ADDRESS_MASK), dvo->slave_addr); + (temp & VR00_BASE_ADDRESS_MASK), dvo->target_addr); goto out; } diff --git a/drivers/gpu/drm/i915/display/dvo_ns2501.c b/drivers/gpu/drm/i915/display/dvo_ns2501.c index 21486008dae9..9d47f8a93e94 100644 --- a/drivers/gpu/drm/i915/display/dvo_ns2501.c +++ b/drivers/gpu/drm/i915/display/dvo_ns2501.c @@ -398,13 +398,13 @@ static bool ns2501_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) struct i2c_msg msgs[] = { { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 1, .buf = out_buf, }, { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = I2C_M_RD, .len = 1, .buf = in_buf, @@ -422,7 +422,7 @@ static bool ns2501_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) if (!ns->quiet) { DRM_DEBUG_KMS ("Unable to read register 0x%02x from %s:0x%02x.\n", addr, - adapter->name, dvo->slave_addr); + adapter->name, dvo->target_addr); } return false; @@ -441,7 +441,7 @@ static bool ns2501_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) u8 out_buf[2]; struct i2c_msg msg = { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 2, .buf = out_buf, @@ -456,7 +456,7 @@ static bool ns2501_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) if (!ns->quiet) { DRM_DEBUG_KMS("Unable to write register 0x%02x to %s:%d\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; @@ -487,8 +487,8 @@ static bool ns2501_init(struct intel_dvo_device *dvo, goto out; if (ch != (NS2501_VID & 0xff)) { - DRM_DEBUG_KMS("ns2501 not detected got %d: from %s Slave %d.\n", - ch, adapter->name, dvo->slave_addr); + DRM_DEBUG_KMS("ns2501 not detected got %d: from %s Target %d.\n", + ch, adapter->name, dvo->target_addr); goto out; } @@ -496,8 +496,8 @@ static bool ns2501_init(struct intel_dvo_device *dvo, goto out; if (ch != (NS2501_DID & 0xff)) { - DRM_DEBUG_KMS("ns2501 not detected got %d: from %s Slave %d.\n", - ch, adapter->name, dvo->slave_addr); + DRM_DEBUG_KMS("ns2501 not detected got %d: from %s Target %d.\n", + ch, adapter->name, dvo->target_addr); goto out; } ns->quiet = false; diff --git a/drivers/gpu/drm/i915/display/dvo_sil164.c b/drivers/gpu/drm/i915/display/dvo_sil164.c index 6c461024c8e3..a8dd40c00997 100644 --- a/drivers/gpu/drm/i915/display/dvo_sil164.c +++ b/drivers/gpu/drm/i915/display/dvo_sil164.c @@ -79,13 +79,13 @@ static bool sil164_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) struct i2c_msg msgs[] = { { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 1, .buf = out_buf, }, { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = I2C_M_RD, .len = 1, .buf = in_buf, @@ -102,7 +102,7 @@ static bool sil164_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) if (!sil->quiet) { DRM_DEBUG_KMS("Unable to read register 0x%02x from %s:%02x.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; } @@ -113,7 +113,7 @@ static bool sil164_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) struct i2c_adapter *adapter = dvo->i2c_bus; u8 out_buf[2]; struct i2c_msg msg = { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 2, .buf = out_buf, @@ -127,7 +127,7 @@ static bool sil164_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) if (!sil->quiet) { DRM_DEBUG_KMS("Unable to write register 0x%02x to %s:%d.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; @@ -153,8 +153,8 @@ static bool sil164_init(struct intel_dvo_device *dvo, goto out; if (ch != (SIL164_VID & 0xff)) { - DRM_DEBUG_KMS("sil164 not detected got %d: from %s Slave %d.\n", - ch, adapter->name, dvo->slave_addr); + DRM_DEBUG_KMS("sil164 not detected got %d: from %s Target %d.\n", + ch, adapter->name, dvo->target_addr); goto out; } @@ -162,8 +162,8 @@ static bool sil164_init(struct intel_dvo_device *dvo, goto out; if (ch != (SIL164_DID & 0xff)) { - DRM_DEBUG_KMS("sil164 not detected got %d: from %s Slave %d.\n", - ch, adapter->name, dvo->slave_addr); + DRM_DEBUG_KMS("sil164 not detected got %d: from %s Target %d.\n", + ch, adapter->name, dvo->target_addr); goto out; } sil->quiet = false; diff --git a/drivers/gpu/drm/i915/display/dvo_tfp410.c b/drivers/gpu/drm/i915/display/dvo_tfp410.c index 0939e097f4f9..d9a0cd753a87 100644 --- a/drivers/gpu/drm/i915/display/dvo_tfp410.c +++ b/drivers/gpu/drm/i915/display/dvo_tfp410.c @@ -100,13 +100,13 @@ static bool tfp410_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) struct i2c_msg msgs[] = { { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 1, .buf = out_buf, }, { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = I2C_M_RD, .len = 1, .buf = in_buf, @@ -123,7 +123,7 @@ static bool tfp410_readb(struct intel_dvo_device *dvo, int addr, u8 *ch) if (!tfp->quiet) { DRM_DEBUG_KMS("Unable to read register 0x%02x from %s:%02x.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; } @@ -134,7 +134,7 @@ static bool tfp410_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) struct i2c_adapter *adapter = dvo->i2c_bus; u8 out_buf[2]; struct i2c_msg msg = { - .addr = dvo->slave_addr, + .addr = dvo->target_addr, .flags = 0, .len = 2, .buf = out_buf, @@ -148,7 +148,7 @@ static bool tfp410_writeb(struct intel_dvo_device *dvo, int addr, u8 ch) if (!tfp->quiet) { DRM_DEBUG_KMS("Unable to write register 0x%02x to %s:%d.\n", - addr, adapter->name, dvo->slave_addr); + addr, adapter->name, dvo->target_addr); } return false; @@ -183,15 +183,15 @@ static bool tfp410_init(struct intel_dvo_device *dvo, if ((id = tfp410_getid(dvo, TFP410_VID_LO)) != TFP410_VID) { DRM_DEBUG_KMS("tfp410 not detected got VID %X: from %s " - "Slave %d.\n", - id, adapter->name, dvo->slave_addr); + "Target %d.\n", + id, adapter->name, dvo->target_addr); goto out; } if ((id = tfp410_getid(dvo, TFP410_DID_LO)) != TFP410_DID) { DRM_DEBUG_KMS("tfp410 not detected got DID %X: from %s " - "Slave %d.\n", - id, adapter->name, dvo->slave_addr); + "Target %d.\n", + id, adapter->name, dvo->target_addr); goto out; } tfp->quiet = false; diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c index a8e746a0f670..c2128b46bdbd 100644 --- a/drivers/gpu/drm/i915/display/g4x_dp.c +++ b/drivers/gpu/drm/i915/display/g4x_dp.c @@ -1279,6 +1279,7 @@ static const struct drm_encoder_funcs intel_dp_enc_funcs = { bool g4x_dp_init(struct drm_i915_private *dev_priv, i915_reg_t output_reg, enum port port) { + struct intel_display *display = &dev_priv->display; const struct intel_bios_encoder_data *devdata; struct intel_digital_port *dig_port; struct intel_encoder *intel_encoder; @@ -1288,7 +1289,7 @@ bool g4x_dp_init(struct drm_i915_private *dev_priv, if (!assert_port_valid(dev_priv, port)) return false; - devdata = intel_bios_encoder_data_lookup(dev_priv, port); + devdata = intel_bios_encoder_data_lookup(display, port); /* FIXME bail? */ if (!devdata) diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c index 8096492b3fad..46f23bdb4c17 100644 --- a/drivers/gpu/drm/i915/display/g4x_hdmi.c +++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c @@ -686,6 +686,7 @@ static bool assert_hdmi_port_valid(struct drm_i915_private *i915, enum port port void g4x_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, enum port port) { + struct intel_display *display = &dev_priv->display; const struct intel_bios_encoder_data *devdata; struct intel_digital_port *dig_port; struct intel_encoder *intel_encoder; @@ -697,7 +698,7 @@ void g4x_hdmi_init(struct drm_i915_private *dev_priv, if (!assert_hdmi_port_valid(dev_priv, port)) return; - devdata = intel_bios_encoder_data_lookup(dev_priv, port); + devdata = intel_bios_encoder_data_lookup(display, port); /* FIXME bail? */ if (!devdata) diff --git a/drivers/gpu/drm/i915/display/i9xx_wm.c b/drivers/gpu/drm/i915/display/i9xx_wm.c index 2b7c3d270b17..15cda57fbc91 100644 --- a/drivers/gpu/drm/i915/display/i9xx_wm.c +++ b/drivers/gpu/drm/i915/display/i9xx_wm.c @@ -4028,7 +4028,7 @@ void i9xx_wm_init(struct drm_i915_private *dev_priv) dev_priv->display.funcs.wm = &g4x_wm_funcs; } else if (IS_PINEVIEW(dev_priv)) { if (!pnv_get_cxsr_latency(dev_priv)) { - drm_info(&dev_priv->drm, "Unknown FSB/MEM, disabling CxSR\n"); + drm_info(&dev_priv->drm, "Unknown FSB/MEM, disabling CxSR\n"); /* Disable CxSR and never update its watermark again */ intel_set_memory_cxsr(dev_priv, false); dev_priv->display.funcs.wm = &nop_funcs; diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index ae8f6617aa70..293efc1f841d 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -27,6 +27,7 @@ #include <drm/display/drm_dsc_helper.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_fixed.h> #include <drm/drm_mipi_dsi.h> #include "i915_reg.h" @@ -330,7 +331,7 @@ static int afe_clk(struct intel_encoder *encoder, int bpp; if (crtc_state->dsc.compression_enable) - bpp = to_bpp_int(crtc_state->dsc.compressed_bpp_x16); + bpp = fxp_q4_to_int(crtc_state->dsc.compressed_bpp_x16); else bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); @@ -863,7 +864,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, * compressed and non-compressed bpp. */ if (crtc_state->dsc.compression_enable) { - mul = to_bpp_int(crtc_state->dsc.compressed_bpp_x16); + mul = fxp_q4_to_int(crtc_state->dsc.compressed_bpp_x16); div = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); } @@ -887,7 +888,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder, int bpp, line_time_us, byte_clk_period_ns; if (crtc_state->dsc.compression_enable) - bpp = to_bpp_int(crtc_state->dsc.compressed_bpp_x16); + bpp = fxp_q4_to_int(crtc_state->dsc.compressed_bpp_x16); else bpp = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); @@ -1470,7 +1471,7 @@ static void gen11_dsi_get_timings(struct intel_encoder *encoder, &pipe_config->hw.adjusted_mode; if (pipe_config->dsc.compressed_bpp_x16) { - int div = to_bpp_int(pipe_config->dsc.compressed_bpp_x16); + int div = fxp_q4_to_int(pipe_config->dsc.compressed_bpp_x16); int mul = mipi_dsi_pixel_format_to_bpp(intel_dsi->pixel_format); adjusted_mode->crtc_htotal = @@ -1944,6 +1945,7 @@ static void icl_dsi_add_properties(struct intel_connector *connector) void icl_dsi_init(struct drm_i915_private *dev_priv, const struct intel_bios_encoder_data *devdata) { + struct intel_display *display = &dev_priv->display; struct intel_dsi *intel_dsi; struct intel_encoder *encoder; struct intel_connector *intel_connector; @@ -2007,7 +2009,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv, intel_dsi->panel_power_off_time = ktime_get_boottime(); - intel_bios_init_panel_late(dev_priv, &intel_connector->panel, encoder->devdata, NULL); + intel_bios_init_panel_late(display, &intel_connector->panel, encoder->devdata, NULL); mutex_lock(&dev_priv->drm.mode_config.mutex); intel_panel_add_vbt_lfp_fixed_mode(intel_connector); diff --git a/drivers/gpu/drm/i915/display/intel_acpi.c b/drivers/gpu/drm/i915/display/intel_acpi.c index 0aa3999374e2..c3b29a331d72 100644 --- a/drivers/gpu/drm/i915/display/intel_acpi.c +++ b/drivers/gpu/drm/i915/display/intel_acpi.c @@ -183,9 +183,9 @@ void intel_unregister_dsm_handler(void) { } -void intel_dsm_get_bios_data_funcs_supported(struct drm_i915_private *i915) +void intel_dsm_get_bios_data_funcs_supported(struct intel_display *display) { - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct pci_dev *pdev = to_pci_dev(display->drm->dev); acpi_handle dhandle; union acpi_object *obj; @@ -263,15 +263,14 @@ static u32 acpi_display_type(struct intel_connector *connector) return display_type; } -void intel_acpi_device_id_update(struct drm_i915_private *dev_priv) +void intel_acpi_device_id_update(struct intel_display *display) { - struct drm_device *drm_dev = &dev_priv->drm; struct intel_connector *connector; struct drm_connector_list_iter conn_iter; u8 display_index[16] = {}; /* Populate the ACPI IDs for all connectors for a given drm_device */ - drm_connector_list_iter_begin(drm_dev, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { u32 device_id, type; @@ -288,10 +287,10 @@ void intel_acpi_device_id_update(struct drm_i915_private *dev_priv) } /* NOTE: The connector order must be final before this is called. */ -void intel_acpi_assign_connector_fwnodes(struct drm_i915_private *i915) +void intel_acpi_assign_connector_fwnodes(struct intel_display *display) { + struct drm_device *drm_dev = display->drm; struct drm_connector_list_iter conn_iter; - struct drm_device *drm_dev = &i915->drm; struct fwnode_handle *fwnode = NULL; struct drm_connector *connector; struct acpi_device *adev; @@ -333,7 +332,7 @@ void intel_acpi_assign_connector_fwnodes(struct drm_i915_private *i915) fwnode_handle_put(fwnode); } -void intel_acpi_video_register(struct drm_i915_private *i915) +void intel_acpi_video_register(struct intel_display *display) { struct drm_connector_list_iter conn_iter; struct drm_connector *connector; @@ -347,7 +346,7 @@ void intel_acpi_video_register(struct drm_i915_private *i915) * a native backlight later and acpi_video_register_backlight() should * only be called after any native backlights have been registered. */ - drm_connector_list_iter_begin(&i915->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { struct intel_panel *panel = &to_intel_connector(connector)->panel; diff --git a/drivers/gpu/drm/i915/display/intel_acpi.h b/drivers/gpu/drm/i915/display/intel_acpi.h index 6a0007452f95..788a63071661 100644 --- a/drivers/gpu/drm/i915/display/intel_acpi.h +++ b/drivers/gpu/drm/i915/display/intel_acpi.h @@ -6,26 +6,26 @@ #ifndef __INTEL_ACPI_H__ #define __INTEL_ACPI_H__ -struct drm_i915_private; +struct intel_display; #ifdef CONFIG_ACPI void intel_register_dsm_handler(void); void intel_unregister_dsm_handler(void); -void intel_dsm_get_bios_data_funcs_supported(struct drm_i915_private *i915); -void intel_acpi_device_id_update(struct drm_i915_private *i915); -void intel_acpi_assign_connector_fwnodes(struct drm_i915_private *i915); -void intel_acpi_video_register(struct drm_i915_private *i915); +void intel_dsm_get_bios_data_funcs_supported(struct intel_display *display); +void intel_acpi_device_id_update(struct intel_display *display); +void intel_acpi_assign_connector_fwnodes(struct intel_display *display); +void intel_acpi_video_register(struct intel_display *display); #else static inline void intel_register_dsm_handler(void) { return; } static inline void intel_unregister_dsm_handler(void) { return; } static inline -void intel_dsm_get_bios_data_funcs_supported(struct drm_i915_private *i915) { return; } +void intel_dsm_get_bios_data_funcs_supported(struct intel_display *display) { return; } static inline -void intel_acpi_device_id_update(struct drm_i915_private *i915) { return; } +void intel_acpi_device_id_update(struct intel_display *display) { return; } static inline -void intel_acpi_assign_connector_fwnodes(struct drm_i915_private *i915) { return; } +void intel_acpi_assign_connector_fwnodes(struct intel_display *display) { return; } static inline -void intel_acpi_video_register(struct drm_i915_private *i915) { return; } +void intel_acpi_video_register(struct intel_display *display) { return; } #endif /* CONFIG_ACPI */ #endif /* __INTEL_ACPI_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_alpm.c b/drivers/gpu/drm/i915/display/intel_alpm.c index 866b3b409c4d..82ee778b2efe 100644 --- a/drivers/gpu/drm/i915/display/intel_alpm.c +++ b/drivers/gpu/drm/i915/display/intel_alpm.c @@ -139,7 +139,7 @@ static int _lnl_compute_aux_less_alpm_params(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); int aux_less_wake_time, aux_less_wake_lines, silence_period, lfps_half_cycle; @@ -158,7 +158,7 @@ _lnl_compute_aux_less_alpm_params(struct intel_dp *intel_dp, lfps_half_cycle > PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION_MASK) return false; - if (i915->display.params.psr_safest_params) + if (display->params.psr_safest_params) aux_less_wake_lines = ALPM_CTL_AUX_LESS_WAKE_TIME_MASK; intel_dp->alpm_parameters.aux_less_wake_lines = aux_less_wake_lines; @@ -171,10 +171,10 @@ _lnl_compute_aux_less_alpm_params(struct intel_dp *intel_dp, static bool _lnl_compute_alpm_params(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); int check_entry_lines; - if (DISPLAY_VER(i915) < 20) + if (DISPLAY_VER(display) < 20) return true; /* ALPM Entry Check = 2 + CEILING( 5us /tline ) */ @@ -187,7 +187,7 @@ static bool _lnl_compute_alpm_params(struct intel_dp *intel_dp, if (!_lnl_compute_aux_less_alpm_params(intel_dp, crtc_state)) return false; - if (i915->display.params.psr_safest_params) + if (display->params.psr_safest_params) check_entry_lines = 15; intel_dp->alpm_parameters.check_entry_lines = check_entry_lines; @@ -212,9 +212,9 @@ static int tgl_io_buffer_wake_time(void) static int io_buffer_wake_time(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); - if (DISPLAY_VER(i915) >= 12) + if (DISPLAY_VER(display) >= 12) return tgl_io_buffer_wake_time(); else return skl_io_buffer_wake_time(); @@ -223,7 +223,7 @@ static int io_buffer_wake_time(const struct intel_crtc_state *crtc_state) bool intel_alpm_compute_params(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); int io_wake_lines, io_wake_time, fast_wake_lines, fast_wake_time; int tfw_exit_latency = 20; /* eDP spec */ int phy_wake = 4; /* eDP spec */ @@ -236,9 +236,9 @@ bool intel_alpm_compute_params(struct intel_dp *intel_dp, fast_wake_time = precharge + preamble + phy_wake + tfw_exit_latency; - if (DISPLAY_VER(i915) >= 20) + if (DISPLAY_VER(display) >= 20) max_wake_lines = 68; - else if (DISPLAY_VER(i915) >= 12) + else if (DISPLAY_VER(display) >= 12) max_wake_lines = 12; else max_wake_lines = 8; @@ -255,7 +255,7 @@ bool intel_alpm_compute_params(struct intel_dp *intel_dp, if (!_lnl_compute_alpm_params(intel_dp, crtc_state)) return false; - if (i915->display.params.psr_safest_params) + if (display->params.psr_safest_params) io_wake_lines = fast_wake_lines = max_wake_lines; /* According to Bspec lower limit should be set as 7 lines. */ @@ -269,7 +269,7 @@ void intel_alpm_lobf_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; int waketime_in_lines, first_sdp_position; int context_latency, guardband; @@ -277,10 +277,10 @@ void intel_alpm_lobf_compute_config(struct intel_dp *intel_dp, if (!intel_dp_is_edp(intel_dp)) return; - if (DISPLAY_VER(i915) < 20) + if (DISPLAY_VER(display) < 20) return; - if (!intel_dp_as_sdp_supported(intel_dp)) + if (!intel_dp->as_sdp_supported) return; if (crtc_state->has_psr) @@ -309,13 +309,13 @@ void intel_alpm_lobf_compute_config(struct intel_dp *intel_dp, static void lnl_alpm_configure(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; enum port port = dp_to_dig_port(intel_dp)->base.port; u32 alpm_ctl; - if (DISPLAY_VER(dev_priv) < 20 || (!intel_dp->psr.sel_update_enabled && - !intel_dp_is_edp(intel_dp))) + if (DISPLAY_VER(display) < 20 || + (!intel_dp->psr.sel_update_enabled && !intel_dp_is_edp(intel_dp))) return; /* @@ -329,16 +329,16 @@ static void lnl_alpm_configure(struct intel_dp *intel_dp, ALPM_CTL_AUX_LESS_SLEEP_HOLD_TIME_50_SYMBOLS | ALPM_CTL_AUX_LESS_WAKE_TIME(intel_dp->alpm_parameters.aux_less_wake_lines); - intel_de_write(dev_priv, - PORT_ALPM_CTL(dev_priv, port), + intel_de_write(display, + PORT_ALPM_CTL(display, port), PORT_ALPM_CTL_ALPM_AUX_LESS_ENABLE | PORT_ALPM_CTL_MAX_PHY_SWING_SETUP(15) | PORT_ALPM_CTL_MAX_PHY_SWING_HOLD(0) | PORT_ALPM_CTL_SILENCE_PERIOD( intel_dp->alpm_parameters.silence_period_sym_clocks)); - intel_de_write(dev_priv, - PORT_ALPM_LFPS_CTL(dev_priv, port), + intel_de_write(display, + PORT_ALPM_LFPS_CTL(display, port), PORT_ALPM_LFPS_CTL_LFPS_CYCLE_COUNT(10) | PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION( intel_dp->alpm_parameters.lfps_half_cycle_num_of_syms) | @@ -356,7 +356,7 @@ static void lnl_alpm_configure(struct intel_dp *intel_dp, alpm_ctl |= ALPM_CTL_ALPM_ENTRY_CHECK(intel_dp->alpm_parameters.check_entry_lines); - intel_de_write(dev_priv, ALPM_CTL(dev_priv, cpu_transcoder), alpm_ctl); + intel_de_write(display, ALPM_CTL(display, cpu_transcoder), alpm_ctl); } void intel_alpm_configure(struct intel_dp *intel_dp, @@ -368,14 +368,14 @@ void intel_alpm_configure(struct intel_dp *intel_dp, static int i915_edp_lobf_info_show(struct seq_file *m, void *data) { struct intel_connector *connector = m->private; - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct drm_crtc *crtc; struct intel_crtc_state *crtc_state; enum transcoder cpu_transcoder; u32 alpm_ctl; int ret; - ret = drm_modeset_lock_single_interruptible(&dev_priv->drm.mode_config.connection_mutex); + ret = drm_modeset_lock_single_interruptible(&display->drm->mode_config.connection_mutex); if (ret) return ret; @@ -387,14 +387,14 @@ static int i915_edp_lobf_info_show(struct seq_file *m, void *data) crtc_state = to_intel_crtc_state(crtc->state); cpu_transcoder = crtc_state->cpu_transcoder; - alpm_ctl = intel_de_read(dev_priv, ALPM_CTL(dev_priv, cpu_transcoder)); + alpm_ctl = intel_de_read(display, ALPM_CTL(display, cpu_transcoder)); seq_printf(m, "LOBF status: %s\n", str_enabled_disabled(alpm_ctl & ALPM_CTL_LOBF_ENABLE)); seq_printf(m, "Aux-wake alpm status: %s\n", str_enabled_disabled(!(alpm_ctl & ALPM_CTL_ALPM_AUX_LESS_ENABLE))); seq_printf(m, "Aux-less alpm status: %s\n", str_enabled_disabled(alpm_ctl & ALPM_CTL_ALPM_AUX_LESS_ENABLE)); out: - drm_modeset_unlock(&dev_priv->drm.mode_config.connection_mutex); + drm_modeset_unlock(&display->drm->mode_config.connection_mutex); return ret; } @@ -403,10 +403,10 @@ DEFINE_SHOW_ATTRIBUTE(i915_edp_lobf_info); void intel_alpm_lobf_debugfs_add(struct intel_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct dentry *root = connector->base.debugfs_entry; - if (DISPLAY_VER(i915) < 20 || + if (DISPLAY_VER(display) < 20 || connector->base.connector_type != DRM_MODE_CONNECTOR_eDP) return; diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 76aa10b6f647..12d6ed940751 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -276,7 +276,8 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->do_async_flip = false; crtc_state->fb_bits = 0; crtc_state->update_planes = 0; - crtc_state->dsb = NULL; + crtc_state->dsb_color_vblank = NULL; + crtc_state->dsb_color_commit = NULL; return &crtc_state->uapi; } @@ -310,7 +311,8 @@ intel_crtc_destroy_state(struct drm_crtc *crtc, { struct intel_crtc_state *crtc_state = to_intel_crtc_state(state); - drm_WARN_ON(crtc->dev, crtc_state->dsb); + drm_WARN_ON(crtc->dev, crtc_state->dsb_color_vblank); + drm_WARN_ON(crtc->dev, crtc_state->dsb_color_commit); __drm_atomic_helper_crtc_destroy_state(&crtc_state->uapi); intel_crtc_free_hw_state(crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index b9bafec06fb8..9b8508a503f7 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -26,6 +26,7 @@ #include <drm/drm_edid.h> #include <drm/drm_eld.h> +#include <drm/drm_fixed.h> #include <drm/intel/i915_component.h> #include "i915_drv.h" @@ -452,8 +453,8 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder, lanes = crtc_state->lane_count; drm_dbg_kms(&i915->drm, - "h_active = %u link_clk = %u : lanes = %u vdsc_bpp = " BPP_X16_FMT " cdclk = %u\n", - h_active, link_clk, lanes, BPP_X16_ARGS(vdsc_bppx16), cdclk); + "h_active = %u link_clk = %u : lanes = %u vdsc_bpp = " FXP_Q4_FMT " cdclk = %u\n", + h_active, link_clk, lanes, FXP_Q4_ARGS(vdsc_bppx16), cdclk); if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bppx16 || !cdclk)) return 0; diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 071668bfe5d1..9e05745d797d 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -455,7 +455,7 @@ void intel_backlight_disable(const struct drm_connector_state *old_conn_state) mutex_lock(&i915->display.backlight.lock); if (panel->backlight.device) - panel->backlight.device->props.power = FB_BLANK_POWERDOWN; + panel->backlight.device->props.power = BACKLIGHT_POWER_OFF; panel->backlight.enabled = false; panel->backlight.funcs->disable(old_conn_state, 0); @@ -773,7 +773,7 @@ static void __intel_backlight_enable(const struct intel_crtc_state *crtc_state, panel->backlight.funcs->enable(crtc_state, conn_state, panel->backlight.level); panel->backlight.enabled = true; if (panel->backlight.device) - panel->backlight.device->props.power = FB_BLANK_UNBLANK; + panel->backlight.device->props.power = BACKLIGHT_POWER_ON; } void intel_backlight_enable(const struct intel_crtc_state *crtc_state, @@ -870,12 +870,12 @@ static int intel_backlight_device_update_status(struct backlight_device *bd) */ if (panel->backlight.enabled) { if (panel->backlight.power) { - bool enable = bd->props.power == FB_BLANK_UNBLANK && + bool enable = bd->props.power == BACKLIGHT_POWER_ON && bd->props.brightness != 0; panel->backlight.power(connector, enable); } } else { - bd->props.power = FB_BLANK_POWERDOWN; + bd->props.power = BACKLIGHT_POWER_OFF; } drm_modeset_unlock(&i915->drm.mode_config.connection_mutex); @@ -945,9 +945,9 @@ int intel_backlight_device_register(struct intel_connector *connector) props.max_brightness); if (panel->backlight.enabled) - props.power = FB_BLANK_UNBLANK; + props.power = BACKLIGHT_POWER_ON; else - props.power = FB_BLANK_POWERDOWN; + props.power = BACKLIGHT_POWER_OFF; name = kstrdup_const("intel_backlight", GFP_KERNEL); if (!name) @@ -1011,7 +1011,7 @@ static u32 cnp_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) { struct drm_i915_private *i915 = to_i915(connector->base.dev); - return DIV_ROUND_CLOSEST(KHz(RUNTIME_INFO(i915)->rawclk_freq), + return DIV_ROUND_CLOSEST(KHz(DISPLAY_RUNTIME_INFO(i915)->rawclk_freq), pwm_freq_hz); } @@ -1073,7 +1073,7 @@ static u32 pch_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) { struct drm_i915_private *i915 = to_i915(connector->base.dev); - return DIV_ROUND_CLOSEST(KHz(RUNTIME_INFO(i915)->rawclk_freq), + return DIV_ROUND_CLOSEST(KHz(DISPLAY_RUNTIME_INFO(i915)->rawclk_freq), pwm_freq_hz * 128); } @@ -1091,7 +1091,7 @@ static u32 i9xx_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) int clock; if (IS_PINEVIEW(i915)) - clock = KHz(RUNTIME_INFO(i915)->rawclk_freq); + clock = KHz(DISPLAY_RUNTIME_INFO(i915)->rawclk_freq); else clock = KHz(i915->display.cdclk.hw.cdclk); @@ -1109,7 +1109,7 @@ static u32 i965_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) int clock; if (IS_G4X(i915)) - clock = KHz(RUNTIME_INFO(i915)->rawclk_freq); + clock = KHz(DISPLAY_RUNTIME_INFO(i915)->rawclk_freq); else clock = KHz(i915->display.cdclk.hw.cdclk); @@ -1133,7 +1133,7 @@ static u32 vlv_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) clock = MHz(25); mul = 16; } else { - clock = KHz(RUNTIME_INFO(i915)->rawclk_freq); + clock = KHz(DISPLAY_RUNTIME_INFO(i915)->rawclk_freq); mul = 128; } @@ -1449,6 +1449,9 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused) static int cnp_num_backlight_controllers(struct drm_i915_private *i915) { + if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_DG1) return 1; diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index ec1e3a380360..d49435af62c7 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -30,6 +30,7 @@ #include <drm/display/drm_dp_helper.h> #include <drm/display/drm_dsc_helper.h> #include <drm/drm_edid.h> +#include <drm/drm_fixed.h> #include "i915_drv.h" #include "i915_reg.h" @@ -65,15 +66,15 @@ /* Wrapper for VBT child device config */ struct intel_bios_encoder_data { - struct drm_i915_private *i915; + struct intel_display *display; struct child_device_config child; struct dsc_compression_parameters_entry *dsc; struct list_head node; }; -#define SLAVE_ADDR1 0x70 -#define SLAVE_ADDR2 0x72 +#define TARGET_ADDR1 0x70 +#define TARGET_ADDR2 0x72 /* Get BDB block size given a pointer to Block ID. */ static u32 _get_blocksize(const u8 *block_base) @@ -144,12 +145,12 @@ struct bdb_block_entry { }; static const void * -bdb_find_section(struct drm_i915_private *i915, +bdb_find_section(struct intel_display *display, enum bdb_block_id section_id) { struct bdb_block_entry *entry; - list_for_each_entry(entry, &i915->display.vbt.bdb_blocks, node) { + list_for_each_entry(entry, &display->vbt.bdb_blocks, node) { if (entry->section_id == section_id) return entry->data + 3; } @@ -199,12 +200,12 @@ static const struct { .min_size = sizeof(struct bdb_generic_dtd), }, }; -static size_t lfp_data_min_size(struct drm_i915_private *i915) +static size_t lfp_data_min_size(struct intel_display *display) { const struct bdb_lfp_data_ptrs *ptrs; size_t size; - ptrs = bdb_find_section(i915, BDB_LFP_DATA_PTRS); + ptrs = bdb_find_section(display, BDB_LFP_DATA_PTRS); if (!ptrs) return 0; @@ -359,7 +360,7 @@ static void next_lfp_data_ptr(struct lfp_data_ptr_table *next, next->offset = prev->offset + size; } -static void *generate_lfp_data_ptrs(struct drm_i915_private *i915, +static void *generate_lfp_data_ptrs(struct intel_display *display, const void *bdb) { int i, size, table_size, block_size, offset, fp_timing_size; @@ -373,7 +374,7 @@ static void *generate_lfp_data_ptrs(struct drm_i915_private *i915, * include block 41 and thus we don't need to * generate one. */ - if (i915->display.vbt.version < 155) + if (display->vbt.version < 155) return NULL; fp_timing_size = 38; @@ -382,7 +383,7 @@ static void *generate_lfp_data_ptrs(struct drm_i915_private *i915, if (!block) return NULL; - drm_dbg_kms(&i915->drm, "Generating LFP data table pointers\n"); + drm_dbg_kms(display->drm, "Generating LFP data table pointers\n"); block_size = get_blocksize(block); @@ -450,7 +451,7 @@ static void *generate_lfp_data_ptrs(struct drm_i915_private *i915, } static void -init_bdb_block(struct drm_i915_private *i915, +init_bdb_block(struct intel_display *display, const void *bdb, enum bdb_block_id section_id, size_t min_size) { @@ -463,14 +464,14 @@ init_bdb_block(struct drm_i915_private *i915, /* Modern VBTs lack the LFP data table pointers block, make one up */ if (!block && section_id == BDB_LFP_DATA_PTRS) { - temp_block = generate_lfp_data_ptrs(i915, bdb); + temp_block = generate_lfp_data_ptrs(display, bdb); if (temp_block) block = temp_block + 3; } if (!block) return; - drm_WARN(&i915->drm, min_size == 0, + drm_WARN(display->drm, min_size == 0, "Block %d min_size is zero\n", section_id); block_size = get_blocksize(block); @@ -494,20 +495,22 @@ init_bdb_block(struct drm_i915_private *i915, kfree(temp_block); - drm_dbg_kms(&i915->drm, "Found BDB block %d (size %zu, min size %zu)\n", + drm_dbg_kms(display->drm, + "Found BDB block %d (size %zu, min size %zu)\n", section_id, block_size, min_size); if (section_id == BDB_LFP_DATA_PTRS && !fixup_lfp_data_ptrs(bdb, entry->data + 3)) { - drm_err(&i915->drm, "VBT has malformed LFP data table pointers\n"); + drm_err(display->drm, + "VBT has malformed LFP data table pointers\n"); kfree(entry); return; } - list_add_tail(&entry->node, &i915->display.vbt.bdb_blocks); + list_add_tail(&entry->node, &display->vbt.bdb_blocks); } -static void init_bdb_blocks(struct drm_i915_private *i915, +static void init_bdb_blocks(struct intel_display *display, const void *bdb) { int i; @@ -517,14 +520,14 @@ static void init_bdb_blocks(struct drm_i915_private *i915, size_t min_size = bdb_blocks[i].min_size; if (section_id == BDB_LFP_DATA) - min_size = lfp_data_min_size(i915); + min_size = lfp_data_min_size(display); - init_bdb_block(i915, bdb, section_id, min_size); + init_bdb_block(display, bdb, section_id, min_size); } } static void -fill_detail_timing_data(struct drm_i915_private *i915, +fill_detail_timing_data(struct intel_display *display, struct drm_display_mode *panel_fixed_mode, const struct bdb_edid_dtd *dvo_timing) { @@ -567,12 +570,12 @@ fill_detail_timing_data(struct drm_i915_private *i915, /* Some VBTs have bogus h/vsync_end values */ if (panel_fixed_mode->hsync_end > panel_fixed_mode->htotal) { - drm_dbg_kms(&i915->drm, "reducing hsync_end %d->%d\n", + drm_dbg_kms(display->drm, "reducing hsync_end %d->%d\n", panel_fixed_mode->hsync_end, panel_fixed_mode->htotal); panel_fixed_mode->hsync_end = panel_fixed_mode->htotal; } if (panel_fixed_mode->vsync_end > panel_fixed_mode->vtotal) { - drm_dbg_kms(&i915->drm, "reducing vsync_end %d->%d\n", + drm_dbg_kms(display->drm, "reducing vsync_end %d->%d\n", panel_fixed_mode->vsync_end, panel_fixed_mode->vtotal); panel_fixed_mode->vsync_end = panel_fixed_mode->vtotal; } @@ -617,26 +620,26 @@ get_lfp_data_tail(const struct bdb_lfp_data *data, return NULL; } -static int opregion_get_panel_type(struct drm_i915_private *i915, +static int opregion_get_panel_type(struct intel_display *display, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid, bool use_fallback) { - return intel_opregion_get_panel_type(i915); + return intel_opregion_get_panel_type(display); } -static int vbt_get_panel_type(struct drm_i915_private *i915, +static int vbt_get_panel_type(struct intel_display *display, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid, bool use_fallback) { const struct bdb_lfp_options *lfp_options; - lfp_options = bdb_find_section(i915, BDB_LFP_OPTIONS); + lfp_options = bdb_find_section(display, BDB_LFP_OPTIONS); if (!lfp_options) return -1; if (lfp_options->panel_type > 0xf && lfp_options->panel_type != 0xff) { - drm_dbg_kms(&i915->drm, "Invalid VBT panel type 0x%x\n", + drm_dbg_kms(display->drm, "Invalid VBT panel type 0x%x\n", lfp_options->panel_type); return -1; } @@ -644,12 +647,13 @@ static int vbt_get_panel_type(struct drm_i915_private *i915, if (devdata && devdata->child.handle == DEVICE_HANDLE_LFP2) return lfp_options->panel_type2; - drm_WARN_ON(&i915->drm, devdata && devdata->child.handle != DEVICE_HANDLE_LFP1); + drm_WARN_ON(display->drm, + devdata && devdata->child.handle != DEVICE_HANDLE_LFP1); return lfp_options->panel_type; } -static int pnpid_get_panel_type(struct drm_i915_private *i915, +static int pnpid_get_panel_type(struct intel_display *display, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid, bool use_fallback) { @@ -668,14 +672,14 @@ static int pnpid_get_panel_type(struct drm_i915_private *i915, product_id_nodate.week_of_manufacture = 0; product_id_nodate.year_of_manufacture = 0; - p = drm_dbg_printer(&i915->drm, DRM_UT_KMS, "EDID"); + p = drm_dbg_printer(display->drm, DRM_UT_KMS, "EDID"); drm_edid_print_product_id(&p, &product_id, true); - ptrs = bdb_find_section(i915, BDB_LFP_DATA_PTRS); + ptrs = bdb_find_section(display, BDB_LFP_DATA_PTRS); if (!ptrs) return -1; - data = bdb_find_section(i915, BDB_LFP_DATA); + data = bdb_find_section(display, BDB_LFP_DATA); if (!data) return -1; @@ -699,7 +703,7 @@ static int pnpid_get_panel_type(struct drm_i915_private *i915, return best; } -static int fallback_get_panel_type(struct drm_i915_private *i915, +static int fallback_get_panel_type(struct intel_display *display, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid, bool use_fallback) { @@ -713,13 +717,13 @@ enum panel_type { PANEL_TYPE_FALLBACK, }; -static int get_panel_type(struct drm_i915_private *i915, +static int get_panel_type(struct intel_display *display, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid, bool use_fallback) { struct { const char *name; - int (*get_panel_type)(struct drm_i915_private *i915, + int (*get_panel_type)(struct intel_display *display, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid, bool use_fallback); int panel_type; @@ -744,14 +748,14 @@ static int get_panel_type(struct drm_i915_private *i915, int i; for (i = 0; i < ARRAY_SIZE(panel_types); i++) { - panel_types[i].panel_type = panel_types[i].get_panel_type(i915, devdata, + panel_types[i].panel_type = panel_types[i].get_panel_type(display, devdata, drm_edid, use_fallback); - drm_WARN_ON(&i915->drm, panel_types[i].panel_type > 0xf && + drm_WARN_ON(display->drm, panel_types[i].panel_type > 0xf && panel_types[i].panel_type != 0xff); if (panel_types[i].panel_type >= 0) - drm_dbg_kms(&i915->drm, "Panel type (%s): %d\n", + drm_dbg_kms(display->drm, "Panel type (%s): %d\n", panel_types[i].name, panel_types[i].panel_type); } @@ -766,7 +770,7 @@ static int get_panel_type(struct drm_i915_private *i915, else i = PANEL_TYPE_FALLBACK; - drm_dbg_kms(&i915->drm, "Selected panel type (%s): %d\n", + drm_dbg_kms(display->drm, "Selected panel type (%s): %d\n", panel_types[i].name, panel_types[i].panel_type); return panel_types[i].panel_type; @@ -784,14 +788,14 @@ static bool panel_bool(unsigned int value, int panel_type) /* Parse general panel options */ static void -parse_panel_options(struct drm_i915_private *i915, +parse_panel_options(struct intel_display *display, struct intel_panel *panel) { const struct bdb_lfp_options *lfp_options; int panel_type = panel->vbt.panel_type; int drrs_mode; - lfp_options = bdb_find_section(i915, BDB_LFP_OPTIONS); + lfp_options = bdb_find_section(display, BDB_LFP_OPTIONS); if (!lfp_options) return; @@ -815,23 +819,23 @@ parse_panel_options(struct drm_i915_private *i915, switch (drrs_mode) { case 0: panel->vbt.drrs_type = DRRS_TYPE_STATIC; - drm_dbg_kms(&i915->drm, "DRRS supported mode is static\n"); + drm_dbg_kms(display->drm, "DRRS supported mode is static\n"); break; case 2: panel->vbt.drrs_type = DRRS_TYPE_SEAMLESS; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "DRRS supported mode is seamless\n"); break; default: panel->vbt.drrs_type = DRRS_TYPE_NONE; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "DRRS not supported (VBT input)\n"); break; } } static void -parse_lfp_panel_dtd(struct drm_i915_private *i915, +parse_lfp_panel_dtd(struct intel_display *display, struct intel_panel *panel, const struct bdb_lfp_data *lfp_data, const struct bdb_lfp_data_ptrs *lfp_data_ptrs) @@ -849,11 +853,11 @@ parse_lfp_panel_dtd(struct drm_i915_private *i915, if (!panel_fixed_mode) return; - fill_detail_timing_data(i915, panel_fixed_mode, panel_dvo_timing); + fill_detail_timing_data(display, panel_fixed_mode, panel_dvo_timing); panel->vbt.lfp_vbt_mode = panel_fixed_mode; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Found panel mode in BIOS VBT legacy lfp table: " DRM_MODE_FMT "\n", DRM_MODE_ARG(panel_fixed_mode)); @@ -865,14 +869,14 @@ parse_lfp_panel_dtd(struct drm_i915_private *i915, if (fp_timing->x_res == panel_fixed_mode->hdisplay && fp_timing->y_res == panel_fixed_mode->vdisplay) { panel->vbt.bios_lvds_val = fp_timing->lvds_reg_val; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT initial LVDS value %x\n", panel->vbt.bios_lvds_val); } } static void -parse_lfp_data(struct drm_i915_private *i915, +parse_lfp_data(struct intel_display *display, struct intel_panel *panel) { const struct bdb_lfp_data *data; @@ -882,41 +886,41 @@ parse_lfp_data(struct drm_i915_private *i915, struct drm_printer p; int panel_type = panel->vbt.panel_type; - ptrs = bdb_find_section(i915, BDB_LFP_DATA_PTRS); + ptrs = bdb_find_section(display, BDB_LFP_DATA_PTRS); if (!ptrs) return; - data = bdb_find_section(i915, BDB_LFP_DATA); + data = bdb_find_section(display, BDB_LFP_DATA); if (!data) return; if (!panel->vbt.lfp_vbt_mode) - parse_lfp_panel_dtd(i915, panel, data, ptrs); + parse_lfp_panel_dtd(display, panel, data, ptrs); pnp_id = get_lfp_pnp_id(data, ptrs, panel_type); - p = drm_dbg_printer(&i915->drm, DRM_UT_KMS, "Panel"); + p = drm_dbg_printer(display->drm, DRM_UT_KMS, "Panel"); drm_edid_print_product_id(&p, pnp_id, false); tail = get_lfp_data_tail(data, ptrs); if (!tail) return; - drm_dbg_kms(&i915->drm, "Panel name: %.*s\n", + drm_dbg_kms(display->drm, "Panel name: %.*s\n", (int)sizeof(tail->panel_name[0].name), tail->panel_name[panel_type].name); - if (i915->display.vbt.version >= 188) { + if (display->vbt.version >= 188) { panel->vbt.seamless_drrs_min_refresh_rate = tail->seamless_drrs_min_refresh_rate[panel_type]; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Seamless DRRS min refresh rate: %d Hz\n", panel->vbt.seamless_drrs_min_refresh_rate); } } static void -parse_generic_dtd(struct drm_i915_private *i915, +parse_generic_dtd(struct intel_display *display, struct intel_panel *panel) { const struct bdb_generic_dtd *generic_dtd; @@ -932,20 +936,20 @@ parse_generic_dtd(struct drm_i915_private *i915, * first on VBT >= 229, but still fall back to trying the old LFP * block if that fails. */ - if (i915->display.vbt.version < 229) + if (display->vbt.version < 229) return; - generic_dtd = bdb_find_section(i915, BDB_GENERIC_DTD); + generic_dtd = bdb_find_section(display, BDB_GENERIC_DTD); if (!generic_dtd) return; if (generic_dtd->gdtd_size < sizeof(struct generic_dtd_entry)) { - drm_err(&i915->drm, "GDTD size %u is too small.\n", + drm_err(display->drm, "GDTD size %u is too small.\n", generic_dtd->gdtd_size); return; } else if (generic_dtd->gdtd_size != sizeof(struct generic_dtd_entry)) { - drm_err(&i915->drm, "Unexpected GDTD size %u\n", + drm_err(display->drm, "Unexpected GDTD size %u\n", generic_dtd->gdtd_size); /* DTD has unknown fields, but keep going */ } @@ -953,7 +957,7 @@ parse_generic_dtd(struct drm_i915_private *i915, num_dtd = (get_blocksize(generic_dtd) - sizeof(struct bdb_generic_dtd)) / generic_dtd->gdtd_size; if (panel->vbt.panel_type >= num_dtd) { - drm_err(&i915->drm, + drm_err(display->drm, "Panel type %d not found in table of %d DTD's\n", panel->vbt.panel_type, num_dtd); return; @@ -998,7 +1002,7 @@ parse_generic_dtd(struct drm_i915_private *i915, else panel_fixed_mode->flags |= DRM_MODE_FLAG_NVSYNC; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Found panel mode in BIOS VBT generic dtd table: " DRM_MODE_FMT "\n", DRM_MODE_ARG(panel_fixed_mode)); @@ -1006,7 +1010,7 @@ parse_generic_dtd(struct drm_i915_private *i915, } static void -parse_lfp_backlight(struct drm_i915_private *i915, +parse_lfp_backlight(struct intel_display *display, struct intel_panel *panel) { const struct bdb_lfp_backlight *backlight_data; @@ -1014,12 +1018,12 @@ parse_lfp_backlight(struct drm_i915_private *i915, int panel_type = panel->vbt.panel_type; u16 level; - backlight_data = bdb_find_section(i915, BDB_LFP_BACKLIGHT); + backlight_data = bdb_find_section(display, BDB_LFP_BACKLIGHT); if (!backlight_data) return; if (backlight_data->entry_size != sizeof(backlight_data->data[0])) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Unsupported backlight data entry size %u\n", backlight_data->entry_size); return; @@ -1029,7 +1033,7 @@ parse_lfp_backlight(struct drm_i915_private *i915, panel->vbt.backlight.present = entry->type == BDB_BACKLIGHT_TYPE_PWM; if (!panel->vbt.backlight.present) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "PWM backlight not present in VBT (type %u)\n", entry->type); return; @@ -1037,7 +1041,7 @@ parse_lfp_backlight(struct drm_i915_private *i915, panel->vbt.backlight.type = INTEL_BACKLIGHT_DISPLAY_DDI; panel->vbt.backlight.controller = 0; - if (i915->display.vbt.version >= 191) { + if (display->vbt.version >= 191) { const struct lfp_backlight_control_method *method; method = &backlight_data->backlight_control[panel_type]; @@ -1048,14 +1052,14 @@ parse_lfp_backlight(struct drm_i915_private *i915, panel->vbt.backlight.pwm_freq_hz = entry->pwm_freq_hz; panel->vbt.backlight.active_low_pwm = entry->active_low_pwm; - if (i915->display.vbt.version >= 234) { + if (display->vbt.version >= 234) { u16 min_level; bool scale; level = backlight_data->brightness_level[panel_type].level; min_level = backlight_data->brightness_min_level[panel_type].level; - if (i915->display.vbt.version >= 236) + if (display->vbt.version >= 236) scale = backlight_data->brightness_precision_bits[panel_type] == 16; else scale = level > 255; @@ -1064,7 +1068,7 @@ parse_lfp_backlight(struct drm_i915_private *i915, min_level = min_level / 255; if (min_level > 255) { - drm_warn(&i915->drm, "Brightness min level > 255\n"); + drm_warn(display->drm, "Brightness min level > 255\n"); level = 255; } panel->vbt.backlight.min_brightness = min_level; @@ -1076,13 +1080,13 @@ parse_lfp_backlight(struct drm_i915_private *i915, panel->vbt.backlight.min_brightness = entry->min_brightness; } - if (i915->display.vbt.version >= 239) + if (display->vbt.version >= 239) panel->vbt.backlight.hdr_dpcd_refresh_timeout = DIV_ROUND_UP(backlight_data->hdr_dpcd_refresh_timeout[panel_type], 100); else panel->vbt.backlight.hdr_dpcd_refresh_timeout = 30; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT backlight PWM modulation frequency %u Hz, " "active %s, min brightness %u, level %u, controller %u\n", panel->vbt.backlight.pwm_freq_hz, @@ -1093,16 +1097,16 @@ parse_lfp_backlight(struct drm_i915_private *i915, } static void -parse_sdvo_lvds_data(struct drm_i915_private *i915, +parse_sdvo_lvds_data(struct intel_display *display, struct intel_panel *panel) { const struct bdb_sdvo_lvds_dtd *dtd; struct drm_display_mode *panel_fixed_mode; int index; - index = i915->display.params.vbt_sdvo_panel_type; + index = display->params.vbt_sdvo_panel_type; if (index == -2) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Ignore SDVO LVDS mode from BIOS VBT tables.\n"); return; } @@ -1110,14 +1114,14 @@ parse_sdvo_lvds_data(struct drm_i915_private *i915, if (index == -1) { const struct bdb_sdvo_lvds_options *sdvo_lvds_options; - sdvo_lvds_options = bdb_find_section(i915, BDB_SDVO_LVDS_OPTIONS); + sdvo_lvds_options = bdb_find_section(display, BDB_SDVO_LVDS_OPTIONS); if (!sdvo_lvds_options) return; index = sdvo_lvds_options->panel_type; } - dtd = bdb_find_section(i915, BDB_SDVO_LVDS_DTD); + dtd = bdb_find_section(display, BDB_SDVO_LVDS_DTD); if (!dtd) return; @@ -1128,7 +1132,8 @@ parse_sdvo_lvds_data(struct drm_i915_private *i915, * it here to be sure. */ if (index >= ARRAY_SIZE(dtd->dtd)) { - drm_err(&i915->drm, "index %d is larger than dtd->dtd[4] array\n", + drm_err(display->drm, + "index %d is larger than dtd->dtd[4] array\n", index); return; } @@ -1137,19 +1142,19 @@ parse_sdvo_lvds_data(struct drm_i915_private *i915, if (!panel_fixed_mode) return; - fill_detail_timing_data(i915, panel_fixed_mode, &dtd->dtd[index]); + fill_detail_timing_data(display, panel_fixed_mode, &dtd->dtd[index]); panel->vbt.sdvo_lvds_vbt_mode = panel_fixed_mode; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Found SDVO LVDS mode in BIOS VBT tables: " DRM_MODE_FMT "\n", DRM_MODE_ARG(panel_fixed_mode)); } -static int intel_bios_ssc_frequency(struct drm_i915_private *i915, +static int intel_bios_ssc_frequency(struct intel_display *display, bool alternate) { - switch (DISPLAY_VER(i915)) { + switch (DISPLAY_VER(display)) { case 2: return alternate ? 66667 : 48000; case 3: @@ -1161,45 +1166,46 @@ static int intel_bios_ssc_frequency(struct drm_i915_private *i915, } static void -parse_general_features(struct drm_i915_private *i915) +parse_general_features(struct intel_display *display) { + struct drm_i915_private *i915 = to_i915(display->drm); const struct bdb_general_features *general; - general = bdb_find_section(i915, BDB_GENERAL_FEATURES); + general = bdb_find_section(display, BDB_GENERAL_FEATURES); if (!general) return; - i915->display.vbt.int_tv_support = general->int_tv_support; + display->vbt.int_tv_support = general->int_tv_support; /* int_crt_support can't be trusted on earlier platforms */ - if (i915->display.vbt.version >= 155 && - (HAS_DDI(i915) || IS_VALLEYVIEW(i915))) - i915->display.vbt.int_crt_support = general->int_crt_support; - i915->display.vbt.lvds_use_ssc = general->enable_ssc; - i915->display.vbt.lvds_ssc_freq = - intel_bios_ssc_frequency(i915, general->ssc_freq); - i915->display.vbt.display_clock_mode = general->display_clock_mode; - i915->display.vbt.fdi_rx_polarity_inverted = general->fdi_rx_polarity_inverted; - if (i915->display.vbt.version >= 181) { - i915->display.vbt.orientation = general->rotate_180 ? + if (display->vbt.version >= 155 && + (HAS_DDI(display) || IS_VALLEYVIEW(i915))) + display->vbt.int_crt_support = general->int_crt_support; + display->vbt.lvds_use_ssc = general->enable_ssc; + display->vbt.lvds_ssc_freq = + intel_bios_ssc_frequency(display, general->ssc_freq); + display->vbt.display_clock_mode = general->display_clock_mode; + display->vbt.fdi_rx_polarity_inverted = general->fdi_rx_polarity_inverted; + if (display->vbt.version >= 181) { + display->vbt.orientation = general->rotate_180 ? DRM_MODE_PANEL_ORIENTATION_BOTTOM_UP : DRM_MODE_PANEL_ORIENTATION_NORMAL; } else { - i915->display.vbt.orientation = DRM_MODE_PANEL_ORIENTATION_UNKNOWN; + display->vbt.orientation = DRM_MODE_PANEL_ORIENTATION_UNKNOWN; } - if (i915->display.vbt.version >= 249 && general->afc_startup_config) { - i915->display.vbt.override_afc_startup = true; - i915->display.vbt.override_afc_startup_val = general->afc_startup_config == 0x1 ? 0x0 : 0x7; + if (display->vbt.version >= 249 && general->afc_startup_config) { + display->vbt.override_afc_startup = true; + display->vbt.override_afc_startup_val = general->afc_startup_config == 1 ? 0 : 7; } - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "BDB_GENERAL_FEATURES int_tv_support %d int_crt_support %d lvds_use_ssc %d lvds_ssc_freq %d display_clock_mode %d fdi_rx_polarity_inverted %d\n", - i915->display.vbt.int_tv_support, - i915->display.vbt.int_crt_support, - i915->display.vbt.lvds_use_ssc, - i915->display.vbt.lvds_ssc_freq, - i915->display.vbt.display_clock_mode, - i915->display.vbt.fdi_rx_polarity_inverted); + display->vbt.int_tv_support, + display->vbt.int_crt_support, + display->vbt.lvds_use_ssc, + display->vbt.lvds_ssc_freq, + display->vbt.display_clock_mode, + display->vbt.fdi_rx_polarity_inverted); } static const struct child_device_config * @@ -1209,7 +1215,7 @@ child_device_ptr(const struct bdb_general_definitions *defs, int i) } static void -parse_sdvo_device_mapping(struct drm_i915_private *i915) +parse_sdvo_device_mapping(struct intel_display *display) { const struct intel_bios_encoder_data *devdata; int count = 0; @@ -1218,19 +1224,19 @@ parse_sdvo_device_mapping(struct drm_i915_private *i915) * Only parse SDVO mappings on gens that could have SDVO. This isn't * accurate and doesn't have to be, as long as it's not too strict. */ - if (!IS_DISPLAY_VER(i915, 3, 7)) { - drm_dbg_kms(&i915->drm, "Skipping SDVO device mapping\n"); + if (!IS_DISPLAY_VER(display, 3, 7)) { + drm_dbg_kms(display->drm, "Skipping SDVO device mapping\n"); return; } - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { const struct child_device_config *child = &devdata->child; struct sdvo_device_mapping *mapping; - if (child->slave_addr != SLAVE_ADDR1 && - child->slave_addr != SLAVE_ADDR2) { + if (child->target_addr != TARGET_ADDR1 && + child->target_addr != TARGET_ADDR2) { /* - * If the slave address is neither 0x70 nor 0x72, + * If the target address is neither 0x70 nor 0x72, * it is not a SDVO device. Skip it. */ continue; @@ -1238,39 +1244,39 @@ parse_sdvo_device_mapping(struct drm_i915_private *i915) if (child->dvo_port != DEVICE_PORT_DVOB && child->dvo_port != DEVICE_PORT_DVOC) { /* skip the incorrect SDVO port */ - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Incorrect SDVO port. Skip it\n"); continue; } - drm_dbg_kms(&i915->drm, - "the SDVO device with slave addr %2x is found on" + drm_dbg_kms(display->drm, + "the SDVO device with target addr %2x is found on" " %s port\n", - child->slave_addr, + child->target_addr, (child->dvo_port == DEVICE_PORT_DVOB) ? "SDVOB" : "SDVOC"); - mapping = &i915->display.vbt.sdvo_mappings[child->dvo_port - 1]; + mapping = &display->vbt.sdvo_mappings[child->dvo_port - 1]; if (!mapping->initialized) { mapping->dvo_port = child->dvo_port; - mapping->slave_addr = child->slave_addr; + mapping->target_addr = child->target_addr; mapping->dvo_wiring = child->dvo_wiring; mapping->ddc_pin = child->ddc_pin; mapping->i2c_pin = child->i2c_pin; mapping->initialized = 1; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "SDVO device: dvo=%x, addr=%x, wiring=%d, ddc_pin=%d, i2c_pin=%d\n", - mapping->dvo_port, mapping->slave_addr, + mapping->dvo_port, mapping->target_addr, mapping->dvo_wiring, mapping->ddc_pin, mapping->i2c_pin); } else { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Maybe one SDVO port is shared by " "two SDVO device.\n"); } - if (child->slave2_addr) { + if (child->target2_addr) { /* Maybe this is a SDVO device with multiple inputs */ /* And the mapping info is not added */ - drm_dbg_kms(&i915->drm, - "there exists the slave2_addr. Maybe this" + drm_dbg_kms(display->drm, + "there exists the target2_addr. Maybe this" " is a SDVO device with multiple inputs.\n"); } count++; @@ -1278,28 +1284,28 @@ parse_sdvo_device_mapping(struct drm_i915_private *i915) if (!count) { /* No SDVO device info is found */ - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "No SDVO device info is found in VBT\n"); } } static void -parse_driver_features(struct drm_i915_private *i915) +parse_driver_features(struct intel_display *display) { const struct bdb_driver_features *driver; - driver = bdb_find_section(i915, BDB_DRIVER_FEATURES); + driver = bdb_find_section(display, BDB_DRIVER_FEATURES); if (!driver) return; - if (DISPLAY_VER(i915) >= 5) { + if (DISPLAY_VER(display) >= 5) { /* * Note that we consider BDB_DRIVER_FEATURE_INT_SDVO_LVDS * to mean "eDP". The VBT spec doesn't agree with that * interpretation, but real world VBTs seem to. */ if (driver->lvds_config != BDB_DRIVER_FEATURE_INT_LVDS) - i915->display.vbt.int_lvds_support = 0; + display->vbt.int_lvds_support = 0; } else { /* * FIXME it's not clear which BDB version has the LVDS config @@ -1312,25 +1318,25 @@ parse_driver_features(struct drm_i915_private *i915) * in the wild with the bits correctly populated. Version * 108 (on i85x) does not have the bits correctly populated. */ - if (i915->display.vbt.version >= 134 && + if (display->vbt.version >= 134 && driver->lvds_config != BDB_DRIVER_FEATURE_INT_LVDS && driver->lvds_config != BDB_DRIVER_FEATURE_INT_SDVO_LVDS) - i915->display.vbt.int_lvds_support = 0; + display->vbt.int_lvds_support = 0; } } static void -parse_panel_driver_features(struct drm_i915_private *i915, +parse_panel_driver_features(struct intel_display *display, struct intel_panel *panel) { const struct bdb_driver_features *driver; - driver = bdb_find_section(i915, BDB_DRIVER_FEATURES); + driver = bdb_find_section(display, BDB_DRIVER_FEATURES); if (!driver) return; - if (i915->display.vbt.version < 228) { - drm_dbg_kms(&i915->drm, "DRRS State Enabled:%d\n", + if (display->vbt.version < 228) { + drm_dbg_kms(display->drm, "DRRS State Enabled:%d\n", driver->drrs_enabled); /* * If DRRS is not supported, drrs_type has to be set to 0. @@ -1354,7 +1360,7 @@ parse_panel_driver_features(struct drm_i915_private *i915, } static void -parse_power_conservation_features(struct drm_i915_private *i915, +parse_power_conservation_features(struct intel_display *display, struct intel_panel *panel) { const struct bdb_lfp_power *power; @@ -1362,10 +1368,10 @@ parse_power_conservation_features(struct drm_i915_private *i915, panel->vbt.vrr = true; /* matches Windows behaviour */ - if (i915->display.vbt.version < 228) + if (display->vbt.version < 228) return; - power = bdb_find_section(i915, BDB_LFP_POWER); + power = bdb_find_section(display, BDB_LFP_POWER); if (!power) return; @@ -1388,16 +1394,16 @@ parse_power_conservation_features(struct drm_i915_private *i915, panel->vbt.drrs_type = DRRS_TYPE_NONE; } - if (i915->display.vbt.version >= 232) + if (display->vbt.version >= 232) panel->vbt.edp.hobl = panel_bool(power->hobl, panel_type); - if (i915->display.vbt.version >= 233) + if (display->vbt.version >= 233) panel->vbt.vrr = panel_bool(power->vrr_feature_enabled, panel_type); } static void -parse_edp(struct drm_i915_private *i915, +parse_edp(struct intel_display *display, struct intel_panel *panel) { const struct bdb_edp *edp; @@ -1405,7 +1411,7 @@ parse_edp(struct drm_i915_private *i915, const struct edp_fast_link_params *edp_link_params; int panel_type = panel->vbt.panel_type; - edp = bdb_find_section(i915, BDB_EDP); + edp = bdb_find_section(display, BDB_EDP); if (!edp) return; @@ -1427,7 +1433,7 @@ parse_edp(struct drm_i915_private *i915, panel->vbt.edp.pps = *edp_pps; - if (i915->display.vbt.version >= 224) { + if (display->vbt.version >= 224) { panel->vbt.edp.rate = edp->edp_fast_link_training_rate[panel_type] * 20; } else { @@ -1442,7 +1448,7 @@ parse_edp(struct drm_i915_private *i915, panel->vbt.edp.rate = 540000; break; default: - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT has unknown eDP link rate value %u\n", edp_link_params->rate); break; @@ -1460,7 +1466,7 @@ parse_edp(struct drm_i915_private *i915, panel->vbt.edp.lanes = 4; break; default: - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT has unknown eDP lane count value %u\n", edp_link_params->lanes); break; @@ -1480,7 +1486,7 @@ parse_edp(struct drm_i915_private *i915, panel->vbt.edp.preemphasis = DP_TRAIN_PRE_EMPH_LEVEL_3; break; default: - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT has unknown eDP pre-emphasis value %u\n", edp_link_params->preemphasis); break; @@ -1500,19 +1506,19 @@ parse_edp(struct drm_i915_private *i915, panel->vbt.edp.vswing = DP_TRAIN_VOLTAGE_SWING_LEVEL_3; break; default: - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT has unknown eDP voltage swing value %u\n", edp_link_params->vswing); break; } - if (i915->display.vbt.version >= 173) { + if (display->vbt.version >= 173) { u8 vswing; /* Don't read from VBT if module parameter has valid value*/ - if (i915->display.params.edp_vswing) { + if (display->params.edp_vswing) { panel->vbt.edp.low_vswing = - i915->display.params.edp_vswing == 1; + display->params.edp_vswing == 1; } else { vswing = (edp->edp_vswing_preemph >> (panel_type * 4)) & 0xF; panel->vbt.edp.low_vswing = vswing == 0; @@ -1522,26 +1528,27 @@ parse_edp(struct drm_i915_private *i915, panel->vbt.edp.drrs_msa_timing_delay = panel_bits(edp->sdrrs_msa_timing_delay, panel_type, 2); - if (i915->display.vbt.version >= 244) + if (display->vbt.version >= 244) panel->vbt.edp.max_link_rate = edp->edp_max_port_link_rate[panel_type] * 20; - if (i915->display.vbt.version >= 251) + if (display->vbt.version >= 251) panel->vbt.edp.dsc_disable = panel_bool(edp->edp_dsc_disable, panel_type); } static void -parse_psr(struct drm_i915_private *i915, +parse_psr(struct intel_display *display, struct intel_panel *panel) { + struct drm_i915_private *i915 = to_i915(display->drm); const struct bdb_psr *psr; const struct psr_table *psr_table; int panel_type = panel->vbt.panel_type; - psr = bdb_find_section(i915, BDB_PSR); + psr = bdb_find_section(display, BDB_PSR); if (!psr) { - drm_dbg_kms(&i915->drm, "No PSR BDB found.\n"); + drm_dbg_kms(display->drm, "No PSR BDB found.\n"); return; } @@ -1558,8 +1565,8 @@ parse_psr(struct drm_i915_private *i915, * New psr options 0=500us, 1=100us, 2=2500us, 3=0us * Old decimal value is wake up time in multiples of 100 us. */ - if (i915->display.vbt.version >= 205 && - (DISPLAY_VER(i915) >= 9 && !IS_BROXTON(i915))) { + if (display->vbt.version >= 205 && + (DISPLAY_VER(display) >= 9 && !IS_BROXTON(i915))) { switch (psr_table->tp1_wakeup_time) { case 0: panel->vbt.psr.tp1_wakeup_time_us = 500; @@ -1571,7 +1578,7 @@ parse_psr(struct drm_i915_private *i915, panel->vbt.psr.tp1_wakeup_time_us = 0; break; default: - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT tp1 wakeup time value %d is outside range[0-3], defaulting to max value 2500us\n", psr_table->tp1_wakeup_time); fallthrough; @@ -1591,7 +1598,7 @@ parse_psr(struct drm_i915_private *i915, panel->vbt.psr.tp2_tp3_wakeup_time_us = 0; break; default: - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT tp2_tp3 wakeup time value %d is outside range[0-3], defaulting to max value 2500us\n", psr_table->tp2_tp3_wakeup_time); fallthrough; @@ -1604,7 +1611,7 @@ parse_psr(struct drm_i915_private *i915, panel->vbt.psr.tp2_tp3_wakeup_time_us = psr_table->tp2_tp3_wakeup_time * 100; } - if (i915->display.vbt.version >= 226) { + if (display->vbt.version >= 226) { u32 wakeup_time = psr->psr2_tp2_tp3_wakeup_time; wakeup_time = panel_bits(wakeup_time, panel_type, 2); @@ -1630,13 +1637,13 @@ parse_psr(struct drm_i915_private *i915, } } -static void parse_dsi_backlight_ports(struct drm_i915_private *i915, +static void parse_dsi_backlight_ports(struct intel_display *display, struct intel_panel *panel, enum port port) { - enum port port_bc = DISPLAY_VER(i915) >= 11 ? PORT_B : PORT_C; + enum port port_bc = DISPLAY_VER(display) >= 11 ? PORT_B : PORT_C; - if (!panel->vbt.dsi.config->dual_link || i915->display.vbt.version < 197) { + if (!panel->vbt.dsi.config->dual_link || display->vbt.version < 197) { panel->vbt.dsi.bl_ports = BIT(port); if (panel->vbt.dsi.config->cabc_supported) panel->vbt.dsi.cabc_ports = BIT(port); @@ -1676,7 +1683,7 @@ static void parse_dsi_backlight_ports(struct drm_i915_private *i915, } static void -parse_mipi_config(struct drm_i915_private *i915, +parse_mipi_config(struct intel_display *display, struct intel_panel *panel) { const struct bdb_mipi_config *start; @@ -1686,27 +1693,19 @@ parse_mipi_config(struct drm_i915_private *i915, enum port port; /* parse MIPI blocks only if LFP type is MIPI */ - if (!intel_bios_is_dsi_present(i915, &port)) + if (!intel_bios_is_dsi_present(display, &port)) return; /* Initialize this to undefined indicating no generic MIPI support */ panel->vbt.dsi.panel_id = MIPI_DSI_UNDEFINED_PANEL_ID; - /* Block #40 is already parsed and panel_fixed_mode is - * stored in i915->lfp_vbt_mode - * resuse this when needed - */ - - /* Parse #52 for panel index used from panel_type already - * parsed - */ - start = bdb_find_section(i915, BDB_MIPI_CONFIG); + start = bdb_find_section(display, BDB_MIPI_CONFIG); if (!start) { - drm_dbg_kms(&i915->drm, "No MIPI config BDB found"); + drm_dbg_kms(display->drm, "No MIPI config BDB found"); return; } - drm_dbg(&i915->drm, "Found MIPI Config block, panel index = %d\n", + drm_dbg(display->drm, "Found MIPI Config block, panel index = %d\n", panel_type); /* @@ -1727,7 +1726,7 @@ parse_mipi_config(struct drm_i915_private *i915, return; } - parse_dsi_backlight_ports(i915, panel, port); + parse_dsi_backlight_ports(display, panel, port); /* FIXME is the 90 vs. 270 correct? */ switch (config->rotation) { @@ -1759,7 +1758,7 @@ parse_mipi_config(struct drm_i915_private *i915, /* Find the sequence block and size for the given panel. */ static const u8 * -find_panel_sequence_block(struct drm_i915_private *i915, +find_panel_sequence_block(struct intel_display *display, const struct bdb_mipi_sequence *sequence, u16 panel_id, u32 *seq_size) { @@ -1777,7 +1776,8 @@ find_panel_sequence_block(struct drm_i915_private *i915, for (i = 0; i < MAX_MIPI_CONFIGURATIONS && index < total; i++) { if (index + header_size > total) { - drm_err(&i915->drm, "Invalid sequence block (header)\n"); + drm_err(display->drm, + "Invalid sequence block (header)\n"); return NULL; } @@ -1790,7 +1790,7 @@ find_panel_sequence_block(struct drm_i915_private *i915, index += header_size; if (index + current_size > total) { - drm_err(&i915->drm, "Invalid sequence block\n"); + drm_err(display->drm, "Invalid sequence block\n"); return NULL; } @@ -1802,12 +1802,13 @@ find_panel_sequence_block(struct drm_i915_private *i915, index += current_size; } - drm_err(&i915->drm, "Sequence block detected but no valid configuration\n"); + drm_err(display->drm, + "Sequence block detected but no valid configuration\n"); return NULL; } -static int goto_next_sequence(struct drm_i915_private *i915, +static int goto_next_sequence(struct intel_display *display, const u8 *data, int index, int total) { u16 len; @@ -1838,7 +1839,7 @@ static int goto_next_sequence(struct drm_i915_private *i915, len = *(data + index + 6) + 7; break; default: - drm_err(&i915->drm, "Unknown operation byte\n"); + drm_err(display->drm, "Unknown operation byte\n"); return 0; } } @@ -1846,7 +1847,7 @@ static int goto_next_sequence(struct drm_i915_private *i915, return 0; } -static int goto_next_sequence_v3(struct drm_i915_private *i915, +static int goto_next_sequence_v3(struct intel_display *display, const u8 *data, int index, int total) { int seq_end; @@ -1858,7 +1859,7 @@ static int goto_next_sequence_v3(struct drm_i915_private *i915, * checking on the structure. */ if (total < 5) { - drm_err(&i915->drm, "Too small sequence size\n"); + drm_err(display->drm, "Too small sequence size\n"); return 0; } @@ -1875,7 +1876,7 @@ static int goto_next_sequence_v3(struct drm_i915_private *i915, seq_end = index + size_of_sequence; if (seq_end > total) { - drm_err(&i915->drm, "Invalid sequence size\n"); + drm_err(display->drm, "Invalid sequence size\n"); return 0; } @@ -1885,7 +1886,8 @@ static int goto_next_sequence_v3(struct drm_i915_private *i915, if (operation_byte == MIPI_SEQ_ELEM_END) { if (index != seq_end) { - drm_err(&i915->drm, "Invalid element structure\n"); + drm_err(display->drm, + "Invalid element structure\n"); return 0; } return index; @@ -1907,7 +1909,7 @@ static int goto_next_sequence_v3(struct drm_i915_private *i915, case MIPI_SEQ_ELEM_PMIC: break; default: - drm_err(&i915->drm, "Unknown operation byte %u\n", + drm_err(display->drm, "Unknown operation byte %u\n", operation_byte); break; } @@ -1920,13 +1922,13 @@ static int goto_next_sequence_v3(struct drm_i915_private *i915, * Get len of pre-fixed deassert fragment from a v1 init OTP sequence, * skip all delay + gpio operands and stop at the first DSI packet op. */ -static int get_init_otp_deassert_fragment_len(struct drm_i915_private *i915, +static int get_init_otp_deassert_fragment_len(struct intel_display *display, struct intel_panel *panel) { const u8 *data = panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP]; int index, len; - if (drm_WARN_ON(&i915->drm, + if (drm_WARN_ON(display->drm, !data || panel->vbt.dsi.seq_version != 1)) return 0; @@ -1955,7 +1957,7 @@ static int get_init_otp_deassert_fragment_len(struct drm_i915_private *i915, * these devices we split the init OTP sequence into a deassert sequence and * the actual init OTP part. */ -static void vlv_fixup_mipi_sequences(struct drm_i915_private *i915, +static void vlv_fixup_mipi_sequences(struct intel_display *display, struct intel_panel *panel) { u8 *init_otp; @@ -1973,11 +1975,11 @@ static void vlv_fixup_mipi_sequences(struct drm_i915_private *i915, return; /* The deassert-sequence ends at the first DSI packet */ - len = get_init_otp_deassert_fragment_len(i915, panel); + len = get_init_otp_deassert_fragment_len(display, panel); if (!len) return; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Using init OTP fragment to deassert reset\n"); /* Copy the fragment, update seq byte and terminate it */ @@ -2010,29 +2012,32 @@ static void vlv_fixup_mipi_sequences(struct drm_i915_private *i915, * or examine the contents of the sequences to * avoid false positives? */ -static void icl_fixup_mipi_sequences(struct drm_i915_private *i915, +static void icl_fixup_mipi_sequences(struct intel_display *display, struct intel_panel *panel) { if (!panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] && panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]) { - drm_dbg_kms(&i915->drm, "Broken VBT: Swapping INIT_OTP and DISPLAY_ON sequences\n"); + drm_dbg_kms(display->drm, + "Broken VBT: Swapping INIT_OTP and DISPLAY_ON sequences\n"); swap(panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP], panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]); } } -static void fixup_mipi_sequences(struct drm_i915_private *i915, +static void fixup_mipi_sequences(struct intel_display *display, struct intel_panel *panel) { - if (DISPLAY_VER(i915) >= 11) - icl_fixup_mipi_sequences(i915, panel); + struct drm_i915_private *i915 = to_i915(display->drm); + + if (DISPLAY_VER(display) >= 11) + icl_fixup_mipi_sequences(display, panel); else if (IS_VALLEYVIEW(i915)) - vlv_fixup_mipi_sequences(i915, panel); + vlv_fixup_mipi_sequences(display, panel); } static void -parse_mipi_sequence(struct drm_i915_private *i915, +parse_mipi_sequence(struct intel_display *display, struct intel_panel *panel) { int panel_type = panel->vbt.panel_type; @@ -2046,25 +2051,25 @@ parse_mipi_sequence(struct drm_i915_private *i915, if (panel->vbt.dsi.panel_id != MIPI_DSI_GENERIC_PANEL_ID) return; - sequence = bdb_find_section(i915, BDB_MIPI_SEQUENCE); + sequence = bdb_find_section(display, BDB_MIPI_SEQUENCE); if (!sequence) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "No MIPI Sequence found, parsing complete\n"); return; } /* Fail gracefully for forward incompatible sequence block. */ if (sequence->version >= 4) { - drm_err(&i915->drm, + drm_err(display->drm, "Unable to parse MIPI Sequence Block v%u\n", sequence->version); return; } - drm_dbg(&i915->drm, "Found MIPI sequence block v%u\n", + drm_dbg(display->drm, "Found MIPI sequence block v%u\n", sequence->version); - seq_data = find_panel_sequence_block(i915, sequence, panel_type, &seq_size); + seq_data = find_panel_sequence_block(display, sequence, panel_type, &seq_size); if (!seq_data) return; @@ -2079,24 +2084,24 @@ parse_mipi_sequence(struct drm_i915_private *i915, break; if (seq_id >= MIPI_SEQ_MAX) { - drm_err(&i915->drm, "Unknown sequence %u\n", + drm_err(display->drm, "Unknown sequence %u\n", seq_id); goto err; } /* Log about presence of sequences we won't run. */ if (seq_id == MIPI_SEQ_TEAR_ON || seq_id == MIPI_SEQ_TEAR_OFF) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Unsupported sequence %u\n", seq_id); panel->vbt.dsi.sequence[seq_id] = data + index; if (sequence->version >= 3) - index = goto_next_sequence_v3(i915, data, index, seq_size); + index = goto_next_sequence_v3(display, data, index, seq_size); else - index = goto_next_sequence(i915, data, index, seq_size); + index = goto_next_sequence(display, data, index, seq_size); if (!index) { - drm_err(&i915->drm, "Invalid sequence %u\n", + drm_err(display->drm, "Invalid sequence %u\n", seq_id); goto err; } @@ -2106,9 +2111,9 @@ parse_mipi_sequence(struct drm_i915_private *i915, panel->vbt.dsi.size = seq_size; panel->vbt.dsi.seq_version = sequence->version; - fixup_mipi_sequences(i915, panel); + fixup_mipi_sequences(display, panel); - drm_dbg(&i915->drm, "MIPI related VBT parsing complete\n"); + drm_dbg(display->drm, "MIPI related VBT parsing complete\n"); return; err: @@ -2117,47 +2122,47 @@ err: } static void -parse_compression_parameters(struct drm_i915_private *i915) +parse_compression_parameters(struct intel_display *display) { const struct bdb_compression_parameters *params; struct intel_bios_encoder_data *devdata; u16 block_size; int index; - if (i915->display.vbt.version < 198) + if (display->vbt.version < 198) return; - params = bdb_find_section(i915, BDB_COMPRESSION_PARAMETERS); + params = bdb_find_section(display, BDB_COMPRESSION_PARAMETERS); if (params) { /* Sanity checks */ if (params->entry_size != sizeof(params->data[0])) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT: unsupported compression param entry size\n"); return; } block_size = get_blocksize(params); if (block_size < sizeof(*params)) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT: expected 16 compression param entries\n"); return; } } - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { const struct child_device_config *child = &devdata->child; if (!child->compression_enable) continue; if (!params) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT: compression params not available\n"); continue; } if (child->compression_method_cps) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT: CPS compression not supported\n"); continue; } @@ -2169,12 +2174,12 @@ parse_compression_parameters(struct drm_i915_private *i915) } } -static u8 translate_iboost(struct drm_i915_private *i915, u8 val) +static u8 translate_iboost(struct intel_display *display, u8 val) { static const u8 mapping[] = { 1, 3, 7 }; /* See VBT spec */ if (val >= ARRAY_SIZE(mapping)) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Unsupported I_boost value found in VBT (%d), display may not work properly\n", val); return 0; } @@ -2231,8 +2236,9 @@ static const u8 adlp_ddc_pin_map[] = { [GMBUS_PIN_12_TC4_ICP] = ADLP_DDC_BUS_PORT_TC4, }; -static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin) +static u8 map_ddc_pin(struct intel_display *display, u8 vbt_pin) { + struct drm_i915_private *i915 = to_i915(display->drm); const u8 *ddc_pin_map; int i, n_entries; @@ -2247,7 +2253,7 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin) } else if (IS_ROCKETLAKE(i915) && INTEL_PCH_TYPE(i915) == PCH_TGP) { ddc_pin_map = rkl_pch_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(rkl_pch_tgp_ddc_pin_map); - } else if (HAS_PCH_TGP(i915) && DISPLAY_VER(i915) == 9) { + } else if (HAS_PCH_TGP(i915) && DISPLAY_VER(display) == 9) { ddc_pin_map = gen9bc_tgp_ddc_pin_map; n_entries = ARRAY_SIZE(gen9bc_tgp_ddc_pin_map); } else if (INTEL_PCH_TYPE(i915) >= PCH_ICP) { @@ -2266,7 +2272,7 @@ static u8 map_ddc_pin(struct drm_i915_private *i915, u8 vbt_pin) return i; } - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Ignoring alternate pin: VBT claims DDC pin %d, which is not valid for this platform\n", vbt_pin); return 0; @@ -2324,9 +2330,10 @@ static enum port __dvo_port_to_port(int n_ports, int n_dvo, return PORT_NONE; } -static enum port dvo_port_to_port(struct drm_i915_private *i915, +static enum port dvo_port_to_port(struct intel_display *display, u8 dvo_port) { + struct drm_i915_private *i915 = to_i915(display->drm); /* * Each DDI port can have more than one value on the "DVO Port" field, * so look for all the possible values for each port. @@ -2378,7 +2385,7 @@ static enum port dvo_port_to_port(struct drm_i915_private *i915, [PORT_TC4] = { DVO_PORT_HDMII, DVO_PORT_DPI, -1 }, }; - if (DISPLAY_VER(i915) >= 13) + if (DISPLAY_VER(display) >= 13) return __dvo_port_to_port(ARRAY_SIZE(xelpd_port_mapping), ARRAY_SIZE(xelpd_port_mapping[0]), xelpd_port_mapping, @@ -2401,13 +2408,13 @@ static enum port dvo_port_to_port(struct drm_i915_private *i915, } static enum port -dsi_dvo_port_to_port(struct drm_i915_private *i915, u8 dvo_port) +dsi_dvo_port_to_port(struct intel_display *display, u8 dvo_port) { switch (dvo_port) { case DVO_PORT_MIPIA: return PORT_A; case DVO_PORT_MIPIC: - if (DISPLAY_VER(i915) >= 11) + if (DISPLAY_VER(display) >= 11) return PORT_B; else return PORT_C; @@ -2418,13 +2425,13 @@ dsi_dvo_port_to_port(struct drm_i915_private *i915, u8 dvo_port) enum port intel_bios_encoder_port(const struct intel_bios_encoder_data *devdata) { - struct drm_i915_private *i915 = devdata->i915; + struct intel_display *display = devdata->display; const struct child_device_config *child = &devdata->child; enum port port; - port = dvo_port_to_port(i915, child->dvo_port); - if (port == PORT_NONE && DISPLAY_VER(i915) >= 11) - port = dsi_dvo_port_to_port(i915, child->dvo_port); + port = dvo_port_to_port(display, child->dvo_port); + if (port == PORT_NONE && DISPLAY_VER(display) >= 11) + port = dsi_dvo_port_to_port(display, child->dvo_port); return port; } @@ -2469,10 +2476,10 @@ static int parse_bdb_216_dp_max_link_rate(const int vbt_max_link_rate) int intel_bios_dp_max_link_rate(const struct intel_bios_encoder_data *devdata) { - if (!devdata || devdata->i915->display.vbt.version < 216) + if (!devdata || devdata->display->vbt.version < 216) return 0; - if (devdata->i915->display.vbt.version >= 230) + if (devdata->display->vbt.version >= 230) return parse_bdb_230_dp_max_link_rate(devdata->child.dp_max_link_rate); else return parse_bdb_216_dp_max_link_rate(devdata->child.dp_max_link_rate); @@ -2480,7 +2487,7 @@ int intel_bios_dp_max_link_rate(const struct intel_bios_encoder_data *devdata) int intel_bios_dp_max_lane_count(const struct intel_bios_encoder_data *devdata) { - if (!devdata || devdata->i915->display.vbt.version < 244) + if (!devdata || devdata->display->vbt.version < 244) return 0; return devdata->child.dp_max_lane_count + 1; @@ -2489,10 +2496,10 @@ int intel_bios_dp_max_lane_count(const struct intel_bios_encoder_data *devdata) static void sanitize_device_type(struct intel_bios_encoder_data *devdata, enum port port) { - struct drm_i915_private *i915 = devdata->i915; + struct intel_display *display = devdata->display; bool is_hdmi; - if (port != PORT_A || DISPLAY_VER(i915) >= 12) + if (port != PORT_A || DISPLAY_VER(display) >= 12) return; if (!intel_bios_encoder_supports_dvi(devdata)) @@ -2500,7 +2507,7 @@ static void sanitize_device_type(struct intel_bios_encoder_data *devdata, is_hdmi = intel_bios_encoder_supports_hdmi(devdata); - drm_dbg_kms(&i915->drm, "VBT claims port A supports DVI%s, ignoring\n", + drm_dbg_kms(display->drm, "VBT claims port A supports DVI%s, ignoring\n", is_hdmi ? "/HDMI" : ""); devdata->child.device_type &= ~DEVICE_TYPE_TMDS_DVI_SIGNALING; @@ -2510,7 +2517,8 @@ static void sanitize_device_type(struct intel_bios_encoder_data *devdata, static void sanitize_hdmi_level_shift(struct intel_bios_encoder_data *devdata, enum port port) { - struct drm_i915_private *i915 = devdata->i915; + struct intel_display *display = devdata->display; + struct drm_i915_private *i915 = to_i915(display->drm); if (!intel_bios_encoder_supports_dvi(devdata)) return; @@ -2521,7 +2529,8 @@ static void sanitize_hdmi_level_shift(struct intel_bios_encoder_data *devdata, * up to 11, whereas the BDW max is 9. */ if (IS_BROADWELL(i915) && devdata->child.hdmi_level_shifter_value > 9) { - drm_dbg_kms(&i915->drm, "Bogus port %c VBT HDMI level shift %d, adjusting to %d\n", + drm_dbg_kms(display->drm, + "Bogus port %c VBT HDMI level shift %d, adjusting to %d\n", port_name(port), devdata->child.hdmi_level_shifter_value, 9); devdata->child.hdmi_level_shifter_value = 9; @@ -2569,14 +2578,14 @@ intel_bios_encoder_supports_dsi(const struct intel_bios_encoder_data *devdata) bool intel_bios_encoder_is_lspcon(const struct intel_bios_encoder_data *devdata) { - return devdata && HAS_LSPCON(devdata->i915) && devdata->child.lspcon; + return devdata && HAS_LSPCON(devdata->display) && devdata->child.lspcon; } /* This is an index in the HDMI/DVI DDI buffer translation table, or -1 */ int intel_bios_hdmi_level_shift(const struct intel_bios_encoder_data *devdata) { - if (!devdata || devdata->i915->display.vbt.version < 158 || - DISPLAY_VER(devdata->i915) >= 14) + if (!devdata || devdata->display->vbt.version < 158 || + DISPLAY_VER(devdata->display) >= 14) return -1; return devdata->child.hdmi_level_shifter_value; @@ -2584,7 +2593,7 @@ int intel_bios_hdmi_level_shift(const struct intel_bios_encoder_data *devdata) int intel_bios_hdmi_max_tmds_clock(const struct intel_bios_encoder_data *devdata) { - if (!devdata || devdata->i915->display.vbt.version < 204) + if (!devdata || devdata->display->vbt.version < 204) return 0; switch (devdata->child.hdmi_max_data_rate) { @@ -2606,8 +2615,9 @@ int intel_bios_hdmi_max_tmds_clock(const struct intel_bios_encoder_data *devdata } } -static bool is_port_valid(struct drm_i915_private *i915, enum port port) +static bool is_port_valid(struct intel_display *display, enum port port) { + struct drm_i915_private *i915 = to_i915(display->drm); /* * On some ICL SKUs port F is not present, but broken VBTs mark * the port as present. Only try to initialize port F for the @@ -2621,7 +2631,7 @@ static bool is_port_valid(struct drm_i915_private *i915, enum port port) static void print_ddi_port(const struct intel_bios_encoder_data *devdata) { - struct drm_i915_private *i915 = devdata->i915; + struct intel_display *display = devdata->display; const struct child_device_config *child = &devdata->child; bool is_dvi, is_hdmi, is_dp, is_edp, is_dsi, is_crt, supports_typec_usb, supports_tbt; int dp_boost_level, dp_max_link_rate, hdmi_boost_level, hdmi_level_shift, max_tmds_clock; @@ -2641,7 +2651,7 @@ static void print_ddi_port(const struct intel_bios_encoder_data *devdata) supports_typec_usb = intel_bios_encoder_supports_typec_usb(devdata); supports_tbt = intel_bios_encoder_supports_tbt(devdata); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Port %c VBT info: CRT:%d DVI:%d HDMI:%d DP:%d eDP:%d DSI:%d DP++:%d LSPCON:%d USB-Type-C:%d TBT:%d DSC:%d\n", port_name(port), is_crt, is_dvi, is_hdmi, is_dp, is_edp, is_dsi, intel_bios_encoder_supports_dp_dual_mode(devdata), @@ -2651,33 +2661,33 @@ static void print_ddi_port(const struct intel_bios_encoder_data *devdata) hdmi_level_shift = intel_bios_hdmi_level_shift(devdata); if (hdmi_level_shift >= 0) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Port %c VBT HDMI level shift: %d\n", port_name(port), hdmi_level_shift); } max_tmds_clock = intel_bios_hdmi_max_tmds_clock(devdata); if (max_tmds_clock) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Port %c VBT HDMI max TMDS clock: %d kHz\n", port_name(port), max_tmds_clock); /* I_boost config for SKL and above */ dp_boost_level = intel_bios_dp_boost_level(devdata); if (dp_boost_level) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Port %c VBT (e)DP boost level: %d\n", port_name(port), dp_boost_level); hdmi_boost_level = intel_bios_hdmi_boost_level(devdata); if (hdmi_boost_level) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Port %c VBT HDMI boost level: %d\n", port_name(port), hdmi_boost_level); dp_max_link_rate = intel_bios_dp_max_link_rate(devdata); if (dp_max_link_rate) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Port %c VBT DP max link rate: %d\n", port_name(port), dp_max_link_rate); @@ -2685,22 +2695,22 @@ static void print_ddi_port(const struct intel_bios_encoder_data *devdata) * FIXME need to implement support for VBT * vswing/preemph tables should this ever trigger. */ - drm_WARN(&i915->drm, child->use_vbt_vswing, + drm_WARN(display->drm, child->use_vbt_vswing, "Port %c asks to use VBT vswing/preemph tables\n", port_name(port)); } static void parse_ddi_port(struct intel_bios_encoder_data *devdata) { - struct drm_i915_private *i915 = devdata->i915; + struct intel_display *display = devdata->display; enum port port; port = intel_bios_encoder_port(devdata); if (port == PORT_NONE) return; - if (!is_port_valid(i915, port)) { - drm_dbg_kms(&i915->drm, + if (!is_port_valid(display, port)) { + drm_dbg_kms(display->drm, "VBT reports port %c as supported, but that can't be true: skipping\n", port_name(port)); return; @@ -2710,22 +2720,24 @@ static void parse_ddi_port(struct intel_bios_encoder_data *devdata) sanitize_hdmi_level_shift(devdata, port); } -static bool has_ddi_port_info(struct drm_i915_private *i915) +static bool has_ddi_port_info(struct intel_display *display) { - return DISPLAY_VER(i915) >= 5 || IS_G4X(i915); + struct drm_i915_private *i915 = to_i915(display->drm); + + return DISPLAY_VER(display) >= 5 || IS_G4X(i915); } -static void parse_ddi_ports(struct drm_i915_private *i915) +static void parse_ddi_ports(struct intel_display *display) { struct intel_bios_encoder_data *devdata; - if (!has_ddi_port_info(i915)) + if (!has_ddi_port_info(display)) return; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) + list_for_each_entry(devdata, &display->vbt.display_devices, node) parse_ddi_port(devdata); - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) + list_for_each_entry(devdata, &display->vbt.display_devices, node) print_ddi_port(devdata); } @@ -2751,27 +2763,27 @@ static int child_device_expected_size(u16 version) return 22; } -static bool child_device_size_valid(struct drm_i915_private *i915, int size) +static bool child_device_size_valid(struct intel_display *display, int size) { int expected_size; - expected_size = child_device_expected_size(i915->display.vbt.version); + expected_size = child_device_expected_size(display->vbt.version); if (expected_size < 0) { expected_size = sizeof(struct child_device_config); - drm_dbg(&i915->drm, + drm_dbg(display->drm, "Expected child device config size for VBT version %u not known; assuming %d\n", - i915->display.vbt.version, expected_size); + display->vbt.version, expected_size); } /* Flag an error for unexpected size, but continue anyway. */ if (size != expected_size) - drm_err(&i915->drm, + drm_err(display->drm, "Unexpected child device config size %d (expected %d for VBT version %u)\n", - size, expected_size, i915->display.vbt.version); + size, expected_size, display->vbt.version); /* The legacy sized child device config is the minimum we need. */ if (size < LEGACY_CHILD_DEVICE_CONFIG_SIZE) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Child device config size %d is too small.\n", size); return false; @@ -2781,8 +2793,9 @@ static bool child_device_size_valid(struct drm_i915_private *i915, int size) } static void -parse_general_definitions(struct drm_i915_private *i915) +parse_general_definitions(struct intel_display *display) { + struct drm_i915_private *i915 = to_i915(display->drm); const struct bdb_general_definitions *defs; struct intel_bios_encoder_data *devdata; const struct child_device_config *child; @@ -2790,27 +2803,27 @@ parse_general_definitions(struct drm_i915_private *i915) u16 block_size; int bus_pin; - defs = bdb_find_section(i915, BDB_GENERAL_DEFINITIONS); + defs = bdb_find_section(display, BDB_GENERAL_DEFINITIONS); if (!defs) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "No general definition block is found, no devices defined.\n"); return; } block_size = get_blocksize(defs); if (block_size < sizeof(*defs)) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "General definitions block too small (%u)\n", block_size); return; } bus_pin = defs->crt_ddc_gmbus_pin; - drm_dbg_kms(&i915->drm, "crt_ddc_bus_pin: %d\n", bus_pin); + drm_dbg_kms(display->drm, "crt_ddc_bus_pin: %d\n", bus_pin); if (intel_gmbus_is_valid_pin(i915, bus_pin)) - i915->display.vbt.crt_ddc_pin = bus_pin; + display->vbt.crt_ddc_pin = bus_pin; - if (!child_device_size_valid(i915, defs->child_dev_size)) + if (!child_device_size_valid(display, defs->child_dev_size)) return; /* get the number of child device */ @@ -2821,7 +2834,7 @@ parse_general_definitions(struct drm_i915_private *i915) if (!child->device_type) continue; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Found VBT child device with type 0x%x\n", child->device_type); @@ -2829,7 +2842,7 @@ parse_general_definitions(struct drm_i915_private *i915) if (!devdata) break; - devdata->i915 = i915; + devdata->display = display; /* * Copy as much as we know (sizeof) and is available @@ -2839,37 +2852,39 @@ parse_general_definitions(struct drm_i915_private *i915) memcpy(&devdata->child, child, min_t(size_t, defs->child_dev_size, sizeof(*child))); - list_add_tail(&devdata->node, &i915->display.vbt.display_devices); + list_add_tail(&devdata->node, &display->vbt.display_devices); } - if (list_empty(&i915->display.vbt.display_devices)) - drm_dbg_kms(&i915->drm, + if (list_empty(&display->vbt.display_devices)) + drm_dbg_kms(display->drm, "no child dev is parsed from VBT\n"); } /* Common defaults which may be overridden by VBT. */ static void -init_vbt_defaults(struct drm_i915_private *i915) +init_vbt_defaults(struct intel_display *display) { - i915->display.vbt.crt_ddc_pin = GMBUS_PIN_VGADDC; + struct drm_i915_private *i915 = to_i915(display->drm); + + display->vbt.crt_ddc_pin = GMBUS_PIN_VGADDC; /* general features */ - i915->display.vbt.int_tv_support = 1; - i915->display.vbt.int_crt_support = 1; + display->vbt.int_tv_support = 1; + display->vbt.int_crt_support = 1; /* driver features */ - i915->display.vbt.int_lvds_support = 1; + display->vbt.int_lvds_support = 1; /* Default to using SSC */ - i915->display.vbt.lvds_use_ssc = 1; + display->vbt.lvds_use_ssc = 1; /* * Core/SandyBridge/IvyBridge use alternative (120MHz) reference * clock for LVDS. */ - i915->display.vbt.lvds_ssc_freq = intel_bios_ssc_frequency(i915, - !HAS_PCH_SPLIT(i915)); - drm_dbg_kms(&i915->drm, "Set default to SSC at %d kHz\n", - i915->display.vbt.lvds_ssc_freq); + display->vbt.lvds_ssc_freq = intel_bios_ssc_frequency(display, + !HAS_PCH_SPLIT(i915)); + drm_dbg_kms(display->drm, "Set default to SSC at %d kHz\n", + display->vbt.lvds_ssc_freq); } /* Common defaults which may be overridden by VBT. */ @@ -2885,12 +2900,13 @@ init_vbt_panel_defaults(struct intel_panel *panel) /* Defaults to initialize only if there is no VBT. */ static void -init_vbt_missing_defaults(struct drm_i915_private *i915) +init_vbt_missing_defaults(struct intel_display *display) { - unsigned int ports = DISPLAY_RUNTIME_INFO(i915)->port_mask; + struct drm_i915_private *i915 = to_i915(display->drm); + unsigned int ports = DISPLAY_RUNTIME_INFO(display)->port_mask; enum port port; - if (!HAS_DDI(i915) && !IS_CHERRYVIEW(i915)) + if (!HAS_DDI(display) && !IS_CHERRYVIEW(i915)) return; for_each_port_masked(port, ports) { @@ -2910,7 +2926,7 @@ init_vbt_missing_defaults(struct drm_i915_private *i915) if (!devdata) break; - devdata->i915 = i915; + devdata->display = display; child = &devdata->child; if (port == PORT_F) @@ -2929,15 +2945,15 @@ init_vbt_missing_defaults(struct drm_i915_private *i915) if (port == PORT_A) child->device_type |= DEVICE_TYPE_INTERNAL_CONNECTOR; - list_add_tail(&devdata->node, &i915->display.vbt.display_devices); + list_add_tail(&devdata->node, &display->vbt.display_devices); - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Generating default VBT child device with type 0x04%x on port %c\n", child->device_type, port_name(port)); } /* Bypass some minimum baseline VBT version checks */ - i915->display.vbt.version = 155; + display->vbt.version = 155; } static const struct bdb_header *get_bdb_header(const struct vbt_header *vbt) @@ -2949,13 +2965,13 @@ static const struct bdb_header *get_bdb_header(const struct vbt_header *vbt) /** * intel_bios_is_valid_vbt - does the given buffer contain a valid VBT - * @i915: the device + * @display: display device * @buf: pointer to a buffer to validate * @size: size of the buffer * * Returns true on valid VBT. */ -bool intel_bios_is_valid_vbt(struct drm_i915_private *i915, +bool intel_bios_is_valid_vbt(struct intel_display *display, const void *buf, size_t size) { const struct vbt_header *vbt = buf; @@ -2965,17 +2981,18 @@ bool intel_bios_is_valid_vbt(struct drm_i915_private *i915, return false; if (sizeof(struct vbt_header) > size) { - drm_dbg_kms(&i915->drm, "VBT header incomplete\n"); + drm_dbg_kms(display->drm, "VBT header incomplete\n"); return false; } if (memcmp(vbt->signature, "$VBT", 4)) { - drm_dbg_kms(&i915->drm, "VBT invalid signature\n"); + drm_dbg_kms(display->drm, "VBT invalid signature\n"); return false; } if (vbt->vbt_size > size) { - drm_dbg_kms(&i915->drm, "VBT incomplete (vbt_size overflows)\n"); + drm_dbg_kms(display->drm, + "VBT incomplete (vbt_size overflows)\n"); return false; } @@ -2985,48 +3002,48 @@ bool intel_bios_is_valid_vbt(struct drm_i915_private *i915, vbt->bdb_offset, sizeof(struct bdb_header), size)) { - drm_dbg_kms(&i915->drm, "BDB header incomplete\n"); + drm_dbg_kms(display->drm, "BDB header incomplete\n"); return false; } bdb = get_bdb_header(vbt); if (range_overflows_t(size_t, vbt->bdb_offset, bdb->bdb_size, size)) { - drm_dbg_kms(&i915->drm, "BDB incomplete\n"); + drm_dbg_kms(display->drm, "BDB incomplete\n"); return false; } return vbt; } -static struct vbt_header *firmware_get_vbt(struct drm_i915_private *i915, +static struct vbt_header *firmware_get_vbt(struct intel_display *display, size_t *size) { struct vbt_header *vbt = NULL; const struct firmware *fw = NULL; - const char *name = i915->display.params.vbt_firmware; + const char *name = display->params.vbt_firmware; int ret; if (!name || !*name) return NULL; - ret = request_firmware(&fw, name, i915->drm.dev); + ret = request_firmware(&fw, name, display->drm->dev); if (ret) { - drm_err(&i915->drm, + drm_err(display->drm, "Requesting VBT firmware \"%s\" failed (%d)\n", name, ret); return NULL; } - if (intel_bios_is_valid_vbt(i915, fw->data, fw->size)) { + if (intel_bios_is_valid_vbt(display, fw->data, fw->size)) { vbt = kmemdup(fw->data, fw->size, GFP_KERNEL); if (vbt) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Found valid VBT firmware \"%s\"\n", name); if (size) *size = fw->size; } } else { - drm_dbg_kms(&i915->drm, "Invalid VBT firmware \"%s\"\n", + drm_dbg_kms(display->drm, "Invalid VBT firmware \"%s\"\n", name); } @@ -3042,9 +3059,10 @@ static u32 intel_spi_read(struct intel_uncore *uncore, u32 offset) return intel_uncore_read(uncore, PRIMARY_SPI_TRIGGER); } -static struct vbt_header *spi_oprom_get_vbt(struct drm_i915_private *i915, +static struct vbt_header *spi_oprom_get_vbt(struct intel_display *display, size_t *size) { + struct drm_i915_private *i915 = to_i915(display->drm); u32 count, data, found, store = 0; u32 static_region, oprom_offset; u32 oprom_size = 0x200000; @@ -3081,10 +3099,10 @@ static struct vbt_header *spi_oprom_get_vbt(struct drm_i915_private *i915, for (count = 0; count < vbt_size; count += 4) *(vbt + store++) = intel_spi_read(&i915->uncore, found + count); - if (!intel_bios_is_valid_vbt(i915, vbt, vbt_size)) + if (!intel_bios_is_valid_vbt(display, vbt, vbt_size)) goto err_free_vbt; - drm_dbg_kms(&i915->drm, "Found valid VBT in SPI flash\n"); + drm_dbg_kms(display->drm, "Found valid VBT in SPI flash\n"); if (size) *size = vbt_size; @@ -3097,10 +3115,10 @@ err_not_found: return NULL; } -static struct vbt_header *oprom_get_vbt(struct drm_i915_private *i915, +static struct vbt_header *oprom_get_vbt(struct intel_display *display, size_t *sizep) { - struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct pci_dev *pdev = to_pci_dev(display->drm->dev); void __iomem *p = NULL, *oprom; struct vbt_header *vbt; u16 vbt_size; @@ -3124,13 +3142,13 @@ static struct vbt_header *oprom_get_vbt(struct drm_i915_private *i915, goto err_unmap_oprom; if (sizeof(struct vbt_header) > size) { - drm_dbg(&i915->drm, "VBT header incomplete\n"); + drm_dbg(display->drm, "VBT header incomplete\n"); goto err_unmap_oprom; } vbt_size = ioread16(p + offsetof(struct vbt_header, vbt_size)); if (vbt_size > size) { - drm_dbg(&i915->drm, + drm_dbg(display->drm, "VBT incomplete (vbt_size overflows)\n"); goto err_unmap_oprom; } @@ -3142,7 +3160,7 @@ static struct vbt_header *oprom_get_vbt(struct drm_i915_private *i915, memcpy_fromio(vbt, p, vbt_size); - if (!intel_bios_is_valid_vbt(i915, vbt, vbt_size)) + if (!intel_bios_is_valid_vbt(display, vbt, vbt_size)) goto err_free_vbt; pci_unmap_rom(pdev, oprom); @@ -3150,7 +3168,7 @@ static struct vbt_header *oprom_get_vbt(struct drm_i915_private *i915, if (sizep) *sizep = vbt_size; - drm_dbg_kms(&i915->drm, "Found valid VBT in PCI ROM\n"); + drm_dbg_kms(display->drm, "Found valid VBT in PCI ROM\n"); return vbt; @@ -3162,16 +3180,17 @@ err_unmap_oprom: return NULL; } -static const struct vbt_header *intel_bios_get_vbt(struct drm_i915_private *i915, +static const struct vbt_header *intel_bios_get_vbt(struct intel_display *display, size_t *sizep) { + struct drm_i915_private *i915 = to_i915(display->drm); const struct vbt_header *vbt = NULL; intel_wakeref_t wakeref; - vbt = firmware_get_vbt(i915, sizep); + vbt = firmware_get_vbt(display, sizep); if (!vbt) - vbt = intel_opregion_get_vbt(i915, sizep); + vbt = intel_opregion_get_vbt(display, sizep); /* * If the OpRegion does not have VBT, look in SPI flash @@ -3179,76 +3198,77 @@ static const struct vbt_header *intel_bios_get_vbt(struct drm_i915_private *i915 */ if (!vbt && IS_DGFX(i915)) with_intel_runtime_pm(&i915->runtime_pm, wakeref) - vbt = spi_oprom_get_vbt(i915, sizep); + vbt = spi_oprom_get_vbt(display, sizep); if (!vbt) with_intel_runtime_pm(&i915->runtime_pm, wakeref) - vbt = oprom_get_vbt(i915, sizep); + vbt = oprom_get_vbt(display, sizep); return vbt; } /** * intel_bios_init - find VBT and initialize settings from the BIOS - * @i915: i915 device instance + * @display: display device instance * * Parse and initialize settings from the Video BIOS Tables (VBT). If the VBT * was not found in ACPI OpRegion, try to find it in PCI ROM first. Also * initialize some defaults if the VBT is not present at all. */ -void intel_bios_init(struct drm_i915_private *i915) +void intel_bios_init(struct intel_display *display) { const struct vbt_header *vbt; const struct bdb_header *bdb; - INIT_LIST_HEAD(&i915->display.vbt.display_devices); - INIT_LIST_HEAD(&i915->display.vbt.bdb_blocks); + INIT_LIST_HEAD(&display->vbt.display_devices); + INIT_LIST_HEAD(&display->vbt.bdb_blocks); - if (!HAS_DISPLAY(i915)) { - drm_dbg_kms(&i915->drm, + if (!HAS_DISPLAY(display)) { + drm_dbg_kms(display->drm, "Skipping VBT init due to disabled display.\n"); return; } - init_vbt_defaults(i915); + init_vbt_defaults(display); - vbt = intel_bios_get_vbt(i915, NULL); + vbt = intel_bios_get_vbt(display, NULL); if (!vbt) goto out; bdb = get_bdb_header(vbt); - i915->display.vbt.version = bdb->version; + display->vbt.version = bdb->version; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "VBT signature \"%.*s\", BDB version %d\n", - (int)sizeof(vbt->signature), vbt->signature, i915->display.vbt.version); + (int)sizeof(vbt->signature), vbt->signature, + display->vbt.version); - init_bdb_blocks(i915, bdb); + init_bdb_blocks(display, bdb); /* Grab useful general definitions */ - parse_general_features(i915); - parse_general_definitions(i915); - parse_driver_features(i915); + parse_general_features(display); + parse_general_definitions(display); + parse_driver_features(display); /* Depends on child device list */ - parse_compression_parameters(i915); + parse_compression_parameters(display); out: if (!vbt) { - drm_info(&i915->drm, + drm_info(display->drm, "Failed to find VBIOS tables (VBT)\n"); - init_vbt_missing_defaults(i915); + init_vbt_missing_defaults(display); } /* Further processing on pre-parsed or generated child device data */ - parse_sdvo_device_mapping(i915); - parse_ddi_ports(i915); + parse_sdvo_device_mapping(display); + parse_ddi_ports(display); kfree(vbt); } -static void intel_bios_init_panel(struct drm_i915_private *i915, +static void intel_bios_init_panel(struct intel_display *display, struct intel_panel *panel, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid, @@ -3256,63 +3276,64 @@ static void intel_bios_init_panel(struct drm_i915_private *i915, { /* already have it? */ if (panel->vbt.panel_type >= 0) { - drm_WARN_ON(&i915->drm, !use_fallback); + drm_WARN_ON(display->drm, !use_fallback); return; } - panel->vbt.panel_type = get_panel_type(i915, devdata, + panel->vbt.panel_type = get_panel_type(display, devdata, drm_edid, use_fallback); if (panel->vbt.panel_type < 0) { - drm_WARN_ON(&i915->drm, use_fallback); + drm_WARN_ON(display->drm, use_fallback); return; } init_vbt_panel_defaults(panel); - parse_panel_options(i915, panel); - parse_generic_dtd(i915, panel); - parse_lfp_data(i915, panel); - parse_lfp_backlight(i915, panel); - parse_sdvo_lvds_data(i915, panel); - parse_panel_driver_features(i915, panel); - parse_power_conservation_features(i915, panel); - parse_edp(i915, panel); - parse_psr(i915, panel); - parse_mipi_config(i915, panel); - parse_mipi_sequence(i915, panel); + parse_panel_options(display, panel); + parse_generic_dtd(display, panel); + parse_lfp_data(display, panel); + parse_lfp_backlight(display, panel); + parse_sdvo_lvds_data(display, panel); + parse_panel_driver_features(display, panel); + parse_power_conservation_features(display, panel); + parse_edp(display, panel); + parse_psr(display, panel); + parse_mipi_config(display, panel); + parse_mipi_sequence(display, panel); } -void intel_bios_init_panel_early(struct drm_i915_private *i915, +void intel_bios_init_panel_early(struct intel_display *display, struct intel_panel *panel, const struct intel_bios_encoder_data *devdata) { - intel_bios_init_panel(i915, panel, devdata, NULL, false); + intel_bios_init_panel(display, panel, devdata, NULL, false); } -void intel_bios_init_panel_late(struct drm_i915_private *i915, +void intel_bios_init_panel_late(struct intel_display *display, struct intel_panel *panel, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid) { - intel_bios_init_panel(i915, panel, devdata, drm_edid, true); + intel_bios_init_panel(display, panel, devdata, drm_edid, true); } /** * intel_bios_driver_remove - Free any resources allocated by intel_bios_init() - * @i915: i915 device instance + * @display: display device instance */ -void intel_bios_driver_remove(struct drm_i915_private *i915) +void intel_bios_driver_remove(struct intel_display *display) { struct intel_bios_encoder_data *devdata, *nd; struct bdb_block_entry *entry, *ne; - list_for_each_entry_safe(devdata, nd, &i915->display.vbt.display_devices, node) { + list_for_each_entry_safe(devdata, nd, &display->vbt.display_devices, + node) { list_del(&devdata->node); kfree(devdata->dsc); kfree(devdata); } - list_for_each_entry_safe(entry, ne, &i915->display.vbt.bdb_blocks, node) { + list_for_each_entry_safe(entry, ne, &display->vbt.bdb_blocks, node) { list_del(&entry->node); kfree(entry); } @@ -3336,22 +3357,22 @@ void intel_bios_fini_panel(struct intel_panel *panel) /** * intel_bios_is_tv_present - is integrated TV present in VBT - * @i915: i915 device instance + * @display: display device instance * * Return true if TV is present. If no child devices were parsed from VBT, * assume TV is present. */ -bool intel_bios_is_tv_present(struct drm_i915_private *i915) +bool intel_bios_is_tv_present(struct intel_display *display) { const struct intel_bios_encoder_data *devdata; - if (!i915->display.vbt.int_tv_support) + if (!display->vbt.int_tv_support) return false; - if (list_empty(&i915->display.vbt.display_devices)) + if (list_empty(&display->vbt.display_devices)) return true; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { const struct child_device_config *child = &devdata->child; /* @@ -3377,20 +3398,21 @@ bool intel_bios_is_tv_present(struct drm_i915_private *i915) /** * intel_bios_is_lvds_present - is LVDS present in VBT - * @i915: i915 device instance + * @display: display device instance * @i2c_pin: i2c pin for LVDS if present * * Return true if LVDS is present. If no child devices were parsed from VBT, * assume LVDS is present. */ -bool intel_bios_is_lvds_present(struct drm_i915_private *i915, u8 *i2c_pin) +bool intel_bios_is_lvds_present(struct intel_display *display, u8 *i2c_pin) { + struct drm_i915_private *i915 = to_i915(display->drm); const struct intel_bios_encoder_data *devdata; - if (list_empty(&i915->display.vbt.display_devices)) + if (list_empty(&display->vbt.display_devices)) return true; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { const struct child_device_config *child = &devdata->child; /* If the device type is not LFP, continue. @@ -3417,7 +3439,7 @@ bool intel_bios_is_lvds_present(struct drm_i915_private *i915, u8 *i2c_pin) * additional data. Trust that if the VBT was written into * the OpRegion then they have validated the LVDS's existence. */ - return intel_opregion_vbt_present(i915); + return intel_opregion_vbt_present(display); } return false; @@ -3425,25 +3447,25 @@ bool intel_bios_is_lvds_present(struct drm_i915_private *i915, u8 *i2c_pin) /** * intel_bios_is_port_present - is the specified digital port present - * @i915: i915 device instance + * @display: display device instance * @port: port to check * * Return true if the device in %port is present. */ -bool intel_bios_is_port_present(struct drm_i915_private *i915, enum port port) +bool intel_bios_is_port_present(struct intel_display *display, enum port port) { const struct intel_bios_encoder_data *devdata; - if (WARN_ON(!has_ddi_port_info(i915))) + if (WARN_ON(!has_ddi_port_info(display))) return true; - if (!is_port_valid(i915, port)) + if (!is_port_valid(display, port)) return false; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { const struct child_device_config *child = &devdata->child; - if (dvo_port_to_port(i915, child->dvo_port) == port) + if (dvo_port_to_port(display, child->dvo_port) == port) return true; } @@ -3474,32 +3496,32 @@ bool intel_bios_encoder_supports_dp_dual_mode(const struct intel_bios_encoder_da /** * intel_bios_is_dsi_present - is DSI present in VBT - * @i915: i915 device instance + * @display: display device instance * @port: port for DSI if present * * Return true if DSI is present, and return the port in %port. */ -bool intel_bios_is_dsi_present(struct drm_i915_private *i915, +bool intel_bios_is_dsi_present(struct intel_display *display, enum port *port) { const struct intel_bios_encoder_data *devdata; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { const struct child_device_config *child = &devdata->child; u8 dvo_port = child->dvo_port; if (!(child->device_type & DEVICE_TYPE_MIPI_OUTPUT)) continue; - if (dsi_dvo_port_to_port(i915, dvo_port) == PORT_NONE) { - drm_dbg_kms(&i915->drm, + if (dsi_dvo_port_to_port(display, dvo_port) == PORT_NONE) { + drm_dbg_kms(display->drm, "VBT has unsupported DSI port %c\n", port_name(dvo_port - DVO_PORT_MIPIA)); continue; } if (port) - *port = dsi_dvo_port_to_port(i915, dvo_port); + *port = dsi_dvo_port_to_port(display, dvo_port); return true; } @@ -3510,7 +3532,7 @@ static void fill_dsc(struct intel_crtc_state *crtc_state, struct dsc_compression_parameters_entry *dsc, int dsc_max_bpc) { - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; int bpc = 8; @@ -3524,13 +3546,13 @@ static void fill_dsc(struct intel_crtc_state *crtc_state, else if (dsc->support_8bpc && dsc_max_bpc >= 8) bpc = 8; else - drm_dbg_kms(&i915->drm, "VBT: Unsupported BPC %d for DCS\n", + drm_dbg_kms(display->drm, "VBT: Unsupported BPC %d for DCS\n", dsc_max_bpc); crtc_state->pipe_bpp = bpc * 3; - crtc_state->dsc.compressed_bpp_x16 = to_bpp_x16(min(crtc_state->pipe_bpp, - VBT_DSC_MAX_BPP(dsc->max_bpp))); + crtc_state->dsc.compressed_bpp_x16 = fxp_q4_from_int(min(crtc_state->pipe_bpp, + VBT_DSC_MAX_BPP(dsc->max_bpp))); /* * FIXME: This is ugly, and slice count should take DSC engine @@ -3545,14 +3567,16 @@ static void fill_dsc(struct intel_crtc_state *crtc_state, } else { /* FIXME */ if (!(dsc->slices_per_line & BIT(0))) - drm_dbg_kms(&i915->drm, "VBT: Unsupported DSC slice count for DSI\n"); + drm_dbg_kms(display->drm, + "VBT: Unsupported DSC slice count for DSI\n"); crtc_state->dsc.slice_count = 1; } if (crtc_state->hw.adjusted_mode.crtc_hdisplay % crtc_state->dsc.slice_count != 0) - drm_dbg_kms(&i915->drm, "VBT: DSC hdisplay %d not divisible by slice count %d\n", + drm_dbg_kms(display->drm, + "VBT: DSC hdisplay %d not divisible by slice count %d\n", crtc_state->hw.adjusted_mode.crtc_hdisplay, crtc_state->dsc.slice_count); @@ -3576,16 +3600,16 @@ bool intel_bios_get_dsc_params(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, int dsc_max_bpc) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); const struct intel_bios_encoder_data *devdata; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { const struct child_device_config *child = &devdata->child; if (!(child->device_type & DEVICE_TYPE_MIPI_OUTPUT)) continue; - if (dsi_dvo_port_to_port(i915, child->dvo_port) == encoder->port) { + if (dsi_dvo_port_to_port(display, child->dvo_port) == encoder->port) { if (!devdata->dsc) return false; @@ -3645,12 +3669,13 @@ static const u8 direct_aux_ch_map[] = { [AUX_CH_I] = DP_AUX_I, /* aka AUX_CH_USBC6 */ }; -static enum aux_ch map_aux_ch(struct drm_i915_private *i915, u8 aux_channel) +static enum aux_ch map_aux_ch(struct intel_display *display, u8 aux_channel) { + struct drm_i915_private *i915 = to_i915(display->drm); const u8 *aux_ch_map; int i, n_entries; - if (DISPLAY_VER(i915) >= 13) { + if (DISPLAY_VER(display) >= 13) { aux_ch_map = adlp_aux_ch_map; n_entries = ARRAY_SIZE(adlp_aux_ch_map); } else if (IS_ALDERLAKE_S(i915)) { @@ -3669,7 +3694,7 @@ static enum aux_ch map_aux_ch(struct drm_i915_private *i915, u8 aux_channel) return i; } - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Ignoring alternate AUX CH: VBT claims AUX 0x%x, which is not valid for this platform\n", aux_channel); @@ -3681,22 +3706,22 @@ enum aux_ch intel_bios_dp_aux_ch(const struct intel_bios_encoder_data *devdata) if (!devdata || !devdata->child.aux_channel) return AUX_CH_NONE; - return map_aux_ch(devdata->i915, devdata->child.aux_channel); + return map_aux_ch(devdata->display, devdata->child.aux_channel); } bool intel_bios_dp_has_shared_aux_ch(const struct intel_bios_encoder_data *devdata) { - struct drm_i915_private *i915; + struct intel_display *display; u8 aux_channel; int count = 0; if (!devdata || !devdata->child.aux_channel) return false; - i915 = devdata->i915; + display = devdata->display; aux_channel = devdata->child.aux_channel; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { if (intel_bios_encoder_supports_dp(devdata) && aux_channel == devdata->child.aux_channel) count++; @@ -3707,18 +3732,18 @@ bool intel_bios_dp_has_shared_aux_ch(const struct intel_bios_encoder_data *devda int intel_bios_dp_boost_level(const struct intel_bios_encoder_data *devdata) { - if (!devdata || devdata->i915->display.vbt.version < 196 || !devdata->child.iboost) + if (!devdata || devdata->display->vbt.version < 196 || !devdata->child.iboost) return 0; - return translate_iboost(devdata->i915, devdata->child.dp_iboost_level); + return translate_iboost(devdata->display, devdata->child.dp_iboost_level); } int intel_bios_hdmi_boost_level(const struct intel_bios_encoder_data *devdata) { - if (!devdata || devdata->i915->display.vbt.version < 196 || !devdata->child.iboost) + if (!devdata || devdata->display->vbt.version < 196 || !devdata->child.iboost) return 0; - return translate_iboost(devdata->i915, devdata->child.hdmi_iboost_level); + return translate_iboost(devdata->display, devdata->child.hdmi_iboost_level); } int intel_bios_hdmi_ddc_pin(const struct intel_bios_encoder_data *devdata) @@ -3726,17 +3751,17 @@ int intel_bios_hdmi_ddc_pin(const struct intel_bios_encoder_data *devdata) if (!devdata || !devdata->child.ddc_pin) return 0; - return map_ddc_pin(devdata->i915, devdata->child.ddc_pin); + return map_ddc_pin(devdata->display, devdata->child.ddc_pin); } bool intel_bios_encoder_supports_typec_usb(const struct intel_bios_encoder_data *devdata) { - return devdata->i915->display.vbt.version >= 195 && devdata->child.dp_usb_type_c; + return devdata->display->vbt.version >= 195 && devdata->child.dp_usb_type_c; } bool intel_bios_encoder_supports_tbt(const struct intel_bios_encoder_data *devdata) { - return devdata->i915->display.vbt.version >= 209 && devdata->child.tbt; + return devdata->display->vbt.version >= 209 && devdata->child.tbt; } bool intel_bios_encoder_lane_reversal(const struct intel_bios_encoder_data *devdata) @@ -3750,11 +3775,11 @@ bool intel_bios_encoder_hpd_invert(const struct intel_bios_encoder_data *devdata } const struct intel_bios_encoder_data * -intel_bios_encoder_data_lookup(struct drm_i915_private *i915, enum port port) +intel_bios_encoder_data_lookup(struct intel_display *display, enum port port) { struct intel_bios_encoder_data *devdata; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) { + list_for_each_entry(devdata, &display->vbt.display_devices, node) { if (intel_bios_encoder_port(devdata) == port) return devdata; } @@ -3762,23 +3787,23 @@ intel_bios_encoder_data_lookup(struct drm_i915_private *i915, enum port port) return NULL; } -void intel_bios_for_each_encoder(struct drm_i915_private *i915, - void (*func)(struct drm_i915_private *i915, +void intel_bios_for_each_encoder(struct intel_display *display, + void (*func)(struct intel_display *display, const struct intel_bios_encoder_data *devdata)) { struct intel_bios_encoder_data *devdata; - list_for_each_entry(devdata, &i915->display.vbt.display_devices, node) - func(i915, devdata); + list_for_each_entry(devdata, &display->vbt.display_devices, node) + func(display, devdata); } static int intel_bios_vbt_show(struct seq_file *m, void *unused) { - struct drm_i915_private *i915 = m->private; + struct intel_display *display = m->private; const void *vbt; size_t vbt_size; - vbt = intel_bios_get_vbt(i915, &vbt_size); + vbt = intel_bios_get_vbt(display, &vbt_size); if (vbt) { seq_write(m, vbt, vbt_size); @@ -3790,10 +3815,10 @@ static int intel_bios_vbt_show(struct seq_file *m, void *unused) DEFINE_SHOW_ATTRIBUTE(intel_bios_vbt); -void intel_bios_debugfs_register(struct drm_i915_private *i915) +void intel_bios_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = i915->drm.primary; + struct drm_minor *minor = display->drm->primary; debugfs_create_file("i915_vbt", 0444, minor->debugfs_root, - i915, &intel_bios_vbt_fops); + display, &intel_bios_vbt_fops); } diff --git a/drivers/gpu/drm/i915/display/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h index 06a51be4afd8..8b703f6cfe17 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.h +++ b/drivers/gpu/drm/i915/display/intel_bios.h @@ -33,9 +33,9 @@ #include <linux/types.h> struct drm_edid; -struct drm_i915_private; struct intel_bios_encoder_data; struct intel_crtc_state; +struct intel_display; struct intel_encoder; struct intel_panel; enum aux_ch; @@ -232,28 +232,28 @@ struct mipi_pps_data { u16 panel_power_cycle_delay; } __packed; -void intel_bios_init(struct drm_i915_private *dev_priv); -void intel_bios_init_panel_early(struct drm_i915_private *dev_priv, +void intel_bios_init(struct intel_display *display); +void intel_bios_init_panel_early(struct intel_display *display, struct intel_panel *panel, const struct intel_bios_encoder_data *devdata); -void intel_bios_init_panel_late(struct drm_i915_private *dev_priv, +void intel_bios_init_panel_late(struct intel_display *display, struct intel_panel *panel, const struct intel_bios_encoder_data *devdata, const struct drm_edid *drm_edid); void intel_bios_fini_panel(struct intel_panel *panel); -void intel_bios_driver_remove(struct drm_i915_private *dev_priv); -bool intel_bios_is_valid_vbt(struct drm_i915_private *i915, +void intel_bios_driver_remove(struct intel_display *display); +bool intel_bios_is_valid_vbt(struct intel_display *display, const void *buf, size_t size); -bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv); -bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin); -bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port); -bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv, enum port *port); +bool intel_bios_is_tv_present(struct intel_display *display); +bool intel_bios_is_lvds_present(struct intel_display *display, u8 *i2c_pin); +bool intel_bios_is_port_present(struct intel_display *display, enum port port); +bool intel_bios_is_dsi_present(struct intel_display *display, enum port *port); bool intel_bios_get_dsc_params(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, int dsc_max_bpc); const struct intel_bios_encoder_data * -intel_bios_encoder_data_lookup(struct drm_i915_private *i915, enum port port); +intel_bios_encoder_data_lookup(struct intel_display *display, enum port port); bool intel_bios_encoder_supports_dvi(const struct intel_bios_encoder_data *devdata); bool intel_bios_encoder_supports_hdmi(const struct intel_bios_encoder_data *devdata); @@ -277,10 +277,10 @@ int intel_bios_hdmi_ddc_pin(const struct intel_bios_encoder_data *devdata); int intel_bios_hdmi_level_shift(const struct intel_bios_encoder_data *devdata); int intel_bios_hdmi_max_tmds_clock(const struct intel_bios_encoder_data *devdata); -void intel_bios_for_each_encoder(struct drm_i915_private *i915, - void (*func)(struct drm_i915_private *i915, +void intel_bios_for_each_encoder(struct intel_display *display, + void (*func)(struct intel_display *display, const struct intel_bios_encoder_data *devdata)); -void intel_bios_debugfs_register(struct drm_i915_private *i915); +void intel_bios_debugfs_register(struct intel_display *display); #endif /* _INTEL_BIOS_H_ */ diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 16d5550f7e5e..aa3ba66c5307 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -23,7 +23,10 @@ #include <linux/time.h> +#include <drm/drm_fixed.h> + #include "soc/intel_dram.h" + #include "hsw_ips.h" #include "i915_reg.h" #include "intel_atomic.h" @@ -2750,7 +2753,7 @@ static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state) */ int bigjoiner_interface_bits = DISPLAY_VER(i915) >= 14 ? 36 : 24; int min_cdclk_bj = - (to_bpp_int_roundup(crtc_state->dsc.compressed_bpp_x16) * + (fxp_q4_to_int_roundup(crtc_state->dsc.compressed_bpp_x16) * pixel_clock) / (2 * bigjoiner_interface_bits); min_cdclk = max(min_cdclk, min_cdclk_bj); diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 7ac50aacec73..5d701f48351b 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1313,8 +1313,8 @@ static void ilk_lut_write(const struct intel_crtc_state *crtc_state, { struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - if (crtc_state->dsb) - intel_dsb_reg_write(crtc_state->dsb, reg, val); + if (crtc_state->dsb_color_vblank) + intel_dsb_reg_write(crtc_state->dsb_color_vblank, reg, val); else intel_de_write_fw(i915, reg, val); } @@ -1337,15 +1337,15 @@ static void ilk_load_lut_8(const struct intel_crtc_state *crtc_state, * unless we either write each entry twice, * or use non-posted writes */ - if (crtc_state->dsb) - intel_dsb_nonpost_start(crtc_state->dsb); + if (crtc_state->dsb_color_vblank) + intel_dsb_nonpost_start(crtc_state->dsb_color_vblank); for (i = 0; i < 256; i++) ilk_lut_write(crtc_state, LGC_PALETTE(pipe, i), i9xx_lut_8(&lut[i])); - if (crtc_state->dsb) - intel_dsb_nonpost_end(crtc_state->dsb); + if (crtc_state->dsb_color_vblank) + intel_dsb_nonpost_end(crtc_state->dsb_color_vblank); } static void ilk_load_lut_10(const struct intel_crtc_state *crtc_state, @@ -1870,7 +1870,7 @@ void intel_color_load_luts(const struct intel_crtc_state *crtc_state) { struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - if (crtc_state->dsb) + if (crtc_state->dsb_color_vblank) return; i915->display.funcs.color->load_luts(crtc_state); @@ -1890,8 +1890,8 @@ void intel_color_commit_arm(const struct intel_crtc_state *crtc_state) i915->display.funcs.color->color_commit_arm(crtc_state); - if (crtc_state->dsb) - intel_dsb_commit(crtc_state->dsb, true); + if (crtc_state->dsb_color_commit) + intel_dsb_commit(crtc_state->dsb_color_commit, false); } void intel_color_post_update(const struct intel_crtc_state *crtc_state) @@ -1919,33 +1919,51 @@ void intel_color_prepare_commit(struct intel_atomic_state *state, if (!crtc_state->pre_csc_lut && !crtc_state->post_csc_lut) return; - crtc_state->dsb = intel_dsb_prepare(state, crtc, INTEL_DSB_0, 1024); - if (!crtc_state->dsb) + crtc_state->dsb_color_vblank = intel_dsb_prepare(state, crtc, INTEL_DSB_1, 1024); + if (!crtc_state->dsb_color_vblank) return; i915->display.funcs.color->load_luts(crtc_state); - intel_dsb_finish(crtc_state->dsb); + intel_dsb_finish(crtc_state->dsb_color_vblank); + + crtc_state->dsb_color_commit = intel_dsb_prepare(state, crtc, INTEL_DSB_0, 16); + if (!crtc_state->dsb_color_commit) { + intel_dsb_cleanup(crtc_state->dsb_color_vblank); + crtc_state->dsb_color_vblank = NULL; + return; + } + + intel_dsb_chain(state, crtc_state->dsb_color_commit, + crtc_state->dsb_color_vblank, true); + + intel_dsb_finish(crtc_state->dsb_color_commit); } void intel_color_cleanup_commit(struct intel_crtc_state *crtc_state) { - if (!crtc_state->dsb) - return; + if (crtc_state->dsb_color_commit) { + intel_dsb_cleanup(crtc_state->dsb_color_commit); + crtc_state->dsb_color_commit = NULL; + } - intel_dsb_cleanup(crtc_state->dsb); - crtc_state->dsb = NULL; + if (crtc_state->dsb_color_vblank) { + intel_dsb_cleanup(crtc_state->dsb_color_vblank); + crtc_state->dsb_color_vblank = NULL; + } } void intel_color_wait_commit(const struct intel_crtc_state *crtc_state) { - if (crtc_state->dsb) - intel_dsb_wait(crtc_state->dsb); + if (crtc_state->dsb_color_commit) + intel_dsb_wait(crtc_state->dsb_color_commit); + if (crtc_state->dsb_color_vblank) + intel_dsb_wait(crtc_state->dsb_color_vblank); } bool intel_color_uses_dsb(const struct intel_crtc_state *crtc_state) { - return crtc_state->dsb; + return crtc_state->dsb_color_vblank; } static bool intel_can_preload_luts(struct intel_atomic_state *state, diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c b/drivers/gpu/drm/i915/display/intel_combo_phy.c index 143d66951631..3252dab56430 100644 --- a/drivers/gpu/drm/i915/display/intel_combo_phy.c +++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c @@ -159,9 +159,11 @@ static bool icl_combo_phy_enabled(struct drm_i915_private *dev_priv, static bool ehl_vbt_ddi_d_present(struct drm_i915_private *i915) { - bool ddi_a_present = intel_bios_is_port_present(i915, PORT_A); - bool ddi_d_present = intel_bios_is_port_present(i915, PORT_D); - bool dsi_present = intel_bios_is_dsi_present(i915, NULL); + struct intel_display *display = &i915->display; + + bool ddi_a_present = intel_bios_is_port_present(display, PORT_A); + bool ddi_d_present = intel_bios_is_port_present(display, PORT_D); + bool dsi_present = intel_bios_is_dsi_present(display, NULL); /* * VBT's 'dvo port' field for child devices references the DDI, not diff --git a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c index 6df526e189b5..705ec5ad385c 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c +++ b/drivers/gpu/drm/i915/display/intel_crtc_state_dump.c @@ -10,6 +10,7 @@ #include "intel_crtc_state_dump.h" #include "intel_display_types.h" #include "intel_hdmi.h" +#include "intel_vdsc.h" #include "intel_vrr.h" static void intel_dump_crtc_timings(struct drm_printer *p, @@ -369,6 +370,8 @@ void intel_crtc_state_dump(const struct intel_crtc_state *pipe_config, else if (IS_VALLEYVIEW(i915)) vlv_dump_csc(&p, "wgc csc", &pipe_config->csc); + intel_vdsc_state_dump(&p, 0, pipe_config); + dump_planes: if (!state) return; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index a07aca96e551..25ff3ff0ab95 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4172,7 +4172,8 @@ static void intel_ddi_sync_state(struct intel_encoder *encoder, intel_tc_port_sanitize_mode(enc_to_dig_port(encoder), crtc_state); - if (intel_encoder_is_dp(encoder)) + if ((crtc_state && intel_crtc_has_dp_encoder(crtc_state)) || + (!crtc_state && intel_encoder_is_dp(encoder))) intel_dp_sync_state(encoder, crtc_state); } @@ -4853,9 +4854,10 @@ static bool port_in_use(struct drm_i915_private *i915, enum port port) return false; } -void intel_ddi_init(struct drm_i915_private *dev_priv, +void intel_ddi_init(struct intel_display *display, const struct intel_bios_encoder_data *devdata) { + struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_digital_port *dig_port; struct intel_encoder *encoder; bool init_hdmi, init_dp; @@ -4898,7 +4900,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, * driver. In that case we should skip initializing the corresponding * outputs. */ - if (intel_hti_uses_phy(dev_priv, phy)) { + if (intel_hti_uses_phy(display, phy)) { drm_dbg_kms(&dev_priv->drm, "PORT %c / PHY %c reserved by HTI\n", port_name(port), phy_name(phy)); return; @@ -4972,7 +4974,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, } else { drm_encoder_init(&dev_priv->drm, &encoder->base, &intel_ddi_funcs, DRM_MODE_ENCODER_TMDS, - "DDI %c/PHY %c", port_name(port), phy_name(phy)); + "DDI %c/PHY %c", port_name(port), phy_name(phy)); } intel_encoder_link_check_init(encoder, intel_ddi_link_check); diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h index 434de7196875..6d85422bdefe 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.h +++ b/drivers/gpu/drm/i915/display/intel_ddi.h @@ -15,6 +15,7 @@ struct intel_bios_encoder_data; struct intel_connector; struct intel_crtc; struct intel_crtc_state; +struct intel_display; struct intel_dp; struct intel_dpll_hw_state; struct intel_encoder; @@ -53,7 +54,7 @@ void hsw_prepare_dp_ddi_buffers(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, enum port port); -void intel_ddi_init(struct drm_i915_private *dev_priv, +void intel_ddi_init(struct intel_display *display, const struct intel_bios_encoder_data *devdata); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); void intel_ddi_enable_transcoder_func(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c2c388212e2e..78ce402a5cd0 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -39,6 +39,7 @@ #include <drm/drm_atomic_uapi.h> #include <drm/drm_damage_helper.h> #include <drm/drm_edid.h> +#include <drm/drm_fixed.h> #include <drm/drm_fourcc.h> #include <drm/drm_probe_helper.h> #include <drm/drm_rect.h> @@ -1014,9 +1015,14 @@ static bool cmrr_params_changed(const struct intel_crtc_state *old_crtc_state, old_crtc_state->cmrr.cmrr_n != new_crtc_state->cmrr.cmrr_n; } -static bool vrr_enabling(const struct intel_crtc_state *old_crtc_state, - const struct intel_crtc_state *new_crtc_state) +static bool intel_crtc_vrr_enabling(struct intel_atomic_state *state, + struct intel_crtc *crtc) { + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + if (!new_crtc_state->hw.active) return false; @@ -1026,9 +1032,14 @@ static bool vrr_enabling(const struct intel_crtc_state *old_crtc_state, vrr_params_changed(old_crtc_state, new_crtc_state))); } -static bool vrr_disabling(const struct intel_crtc_state *old_crtc_state, - const struct intel_crtc_state *new_crtc_state) +bool intel_crtc_vrr_disabling(struct intel_atomic_state *state, + struct intel_crtc *crtc) { + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + if (!old_crtc_state->hw.active) return false; @@ -1181,7 +1192,7 @@ static void intel_pre_plane_update(struct intel_atomic_state *state, intel_atomic_get_new_crtc_state(state, crtc); enum pipe pipe = crtc->pipe; - if (vrr_disabling(old_crtc_state, new_crtc_state)) { + if (intel_crtc_vrr_disabling(state, crtc)) { intel_vrr_disable(old_crtc_state); intel_crtc_update_active_timings(old_crtc_state, false); } @@ -4669,11 +4680,11 @@ intel_modeset_pipe_config(struct intel_atomic_state *state, crtc_state->fec_enable = limits->force_fec_pipes & BIT(crtc->pipe); crtc_state->max_link_bpp_x16 = limits->max_bpp_x16[crtc->pipe]; - if (crtc_state->pipe_bpp > to_bpp_int(crtc_state->max_link_bpp_x16)) { + if (crtc_state->pipe_bpp > fxp_q4_to_int(crtc_state->max_link_bpp_x16)) { drm_dbg_kms(&i915->drm, - "[CRTC:%d:%s] Link bpp limited to " BPP_X16_FMT "\n", + "[CRTC:%d:%s] Link bpp limited to " FXP_Q4_FMT "\n", crtc->base.base.id, crtc->base.name, - BPP_X16_ARGS(crtc_state->max_link_bpp_x16)); + FXP_Q4_ARGS(crtc_state->max_link_bpp_x16)); crtc_state->bw_constrained = true; } @@ -5100,7 +5111,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, if (current_config->name != pipe_config->name) { \ BUILD_BUG_ON_MSG(!__same_type(current_config->name, bool), \ __stringify(name) " is not bool"); \ - pipe_config_mismatch(&p, fastset, crtc, __stringify(name), \ + pipe_config_mismatch(&p, fastset, crtc, __stringify(name), \ "(expected %s, found %s)", \ str_yes_no(current_config->name), \ str_yes_no(pipe_config->name)); \ @@ -6249,6 +6260,8 @@ static int intel_async_flip_check_hw(struct intel_atomic_state *state, struct in case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: case I915_FORMAT_MOD_4_TILED: + case I915_FORMAT_MOD_4_TILED_BMG_CCS: + case I915_FORMAT_MOD_4_TILED_LNL_CCS: break; default: drm_dbg_kms(&i915->drm, @@ -6830,8 +6843,6 @@ static void commit_pipe_post_planes(struct intel_atomic_state *state, struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - const struct intel_crtc_state *old_crtc_state = - intel_atomic_get_old_crtc_state(state, crtc); const struct intel_crtc_state *new_crtc_state = intel_atomic_get_new_crtc_state(state, crtc); @@ -6844,7 +6855,7 @@ static void commit_pipe_post_planes(struct intel_atomic_state *state, !intel_crtc_needs_modeset(new_crtc_state)) skl_detach_scalers(new_crtc_state); - if (vrr_enabling(old_crtc_state, new_crtc_state)) + if (intel_crtc_vrr_enabling(state, crtc)) intel_vrr_enable(new_crtc_state); } @@ -6944,7 +6955,7 @@ static void intel_update_crtc(struct intel_atomic_state *state, * * FIXME Should be synchronized with the start of vblank somehow... */ - if (vrr_enabling(old_crtc_state, new_crtc_state) || + if (intel_crtc_vrr_enabling(state, crtc) || new_crtc_state->update_m_n || new_crtc_state->update_lrr) intel_crtc_update_active_timings(new_crtc_state, new_crtc_state->vrr.enable); @@ -7502,7 +7513,8 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) * * FIXME get rid of this funny new->old swapping */ - old_crtc_state->dsb = fetch_and_zero(&new_crtc_state->dsb); + old_crtc_state->dsb_color_vblank = fetch_and_zero(&new_crtc_state->dsb_color_vblank); + old_crtc_state->dsb_color_commit = fetch_and_zero(&new_crtc_state->dsb_color_commit); } /* Underruns don't always raise interrupts, so check manually */ @@ -7777,6 +7789,7 @@ bool assert_port_valid(struct drm_i915_private *i915, enum port port) void intel_setup_outputs(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct intel_encoder *encoder; bool dpd_is_edp = false; @@ -7789,7 +7802,7 @@ void intel_setup_outputs(struct drm_i915_private *dev_priv) if (intel_ddi_crt_present(dev_priv)) intel_crt_init(dev_priv); - intel_bios_for_each_encoder(dev_priv, intel_ddi_init); + intel_bios_for_each_encoder(display, intel_ddi_init); if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) vlv_dsi_init(dev_priv); @@ -7851,14 +7864,14 @@ void intel_setup_outputs(struct drm_i915_private *dev_priv) * HDMI ports that the VBT claim are DP or eDP. */ has_edp = intel_dp_is_port_edp(dev_priv, PORT_B); - has_port = intel_bios_is_port_present(dev_priv, PORT_B); + has_port = intel_bios_is_port_present(display, PORT_B); if (intel_de_read(dev_priv, VLV_DP_B) & DP_DETECTED || has_port) has_edp &= g4x_dp_init(dev_priv, VLV_DP_B, PORT_B); if ((intel_de_read(dev_priv, VLV_HDMIB) & SDVO_DETECTED || has_port) && !has_edp) g4x_hdmi_init(dev_priv, VLV_HDMIB, PORT_B); has_edp = intel_dp_is_port_edp(dev_priv, PORT_C); - has_port = intel_bios_is_port_present(dev_priv, PORT_C); + has_port = intel_bios_is_port_present(display, PORT_C); if (intel_de_read(dev_priv, VLV_DP_C) & DP_DETECTED || has_port) has_edp &= g4x_dp_init(dev_priv, VLV_DP_C, PORT_C); if ((intel_de_read(dev_priv, VLV_HDMIC) & SDVO_DETECTED || has_port) && !has_edp) @@ -7869,7 +7882,7 @@ void intel_setup_outputs(struct drm_i915_private *dev_priv) * eDP not supported on port D, * so no need to worry about it */ - has_port = intel_bios_is_port_present(dev_priv, PORT_D); + has_port = intel_bios_is_port_present(display, PORT_D); if (intel_de_read(dev_priv, CHV_DP_D) & DP_DETECTED || has_port) g4x_dp_init(dev_priv, CHV_DP_D, PORT_D); if (intel_de_read(dev_priv, CHV_HDMID) & SDVO_DETECTED || has_port) @@ -7923,7 +7936,7 @@ void intel_setup_outputs(struct drm_i915_private *dev_priv) g4x_dp_init(dev_priv, DP_D, PORT_D); if (SUPPORTS_TV(dev_priv)) - intel_tv_init(dev_priv); + intel_tv_init(display); } else if (DISPLAY_VER(dev_priv) == 2) { if (IS_I85X(dev_priv)) intel_lvds_init(dev_priv); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index b0cf6ca70952..b21d9578d5db 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -532,6 +532,9 @@ void intel_plane_fixup_bitmasks(struct intel_crtc_state *crtc_state); void intel_update_watermarks(struct drm_i915_private *i915); +bool intel_crtc_vrr_disabling(struct intel_atomic_state *state, + struct intel_crtc *crtc); + /* modesetting */ int intel_modeset_pipes_in_mask_early(struct intel_atomic_state *state, const char *reason, u8 pipe_mask); diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index 7715fc329057..0a711114ff2b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -237,7 +237,7 @@ struct intel_vbt_data { struct sdvo_device_mapping { u8 initialized; u8 dvo_port; - u8 slave_addr; + u8 target_addr; u8 dvo_wiring; u8 i2c_pin; u8 ddc_pin; diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 91757fed9c6d..74f527647aa9 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -36,6 +36,7 @@ #include "intel_pps.h" #include "intel_psr.h" #include "intel_psr_regs.h" +#include "intel_vdsc.h" #include "intel_wm.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) @@ -492,7 +493,7 @@ static void crtc_updates_info(struct seq_file *m, seq_printf(m, "%sMax update: %lluns\n", hdr, crtc->debug.vbl.max); seq_printf(m, "%sAverage update: %lluns\n", - hdr, div64_u64(crtc->debug.vbl.sum, count)); + hdr, div64_u64(crtc->debug.vbl.sum, count)); seq_printf(m, "%sOverruns > %uus: %u\n", hdr, VBLANK_EVASION_TIME_US, crtc->debug.vbl.over); } @@ -551,6 +552,7 @@ static void crtc_updates_add(struct intel_crtc *crtc) static void intel_crtc_info(struct seq_file *m, struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_printer p = drm_seq_file_printer(m); const struct intel_crtc_state *crtc_state = to_intel_crtc_state(crtc->base.state); struct intel_encoder *encoder; @@ -581,6 +583,8 @@ static void intel_crtc_info(struct seq_file *m, struct intel_crtc *crtc) crtc_state->joiner_pipes, intel_crtc_is_joiner_secondary(crtc_state) ? "slave" : "master"); + intel_vdsc_state_dump(&p, 1, crtc_state); + for_each_intel_encoder_mask(&dev_priv->drm, encoder, crtc_state->uapi.encoder_mask) intel_encoder_info(m, crtc, encoder); @@ -1008,7 +1012,7 @@ i915_fifo_underrun_reset_write(struct file *filp, return ret; } - intel_fbc_reset_underrun(dev_priv); + intel_fbc_reset_underrun(&dev_priv->display); return cnt; } @@ -1045,6 +1049,7 @@ static const struct { void intel_display_debugfs_register(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; struct drm_minor *minor = i915->drm.primary; int i; @@ -1060,15 +1065,15 @@ void intel_display_debugfs_register(struct drm_i915_private *i915) ARRAY_SIZE(intel_display_debugfs_list), minor->debugfs_root, minor); - intel_bios_debugfs_register(i915); + intel_bios_debugfs_register(display); intel_cdclk_debugfs_register(i915); intel_dmc_debugfs_register(i915); - intel_fbc_debugfs_register(i915); + intel_fbc_debugfs_register(display); intel_hpd_debugfs_register(i915); - intel_opregion_debugfs_register(i915); + intel_opregion_debugfs_register(display); intel_psr_debugfs_register(i915); intel_wm_debugfs_register(i915); - intel_display_debugfs_params(i915); + intel_display_debugfs_params(display); } static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data) diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c index f35718748555..ec3ed29a83c9 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.c @@ -151,13 +151,13 @@ intel_display_debugfs_create_uint(const char *name, umode_t mode, } while (0) /* add a subdirectory with files for each intel display param */ -void intel_display_debugfs_params(struct drm_i915_private *i915) +void intel_display_debugfs_params(struct intel_display *display) { - struct drm_minor *minor = i915->drm.primary; + struct drm_minor *minor = display->drm->primary; struct dentry *dir; char dirname[16]; - snprintf(dirname, sizeof(dirname), "%s_params", i915->drm.driver->name); + snprintf(dirname, sizeof(dirname), "%s_params", display->drm->driver->name); dir = debugfs_lookup(dirname, minor->debugfs_root); if (!dir) dir = debugfs_create_dir(dirname, minor->debugfs_root); @@ -171,7 +171,7 @@ void intel_display_debugfs_params(struct drm_i915_private *i915) */ #define REGISTER(T, x, unused, mode, ...) _intel_display_param_create_file( \ - dir, #x, mode, &i915->display.params.x); + dir, #x, mode, &display->params.x); INTEL_DISPLAY_PARAMS_FOR_EACH(REGISTER); #undef REGISTER } diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.h b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.h index 1e9945a4044c..a1120915a5a8 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs_params.h +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs_params.h @@ -6,8 +6,8 @@ #ifndef __INTEL_DISPLAY_DEBUGFS_PARAMS__ #define __INTEL_DISPLAY_DEBUGFS_PARAMS__ -struct drm_i915_private; +struct intel_display; -void intel_display_debugfs_params(struct drm_i915_private *i915); +void intel_display_debugfs_params(struct intel_display *display); #endif /* __INTEL_DISPLAY_DEBUGFS_PARAMS__ */ diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c index dd7dce4b0e7a..1b46ba985580 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.c +++ b/drivers/gpu/drm/i915/display/intel_display_device.c @@ -16,14 +16,25 @@ #include "intel_display_power.h" #include "intel_display_reg_defs.h" #include "intel_fbc.h" +#include "intel_step.h" __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field initialization overrides for display info"); +struct stepping_desc { + const enum intel_step *map; /* revid to step map */ + size_t size; /* map size */ +}; + +#define STEP_INFO(_map) \ + .step_info.map = _map, \ + .step_info.size = ARRAY_SIZE(_map) + struct subplatform_desc { enum intel_display_subplatform subplatform; const char *name; const u16 *pciidlist; + struct stepping_desc step_info; }; struct platform_desc { @@ -31,6 +42,7 @@ struct platform_desc { const char *name; const struct subplatform_desc *subplatforms; const struct intel_display_device_info *info; /* NULL for GMD ID */ + struct stepping_desc step_info; }; #define PLATFORM(_platform) \ @@ -610,6 +622,13 @@ static const u16 skl_ulx_ids[] = { 0 }; +static const enum intel_step skl_steppings[] = { + [0x6] = STEP_G0, + [0x7] = STEP_H0, + [0x9] = STEP_J0, + [0xA] = STEP_I1, +}; + static const struct platform_desc skl_desc = { PLATFORM(SKYLAKE), .subplatforms = (const struct subplatform_desc[]) { @@ -618,6 +637,7 @@ static const struct platform_desc skl_desc = { {}, }, .info = &skl_display, + STEP_INFO(skl_steppings), }; static const u16 kbl_ult_ids[] = { @@ -634,6 +654,16 @@ static const u16 kbl_ulx_ids[] = { 0 }; +static const enum intel_step kbl_steppings[] = { + [1] = STEP_B0, + [2] = STEP_B0, + [3] = STEP_B0, + [4] = STEP_C0, + [5] = STEP_B1, + [6] = STEP_B1, + [7] = STEP_C0, +}; + static const struct platform_desc kbl_desc = { PLATFORM(KABYLAKE), .subplatforms = (const struct subplatform_desc[]) { @@ -642,6 +672,7 @@ static const struct platform_desc kbl_desc = { {}, }, .info = &skl_display, + STEP_INFO(kbl_steppings), }; static const u16 cfl_ult_ids[] = { @@ -706,6 +737,13 @@ static const struct platform_desc cml_desc = { BIT(TRANSCODER_DSI_A) | BIT(TRANSCODER_DSI_C), \ .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) +static const enum intel_step bxt_steppings[] = { + [0xA] = STEP_C0, + [0xB] = STEP_C0, + [0xC] = STEP_D0, + [0xD] = STEP_E0, +}; + static const struct platform_desc bxt_desc = { PLATFORM(BROXTON), .info = &(const struct intel_display_device_info) { @@ -714,6 +752,11 @@ static const struct platform_desc bxt_desc = { .__runtime_defaults.ip.ver = 9, }, + STEP_INFO(bxt_steppings), +}; + +static const enum intel_step glk_steppings[] = { + [3] = STEP_B0, }; static const struct platform_desc glk_desc = { @@ -725,6 +768,7 @@ static const struct platform_desc glk_desc = { .__runtime_defaults.ip.ver = 10, }, + STEP_INFO(glk_steppings), }; #define ICL_DISPLAY \ @@ -773,6 +817,10 @@ static const u16 icl_port_f_ids[] = { 0 }; +static const enum intel_step icl_steppings[] = { + [7] = STEP_D0, +}; + static const struct platform_desc icl_desc = { PLATFORM(ICELAKE), .subplatforms = (const struct subplatform_desc[]) { @@ -784,6 +832,7 @@ static const struct platform_desc icl_desc = { .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) | BIT(PORT_D) | BIT(PORT_E), }, + STEP_INFO(icl_steppings), }; static const struct intel_display_device_info jsl_ehl_display = { @@ -792,14 +841,21 @@ static const struct intel_display_device_info jsl_ehl_display = { .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) | BIT(PORT_D), }; +static const enum intel_step jsl_ehl_steppings[] = { + [0] = STEP_A0, + [1] = STEP_B0, +}; + static const struct platform_desc jsl_desc = { PLATFORM(JASPERLAKE), .info = &jsl_ehl_display, + STEP_INFO(jsl_ehl_steppings), }; static const struct platform_desc ehl_desc = { PLATFORM(ELKHARTLAKE), .info = &jsl_ehl_display, + STEP_INFO(jsl_ehl_steppings), }; #define XE_D_DISPLAY \ @@ -850,10 +906,23 @@ static const u16 tgl_uy_ids[] = { 0 }; +static const enum intel_step tgl_steppings[] = { + [0] = STEP_B0, + [1] = STEP_D0, +}; + +static const enum intel_step tgl_uy_steppings[] = { + [0] = STEP_A0, + [1] = STEP_C0, + [2] = STEP_C0, + [3] = STEP_D0, +}; + static const struct platform_desc tgl_desc = { PLATFORM(TIGERLAKE), .subplatforms = (const struct subplatform_desc[]) { - { INTEL_DISPLAY_TIGERLAKE_UY, "UY", tgl_uy_ids }, + { INTEL_DISPLAY_TIGERLAKE_UY, "UY", tgl_uy_ids, + STEP_INFO(tgl_uy_steppings) }, {}, }, .info = &(const struct intel_display_device_info) { @@ -866,6 +935,12 @@ static const struct platform_desc tgl_desc = { .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_TC1) | BIT(PORT_TC2) | BIT(PORT_TC3) | BIT(PORT_TC4) | BIT(PORT_TC5) | BIT(PORT_TC6), }, + STEP_INFO(tgl_steppings), +}; + +static const enum intel_step dg1_steppings[] = { + [0] = STEP_A0, + [1] = STEP_B0, }; static const struct platform_desc dg1_desc = { @@ -876,6 +951,13 @@ static const struct platform_desc dg1_desc = { .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_TC1) | BIT(PORT_TC2), }, + STEP_INFO(dg1_steppings), +}; + +static const enum intel_step rkl_steppings[] = { + [0] = STEP_A0, + [1] = STEP_B0, + [4] = STEP_C0, }; static const struct platform_desc rkl_desc = { @@ -892,6 +974,7 @@ static const struct platform_desc rkl_desc = { .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_TC1) | BIT(PORT_TC2), }, + STEP_INFO(rkl_steppings), }; static const u16 adls_rpls_ids[] = { @@ -899,10 +982,24 @@ static const u16 adls_rpls_ids[] = { 0 }; +static const enum intel_step adl_s_steppings[] = { + [0x0] = STEP_A0, + [0x1] = STEP_A2, + [0x4] = STEP_B0, + [0x8] = STEP_B0, + [0xC] = STEP_C0, +}; + +static const enum intel_step adl_s_rpl_s_steppings[] = { + [0x4] = STEP_D0, + [0xC] = STEP_C0, +}; + static const struct platform_desc adl_s_desc = { PLATFORM(ALDERLAKE_S), .subplatforms = (const struct subplatform_desc[]) { - { INTEL_DISPLAY_ALDERLAKE_S_RAPTORLAKE_S, "RPL-S", adls_rpls_ids }, + { INTEL_DISPLAY_ALDERLAKE_S_RAPTORLAKE_S, "RPL-S", adls_rpls_ids, + STEP_INFO(adl_s_rpl_s_steppings) }, {}, }, .info = &(const struct intel_display_device_info) { @@ -913,6 +1010,7 @@ static const struct platform_desc adl_s_desc = { .__runtime_defaults.port_mask = BIT(PORT_A) | BIT(PORT_TC1) | BIT(PORT_TC2) | BIT(PORT_TC3) | BIT(PORT_TC4), }, + STEP_INFO(adl_s_steppings), }; #define XE_LPD_FEATURES \ @@ -986,15 +1084,34 @@ static const u16 adlp_rplp_ids[] = { 0 }; +static const enum intel_step adl_p_steppings[] = { + [0x0] = STEP_A0, + [0x4] = STEP_B0, + [0x8] = STEP_C0, + [0xC] = STEP_D0, +}; + +static const enum intel_step adl_p_adl_n_steppings[] = { + [0x0] = STEP_D0, +}; + +static const enum intel_step adl_p_rpl_pu_steppings[] = { + [0x4] = STEP_E0, +}; + static const struct platform_desc adl_p_desc = { PLATFORM(ALDERLAKE_P), .subplatforms = (const struct subplatform_desc[]) { - { INTEL_DISPLAY_ALDERLAKE_P_ALDERLAKE_N, "ADL-N", adlp_adln_ids }, - { INTEL_DISPLAY_ALDERLAKE_P_RAPTORLAKE_U, "RPL-U", adlp_rplu_ids }, - { INTEL_DISPLAY_ALDERLAKE_P_RAPTORLAKE_P, "RPL-P", adlp_rplp_ids }, + { INTEL_DISPLAY_ALDERLAKE_P_ALDERLAKE_N, "ADL-N", adlp_adln_ids, + STEP_INFO(adl_p_adl_n_steppings) }, + { INTEL_DISPLAY_ALDERLAKE_P_RAPTORLAKE_P, "RPL-P", adlp_rplp_ids, + STEP_INFO(adl_p_rpl_pu_steppings) }, + { INTEL_DISPLAY_ALDERLAKE_P_RAPTORLAKE_U, "RPL-U", adlp_rplu_ids, + STEP_INFO(adl_p_rpl_pu_steppings) }, {}, }, .info = &xe_lpd_display, + STEP_INFO(adl_p_steppings), }; static const struct intel_display_device_info xe_hpd_display = { @@ -1023,12 +1140,33 @@ static const u16 dg2_g12_ids[] = { 0 }; +static const enum intel_step dg2_g10_steppings[] = { + [0x0] = STEP_A0, + [0x1] = STEP_A0, + [0x4] = STEP_B0, + [0x8] = STEP_C0, +}; + +static const enum intel_step dg2_g11_steppings[] = { + [0x0] = STEP_B0, + [0x4] = STEP_C0, + [0x5] = STEP_C0, +}; + +static const enum intel_step dg2_g12_steppings[] = { + [0x0] = STEP_C0, + [0x1] = STEP_C0, +}; + static const struct platform_desc dg2_desc = { PLATFORM(DG2), .subplatforms = (const struct subplatform_desc[]) { - { INTEL_DISPLAY_DG2_G10, "G10", dg2_g10_ids }, - { INTEL_DISPLAY_DG2_G11, "G11", dg2_g11_ids }, - { INTEL_DISPLAY_DG2_G12, "G12", dg2_g12_ids }, + { INTEL_DISPLAY_DG2_G10, "G10", dg2_g10_ids, + STEP_INFO(dg2_g10_steppings) }, + { INTEL_DISPLAY_DG2_G11, "G11", dg2_g11_ids, + STEP_INFO(dg2_g11_steppings) }, + { INTEL_DISPLAY_DG2_G12, "G12", dg2_g12_ids, + STEP_INFO(dg2_g12_steppings) }, {}, }, .info = &xe_hpd_display, @@ -1261,13 +1399,66 @@ find_subplatform_desc(struct pci_dev *pdev, const struct platform_desc *desc) return NULL; } +static enum intel_step get_pre_gmdid_step(struct intel_display *display, + const struct stepping_desc *main, + const struct stepping_desc *sub) +{ + struct pci_dev *pdev = to_pci_dev(display->drm->dev); + const enum intel_step *map = main->map; + int size = main->size; + int revision = pdev->revision; + enum intel_step step; + + /* subplatform stepping info trumps main platform info */ + if (sub && sub->map && sub->size) { + map = sub->map; + size = sub->size; + } + + /* not all platforms define steppings, and it's fine */ + if (!map || !size) + return STEP_NONE; + + if (revision < size && map[revision] != STEP_NONE) { + step = map[revision]; + } else { + drm_warn(display->drm, "Unknown revision 0x%02x\n", revision); + + /* + * If we hit a gap in the revision to step map, use the information + * for the next revision. + * + * This may be wrong in all sorts of ways, especially if the + * steppings in the array are not monotonically increasing, but + * it's better than defaulting to 0. + */ + while (revision < size && map[revision] == STEP_NONE) + revision++; + + if (revision < size) { + drm_dbg_kms(display->drm, "Using display stepping for revision 0x%02x\n", + revision); + step = map[revision]; + } else { + drm_dbg_kms(display->drm, "Using future display stepping\n"); + step = STEP_FUTURE; + } + } + + drm_WARN_ON(display->drm, step == STEP_NONE); + + return step; +} + void intel_display_device_probe(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; struct pci_dev *pdev = to_pci_dev(i915->drm.dev); const struct intel_display_device_info *info; struct intel_display_ip_ver ip_ver = {}; const struct platform_desc *desc; const struct subplatform_desc *subdesc; + enum intel_step step; /* Add drm device backpointer as early as possible. */ i915->display.drm = &i915->drm; @@ -1307,13 +1498,25 @@ void intel_display_device_probe(struct drm_i915_private *i915) DISPLAY_RUNTIME_INFO(i915)->subplatform = subdesc->subplatform; } - if (ip_ver.ver || ip_ver.rel || ip_ver.step) + if (ip_ver.ver || ip_ver.rel || ip_ver.step) { DISPLAY_RUNTIME_INFO(i915)->ip = ip_ver; + step = STEP_A0 + ip_ver.step; + if (step > STEP_FUTURE) { + drm_dbg_kms(display->drm, "Using future display stepping\n"); + step = STEP_FUTURE; + } + } else { + step = get_pre_gmdid_step(display, &desc->step_info, + subdesc ? &subdesc->step_info : NULL); + } - drm_info(&i915->drm, "Found %s%s%s (device ID %04x) display version %u.%02u\n", + DISPLAY_RUNTIME_INFO(i915)->step = step; + + drm_info(&i915->drm, "Found %s%s%s (device ID %04x) display version %u.%02u stepping %s\n", desc->name, subdesc ? "/" : "", subdesc ? subdesc->name : "", pdev->device, DISPLAY_RUNTIME_INFO(i915)->ip.ver, - DISPLAY_RUNTIME_INFO(i915)->ip.rel); + DISPLAY_RUNTIME_INFO(i915)->ip.rel, + step != STEP_NONE ? intel_step_name(step) : "N/A"); return; @@ -1474,6 +1677,9 @@ static void __intel_display_device_info_runtime_init(struct drm_i915_private *i9 } } + display_runtime->rawclk_freq = intel_read_rawclk(i915); + drm_dbg_kms(&i915->drm, "rawclk rate: %d kHz\n", display_runtime->rawclk_freq); + return; display_fused_off: @@ -1509,6 +1715,8 @@ void intel_display_device_info_print(const struct intel_display_device_info *inf drm_printf(p, "display version: %u\n", runtime->ip.ver); + drm_printf(p, "display stepping: %s\n", intel_step_name(runtime->step)); + #define PRINT_FLAG(name) drm_printf(p, "%s: %s\n", #name, str_yes_no(info->name)) DEV_INFO_DISPLAY_FOR_EACH_FLAG(PRINT_FLAG); #undef PRINT_FLAG @@ -1516,6 +1724,8 @@ void intel_display_device_info_print(const struct intel_display_device_info *inf drm_printf(p, "has_hdcp: %s\n", str_yes_no(runtime->has_hdcp)); drm_printf(p, "has_dmc: %s\n", str_yes_no(runtime->has_dmc)); drm_printf(p, "has_dsc: %s\n", str_yes_no(runtime->has_dsc)); + + drm_printf(p, "rawclk rate: %u kHz\n", runtime->rawclk_freq); } /* @@ -1529,9 +1739,11 @@ void intel_display_device_info_print(const struct intel_display_device_info *inf */ bool intel_display_device_enabled(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; + /* Only valid when HAS_DISPLAY() is true */ - drm_WARN_ON(&i915->drm, !HAS_DISPLAY(i915)); + drm_WARN_ON(display->drm, !HAS_DISPLAY(display)); - return !i915->display.params.disable_display && - !intel_opregion_headless_sku(i915); + return !display->params.disable_display && + !intel_opregion_headless_sku(display); } diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 13453ea4daea..dfb0c8bf5ca2 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -161,7 +161,7 @@ enum intel_display_subplatform { #define SUPPORTS_TV(i915) (DISPLAY_INFO(i915)->supports_tv) /* Check that device has a display IP version within the specific range. */ -#define IS_DISPLAY_IP_RANGE(__i915, from, until) ( \ +#define IS_DISPLAY_VER_FULL(__i915, from, until) ( \ BUILD_BUG_ON_ZERO((from) < IP_VER(2, 0)) + \ (DISPLAY_VER_FULL(__i915) >= (from) && \ DISPLAY_VER_FULL(__i915) <= (until))) @@ -175,14 +175,14 @@ enum intel_display_subplatform { * hardware fix is present and the software workaround is no longer necessary. * E.g., * - * IS_DISPLAY_IP_STEP(i915, IP_VER(14, 0), STEP_A0, STEP_B2) - * IS_DISPLAY_IP_STEP(i915, IP_VER(14, 0), STEP_C0, STEP_FOREVER) + * IS_DISPLAY_VER_STEP(i915, IP_VER(14, 0), STEP_A0, STEP_B2) + * IS_DISPLAY_VER_STEP(i915, IP_VER(14, 0), STEP_C0, STEP_FOREVER) * * "STEP_FOREVER" can be passed as "until" for workarounds that have no upper * stepping bound for the specified IP version. */ -#define IS_DISPLAY_IP_STEP(__i915, ipver, from, until) \ - (IS_DISPLAY_IP_RANGE((__i915), (ipver), (ipver)) && \ +#define IS_DISPLAY_VER_STEP(__i915, ipver, from, until) \ + (IS_DISPLAY_VER_FULL((__i915), (ipver), (ipver)) && \ IS_DISPLAY_STEP((__i915), (from), (until))) #define DISPLAY_INFO(i915) (__to_intel_display(i915)->info.__device_info) @@ -194,6 +194,12 @@ enum intel_display_subplatform { #define IS_DISPLAY_VER(i915, from, until) \ (DISPLAY_VER(i915) >= (from) && DISPLAY_VER(i915) <= (until)) +#define INTEL_DISPLAY_STEP(__i915) (DISPLAY_RUNTIME_INFO(__i915)->step) + +#define IS_DISPLAY_STEP(__i915, since, until) \ + (drm_WARN_ON(__to_intel_display(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \ + INTEL_DISPLAY_STEP(__i915) >= (since) && INTEL_DISPLAY_STEP(__i915) < (until)) + struct intel_display_runtime_info { enum intel_display_platform platform; enum intel_display_subplatform subplatform; @@ -201,8 +207,11 @@ struct intel_display_runtime_info { struct intel_display_ip_ver { u16 ver; u16 rel; - u16 step; + u16 step; /* hardware */ } ip; + int step; /* symbolic */ + + u32 rawclk_freq; u8 pipe_mask; u8 cpu_transcoder_mask; diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 794b4af38055..eced20d2ce6e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -217,7 +217,7 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915) return ret; } - intel_bios_init(i915); + intel_bios_init(display); ret = intel_vga_register(i915); if (ret) @@ -265,7 +265,7 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915) intel_init_quirks(display); - intel_fbc_init(i915); + intel_fbc_init(display); return 0; @@ -275,7 +275,7 @@ cleanup_vga_client_pw_domain_dmc: cleanup_vga: intel_vga_unregister(i915); cleanup_bios: - intel_bios_driver_remove(i915); + intel_bios_driver_remove(display); return ret; } @@ -416,7 +416,8 @@ bool intel_display_driver_check_access(struct drm_i915_private *i915) /* part #2: call after irq install, but before gem init */ int intel_display_driver_probe_nogem(struct drm_i915_private *i915) { - struct drm_device *dev = &i915->drm; + struct intel_display *display = &i915->display; + struct drm_device *dev = display->drm; enum pipe pipe; int ret; @@ -452,7 +453,7 @@ int intel_display_driver_probe_nogem(struct drm_i915_private *i915) if (i915->display.cdclk.max_cdclk_freq == 0) intel_update_max_cdclk(i915); - intel_hti_init(i915); + intel_hti_init(display); /* Just disable it once at startup */ intel_vga_disable(i915); @@ -466,7 +467,7 @@ int intel_display_driver_probe_nogem(struct drm_i915_private *i915) drm_modeset_lock_all(dev); intel_modeset_setup_hw_state(i915, dev->mode_config.acquire_ctx); - intel_acpi_assign_connector_fwnodes(i915); + intel_acpi_assign_connector_fwnodes(display); drm_modeset_unlock_all(dev); intel_initial_plane_config(i915); @@ -526,6 +527,7 @@ int intel_display_driver_probe(struct drm_i915_private *i915) void intel_display_driver_register(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; struct drm_printer p = drm_dbg_printer(&i915->drm, DRM_UT_KMS, "i915 display info:"); @@ -533,8 +535,8 @@ void intel_display_driver_register(struct drm_i915_private *i915) return; /* Must be done after probing outputs */ - intel_opregion_register(i915); - intel_acpi_video_register(i915); + intel_opregion_register(display); + intel_acpi_video_register(display); intel_audio_init(i915); @@ -607,23 +609,27 @@ void intel_display_driver_remove_noirq(struct drm_i915_private *i915) destroy_workqueue(i915->display.wq.flip); destroy_workqueue(i915->display.wq.modeset); - intel_fbc_cleanup(i915); + intel_fbc_cleanup(&i915->display); } /* part #3: call after gem init */ void intel_display_driver_remove_nogem(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; + intel_dmc_fini(i915); intel_power_domains_driver_remove(i915); intel_vga_unregister(i915); - intel_bios_driver_remove(i915); + intel_bios_driver_remove(display); } void intel_display_driver_unregister(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; + if (!HAS_DISPLAY(i915)) return; @@ -643,7 +649,7 @@ void intel_display_driver_unregister(struct drm_i915_private *i915) drm_atomic_helper_shutdown(&i915->drm); acpi_video_unregister(); - intel_opregion_unregister(i915); + intel_opregion_unregister(display); } /* diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c b/drivers/gpu/drm/i915/display/intel_display_irq.c index 5219ba295c74..d85c33eabc47 100644 --- a/drivers/gpu/drm/i915/display/intel_display_irq.c +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c @@ -14,6 +14,7 @@ #include "intel_display_trace.h" #include "intel_display_types.h" #include "intel_dp_aux.h" +#include "intel_dsb.h" #include "intel_fdi_regs.h" #include "intel_fifo_underrun.h" #include "intel_gmbus.h" @@ -270,10 +271,12 @@ void i915_disable_pipestat(struct drm_i915_private *dev_priv, static bool i915_has_asle(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; + if (!IS_PINEVIEW(i915) && !IS_MOBILE(i915)) return false; - return intel_opregion_asle_present(i915); + return intel_opregion_asle_present(display); } /** @@ -497,6 +500,8 @@ void i8xx_pipestat_irq_handler(struct drm_i915_private *dev_priv, void i915_pipestat_irq_handler(struct drm_i915_private *dev_priv, u32 iir, u32 pipe_stats[I915_MAX_PIPES]) { + struct intel_display *display = &dev_priv->display; + bool blc_event = false; enum pipe pipe; @@ -515,12 +520,13 @@ void i915_pipestat_irq_handler(struct drm_i915_private *dev_priv, } if (blc_event || (iir & I915_ASLE_INTERRUPT)) - intel_opregion_asle_intr(dev_priv); + intel_opregion_asle_intr(display); } void i965_pipestat_irq_handler(struct drm_i915_private *dev_priv, u32 iir, u32 pipe_stats[I915_MAX_PIPES]) { + struct intel_display *display = &dev_priv->display; bool blc_event = false; enum pipe pipe; @@ -539,7 +545,7 @@ void i965_pipestat_irq_handler(struct drm_i915_private *dev_priv, } if (blc_event || (iir & I915_ASLE_INTERRUPT)) - intel_opregion_asle_intr(dev_priv); + intel_opregion_asle_intr(display); if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS) intel_gmbus_irq_handler(dev_priv); @@ -695,6 +701,7 @@ static void cpt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) void ilk_display_irq_handler(struct drm_i915_private *dev_priv, u32 de_iir) { + struct intel_display *display = &dev_priv->display; enum pipe pipe; u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG; @@ -705,7 +712,7 @@ void ilk_display_irq_handler(struct drm_i915_private *dev_priv, u32 de_iir) intel_dp_aux_irq_handler(dev_priv); if (de_iir & DE_GSE) - intel_opregion_asle_intr(dev_priv); + intel_opregion_asle_intr(display); if (de_iir & DE_POISON) drm_err(&dev_priv->drm, "Poison interrupt\n"); @@ -743,6 +750,7 @@ void ilk_display_irq_handler(struct drm_i915_private *dev_priv, u32 de_iir) void ivb_display_irq_handler(struct drm_i915_private *dev_priv, u32 de_iir) { + struct intel_display *display = &dev_priv->display; enum pipe pipe; u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG_IVB; @@ -770,7 +778,7 @@ void ivb_display_irq_handler(struct drm_i915_private *dev_priv, u32 de_iir) intel_dp_aux_irq_handler(dev_priv); if (de_iir & DE_GSE_IVB) - intel_opregion_asle_intr(dev_priv); + intel_opregion_asle_intr(display); for_each_pipe(dev_priv, pipe) { if (de_iir & DE_PIPE_VBLANK_IVB(pipe)) @@ -894,6 +902,7 @@ static void intel_pmdemand_irq_handler(struct drm_i915_private *dev_priv) static void gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir) { + struct intel_display *display = &dev_priv->display; bool found = false; if (DISPLAY_VER(dev_priv) >= 14) { @@ -906,8 +915,15 @@ gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir) intel_pmdemand_irq_handler(dev_priv); found = true; } + + if (iir & XELPDP_RM_TIMEOUT) { + u32 val = intel_uncore_read(&dev_priv->uncore, + RM_TIMEOUT_REG_CAPTURE); + drm_warn(&dev_priv->drm, "Register Access Timeout = 0x%x\n", val); + found = true; + } } else if (iir & GEN8_DE_MISC_GSE) { - intel_opregion_asle_intr(dev_priv); + intel_opregion_asle_intr(display); found = true; } @@ -1149,6 +1165,17 @@ void gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) if (iir & gen8_de_pipe_flip_done_mask(dev_priv)) flip_done_handler(dev_priv, pipe); + if (HAS_DSB(dev_priv)) { + if (iir & GEN12_DSB_INT(INTEL_DSB_0)) + intel_dsb_irq_handler(&dev_priv->display, pipe, INTEL_DSB_0); + + if (iir & GEN12_DSB_INT(INTEL_DSB_1)) + intel_dsb_irq_handler(&dev_priv->display, pipe, INTEL_DSB_1); + + if (iir & GEN12_DSB_INT(INTEL_DSB_2)) + intel_dsb_irq_handler(&dev_priv->display, pipe, INTEL_DSB_2); + } + if (iir & GEN8_PIPE_CDCLK_CRC_DONE) hsw_pipe_crc_irq_handler(dev_priv, pipe); @@ -1211,8 +1238,10 @@ u32 gen11_gu_misc_irq_ack(struct drm_i915_private *i915, const u32 master_ctl) void gen11_gu_misc_irq_handler(struct drm_i915_private *i915, const u32 iir) { + struct intel_display *display = &i915->display; + if (iir & GEN11_GU_MISC_GSE) - intel_opregion_asle_intr(i915); + intel_opregion_asle_intr(display); } void gen11_display_irq_handler(struct drm_i915_private *i915) @@ -1680,6 +1709,7 @@ static void icp_irq_postinstall(struct drm_i915_private *i915); void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct intel_uncore *uncore = &dev_priv->uncore; u32 de_pipe_masked = gen8_de_pipe_fault_mask(dev_priv) | @@ -1710,14 +1740,19 @@ void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) if (DISPLAY_VER(dev_priv) >= 14) { de_misc_masked |= XELPDP_PMDEMAND_RSPTOUT_ERR | - XELPDP_PMDEMAND_RSP; + XELPDP_PMDEMAND_RSP | XELPDP_RM_TIMEOUT; } else if (DISPLAY_VER(dev_priv) >= 11) { enum port port; - if (intel_bios_is_dsi_present(dev_priv, &port)) + if (intel_bios_is_dsi_present(display, &port)) de_port_masked |= DSI0_TE | DSI1_TE; } + if (HAS_DSB(dev_priv)) + de_pipe_masked |= GEN12_DSB_INT(INTEL_DSB_0) | + GEN12_DSB_INT(INTEL_DSB_1) | + GEN12_DSB_INT(INTEL_DSB_2); + de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK | gen8_de_pipe_underrun_mask(dev_priv) | diff --git a/drivers/gpu/drm/i915/display/intel_display_params.c b/drivers/gpu/drm/i915/display/intel_display_params.c index e82bd72d32fa..1a45d300b6f0 100644 --- a/drivers/gpu/drm/i915/display/intel_display_params.c +++ b/drivers/gpu/drm/i915/display/intel_display_params.c @@ -173,14 +173,14 @@ static void _param_print_charp(struct drm_printer *p, const char *driver_name, /** * intel_display_params_dump - dump intel display modparams - * @i915: i915 device + * @display: display device * @p: the &drm_printer * * Pretty printer for i915 modparams. */ -void intel_display_params_dump(struct drm_i915_private *i915, struct drm_printer *p) +void intel_display_params_dump(struct intel_display *display, struct drm_printer *p) { -#define PRINT(T, x, ...) _param_print(p, i915->drm.driver->name, #x, i915->display.params.x); +#define PRINT(T, x, ...) _param_print(p, display->drm->driver->name, #x, display->params.x); INTEL_DISPLAY_PARAMS_FOR_EACH(PRINT); #undef PRINT } diff --git a/drivers/gpu/drm/i915/display/intel_display_params.h b/drivers/gpu/drm/i915/display/intel_display_params.h index 48c29c55c939..da8dc943234b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_params.h +++ b/drivers/gpu/drm/i915/display/intel_display_params.h @@ -9,7 +9,7 @@ #include <linux/types.h> struct drm_printer; -struct drm_i915_private; +struct intel_display; /* * Invoke param, a function-like macro, for each intel display param, with @@ -56,7 +56,7 @@ struct intel_display_params { }; #undef MEMBER -void intel_display_params_dump(struct drm_i915_private *i915, +void intel_display_params_dump(struct intel_display *display, struct drm_printer *p); void intel_display_params_copy(struct intel_display_params *dest); void intel_display_params_free(struct intel_display_params *params); diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index e288a1b21d7e..ef2fdbf97346 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -36,7 +36,7 @@ for_each_power_well_reverse(__dev_priv, __power_well) \ for_each_if(test_bit((__domain), (__power_well)->domains.bits)) -const char * +static const char * intel_display_power_domain_str(enum intel_display_power_domain domain) { switch (domain) { @@ -198,20 +198,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) } } -/** - * __intel_display_power_is_enabled - unlocked check for a power domain - * @dev_priv: i915 device instance - * @domain: power domain to check - * - * This is the unlocked version of intel_display_power_is_enabled() and should - * only be used from error capture and recovery code where deadlocks are - * possible. - * - * Returns: - * True when the power domain is enabled, false otherwise. - */ -bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) +static bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain) { struct i915_power_well *power_well; bool is_enabled; @@ -1696,7 +1684,7 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, intel_dmc_load_program(dev_priv); /* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p,dg2 */ - if (IS_DISPLAY_IP_RANGE(dev_priv, IP_VER(12, 0), IP_VER(13, 0))) + if (IS_DISPLAY_VER_FULL(dev_priv, IP_VER(12, 0), IP_VER(13, 0))) intel_de_rmw(dev_priv, GEN11_CHICKEN_DCPR_2, 0, DCPR_CLEAR_MEMSTAT_DIS | DCPR_SEND_RESP_IMM | DCPR_MASK_LPMODE | DCPR_MASK_MAXLATENCY_MEMUP_CLR); @@ -1704,6 +1692,14 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, /* Wa_14011503030:xelpd */ if (DISPLAY_VER(dev_priv) == 13) intel_de_write(dev_priv, XELPD_DISPLAY_ERR_FATAL_MASK, ~0); + + /* Wa_15013987218 */ + if (DISPLAY_VER(dev_priv) == 20) { + intel_de_rmw(dev_priv, SOUTH_DSPCLK_GATE_D, + 0, PCH_GMBUSUNIT_CLOCK_GATE_DISABLE); + intel_de_rmw(dev_priv, SOUTH_DSPCLK_GATE_D, + PCH_GMBUSUNIT_CLOCK_GATE_DISABLE, 0); + } } static void icl_display_core_uninit(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index d6c2a5846bdc..425452c5a469 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -183,13 +183,8 @@ void intel_display_power_resume(struct drm_i915_private *i915); void intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv, u32 state); -const char * -intel_display_power_domain_str(enum intel_display_power_domain domain); - bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); -bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain); intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); intel_wakeref_t diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c index 919f712fef13..adf5d1fbccb5 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c @@ -1176,9 +1176,9 @@ static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv) MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); intel_de_write(dev_priv, CBR1_VLV, 0); - drm_WARN_ON(&dev_priv->drm, RUNTIME_INFO(dev_priv)->rawclk_freq == 0); + drm_WARN_ON(&dev_priv->drm, DISPLAY_RUNTIME_INFO(dev_priv)->rawclk_freq == 0); intel_de_write(dev_priv, RAWCLK_FREQ_VLV, - DIV_ROUND_CLOSEST(RUNTIME_INFO(dev_priv)->rawclk_freq, + DIV_ROUND_CLOSEST(DISPLAY_RUNTIME_INFO(dev_priv)->rawclk_freq, 1000)); } diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 8713835e2307..868ff8976ed9 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1396,8 +1396,8 @@ struct intel_crtc_state { /* Only valid on TGL+ */ enum transcoder mst_master_transcoder; - /* For DSB related info */ - struct intel_dsb *dsb; + /* For DSB based color LUT updates */ + struct intel_dsb *dsb_color_vblank, *dsb_color_commit; u32 psr2_man_track_ctl; @@ -1754,6 +1754,7 @@ struct intel_dp { u8 lane_count; u8 sink_count; bool link_trained; + bool needs_modeset_retry; bool use_max_params; u8 dpcd[DP_RECEIVER_CAP_SIZE]; u8 psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; @@ -1777,10 +1778,30 @@ struct intel_dp { int common_rates[DP_MAX_SUPPORTED_RATES]; struct { /* TODO: move the rest of link specific fields to here */ + /* common rate,lane_count configs in bw order */ + int num_configs; +#define INTEL_DP_MAX_LANE_COUNT 4 +#define INTEL_DP_MAX_SUPPORTED_LANE_CONFIGS (ilog2(INTEL_DP_MAX_LANE_COUNT) + 1) +#define INTEL_DP_LANE_COUNT_EXP_BITS order_base_2(INTEL_DP_MAX_SUPPORTED_LANE_CONFIGS) +#define INTEL_DP_LINK_RATE_IDX_BITS (BITS_PER_TYPE(u8) - INTEL_DP_LANE_COUNT_EXP_BITS) +#define INTEL_DP_MAX_LINK_CONFIGS (DP_MAX_SUPPORTED_RATES * \ + INTEL_DP_MAX_SUPPORTED_LANE_CONFIGS) + struct intel_dp_link_config { + u8 link_rate_idx:INTEL_DP_LINK_RATE_IDX_BITS; + u8 lane_count_exp:INTEL_DP_LANE_COUNT_EXP_BITS; + } configs[INTEL_DP_MAX_LINK_CONFIGS]; /* Max lane count for the current link */ int max_lane_count; /* Max rate for the current link */ int max_rate; + /* + * Link parameters for which the MST topology was probed. + * Tracking these ensures that the MST path resources are + * re-enumerated whenever the link is retrained with new link + * parameters, as required by the DP standard. + */ + int mst_probed_lane_count; + int mst_probed_rate; int force_lane_count; int force_rate; bool retrain_disabled; @@ -1806,6 +1827,7 @@ struct intel_dp { /* connector directly attached - won't be use for modeset in mst world */ struct intel_connector *attached_connector; + bool as_sdp_supported; struct drm_dp_tunnel *tunnel; bool tunnel_suspended:1; @@ -2178,35 +2200,14 @@ to_intel_frontbuffer(struct drm_framebuffer *fb) return fb ? to_intel_framebuffer(fb)->frontbuffer : NULL; } -static inline int to_bpp_int(int bpp_x16) -{ - return bpp_x16 >> 4; -} - -static inline int to_bpp_frac(int bpp_x16) -{ - return bpp_x16 & 0xf; -} - -#define BPP_X16_FMT "%d.%04d" -#define BPP_X16_ARGS(bpp_x16) to_bpp_int(bpp_x16), (to_bpp_frac(bpp_x16) * 625) - -static inline int to_bpp_int_roundup(int bpp_x16) -{ - return (bpp_x16 + 0xf) >> 4; -} - -static inline int to_bpp_x16(int bpp) -{ - return bpp << 4; -} - /* * Conversion functions/macros from various pointer types to struct * intel_display pointer. */ #define __drm_device_to_intel_display(p) \ (&to_i915(p)->display) +#define __intel_atomic_state_to_intel_display(p) \ + __drm_device_to_intel_display((p)->base.dev) #define __intel_connector_to_intel_display(p) \ __drm_device_to_intel_display((p)->base.dev) #define __intel_crtc_to_intel_display(p) \ @@ -2230,6 +2231,7 @@ static inline int to_bpp_x16(int bpp) #define to_intel_display(p) \ _Generic(*p, \ __assoc(drm_device, p), \ + __assoc(intel_atomic_state, p), \ __assoc(intel_connector, p), \ __assoc(intel_crtc, p), \ __assoc(intel_crtc_state, p), \ diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 73977b173898..7c756d5ba2a2 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -391,7 +391,7 @@ static const struct stepping_info * intel_get_stepping_info(struct drm_i915_private *i915, struct stepping_info *si) { - const char *step_name = intel_display_step_name(i915); + const char *step_name = intel_step_name(INTEL_DISPLAY_STEP(i915)); si->stepping = step_name[0]; si->substepping = step_name[1]; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 257b441897f6..789c2f78826d 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -29,6 +29,7 @@ #include <linux/i2c.h> #include <linux/notifier.h> #include <linux/slab.h> +#include <linux/sort.h> #include <linux/string_helpers.h> #include <linux/timekeeping.h> #include <linux/types.h> @@ -42,6 +43,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_edid.h> +#include <drm/drm_fixed.h> #include <drm/drm_probe_helper.h> #include "g4x_dp.h" @@ -129,14 +131,6 @@ bool intel_dp_is_edp(struct intel_dp *intel_dp) return dig_port->base.type == INTEL_OUTPUT_EDP; } -bool intel_dp_as_sdp_supported(struct intel_dp *intel_dp) -{ - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - - return HAS_AS_SDP(i915) && - drm_dp_as_sdp_supported(&intel_dp->aux, intel_dp->dpcd); -} - static void intel_dp_unset_edid(struct intel_dp *intel_dp); /* Is link rate UHBR and thus 128b/132b? */ @@ -642,6 +636,106 @@ int intel_dp_rate_index(const int *rates, int len, int rate) return -1; } +static int intel_dp_link_config_rate(struct intel_dp *intel_dp, + const struct intel_dp_link_config *lc) +{ + return intel_dp_common_rate(intel_dp, lc->link_rate_idx); +} + +static int intel_dp_link_config_lane_count(const struct intel_dp_link_config *lc) +{ + return 1 << lc->lane_count_exp; +} + +static int intel_dp_link_config_bw(struct intel_dp *intel_dp, + const struct intel_dp_link_config *lc) +{ + return drm_dp_max_dprx_data_rate(intel_dp_link_config_rate(intel_dp, lc), + intel_dp_link_config_lane_count(lc)); +} + +static int link_config_cmp_by_bw(const void *a, const void *b, const void *p) +{ + struct intel_dp *intel_dp = (struct intel_dp *)p; /* remove const */ + const struct intel_dp_link_config *lc_a = a; + const struct intel_dp_link_config *lc_b = b; + int bw_a = intel_dp_link_config_bw(intel_dp, lc_a); + int bw_b = intel_dp_link_config_bw(intel_dp, lc_b); + + if (bw_a != bw_b) + return bw_a - bw_b; + + return intel_dp_link_config_rate(intel_dp, lc_a) - + intel_dp_link_config_rate(intel_dp, lc_b); +} + +static void intel_dp_link_config_init(struct intel_dp *intel_dp) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_dp_link_config *lc; + int num_common_lane_configs; + int i; + int j; + + if (drm_WARN_ON(&i915->drm, !is_power_of_2(intel_dp_max_common_lane_count(intel_dp)))) + return; + + num_common_lane_configs = ilog2(intel_dp_max_common_lane_count(intel_dp)) + 1; + + if (drm_WARN_ON(&i915->drm, intel_dp->num_common_rates * num_common_lane_configs > + ARRAY_SIZE(intel_dp->link.configs))) + return; + + intel_dp->link.num_configs = intel_dp->num_common_rates * num_common_lane_configs; + + lc = &intel_dp->link.configs[0]; + for (i = 0; i < intel_dp->num_common_rates; i++) { + for (j = 0; j < num_common_lane_configs; j++) { + lc->lane_count_exp = j; + lc->link_rate_idx = i; + + lc++; + } + } + + sort_r(intel_dp->link.configs, intel_dp->link.num_configs, + sizeof(intel_dp->link.configs[0]), + link_config_cmp_by_bw, NULL, + intel_dp); +} + +void intel_dp_link_config_get(struct intel_dp *intel_dp, int idx, int *link_rate, int *lane_count) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + const struct intel_dp_link_config *lc; + + if (drm_WARN_ON(&i915->drm, idx < 0 || idx >= intel_dp->link.num_configs)) + idx = 0; + + lc = &intel_dp->link.configs[idx]; + + *link_rate = intel_dp_link_config_rate(intel_dp, lc); + *lane_count = intel_dp_link_config_lane_count(lc); +} + +int intel_dp_link_config_index(struct intel_dp *intel_dp, int link_rate, int lane_count) +{ + int link_rate_idx = intel_dp_rate_index(intel_dp->common_rates, intel_dp->num_common_rates, + link_rate); + int lane_count_exp = ilog2(lane_count); + int i; + + for (i = 0; i < intel_dp->link.num_configs; i++) { + const struct intel_dp_link_config *lc = &intel_dp->link.configs[i]; + + if (lc->lane_count_exp == lane_count_exp && + lc->link_rate_idx == link_rate_idx) + return i; + } + + return -1; +} + static void intel_dp_set_common_rates(struct intel_dp *intel_dp) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); @@ -660,6 +754,8 @@ static void intel_dp_set_common_rates(struct intel_dp *intel_dp) intel_dp->common_rates[0] = 162000; intel_dp->num_common_rates = 1; } + + intel_dp_link_config_init(intel_dp); } static bool intel_dp_link_params_valid(struct intel_dp *intel_dp, int link_rate, @@ -1598,8 +1694,8 @@ intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, int bpp, i, lane_count, clock = intel_dp_mode_clock(pipe_config, conn_state); int mode_rate, link_rate, link_avail; - for (bpp = to_bpp_int(limits->link.max_bpp_x16); - bpp >= to_bpp_int(limits->link.min_bpp_x16); + for (bpp = fxp_q4_to_int(limits->link.max_bpp_x16); + bpp >= fxp_q4_to_int(limits->link.min_bpp_x16); bpp -= 2 * 3) { int link_bpp = intel_dp_output_bpp(pipe_config->output_format, bpp); @@ -1927,7 +2023,7 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp, timeslots); if (ret == 0) { pipe_config->dsc.compressed_bpp_x16 = - to_bpp_x16(valid_dsc_bpp[i]); + fxp_q4_from_int(valid_dsc_bpp[i]); return 0; } } @@ -1970,7 +2066,7 @@ xelpd_dsc_compute_link_config(struct intel_dp *intel_dp, compressed_bppx16 >= dsc_min_bpp; compressed_bppx16 -= bppx16_step) { if (intel_dp->force_dsc_fractional_bpp_en && - !to_bpp_frac(compressed_bppx16)) + !fxp_q4_to_frac(compressed_bppx16)) continue; ret = dsc_compute_link_config(intel_dp, pipe_config, @@ -1980,7 +2076,7 @@ xelpd_dsc_compute_link_config(struct intel_dp *intel_dp, if (ret == 0) { pipe_config->dsc.compressed_bpp_x16 = compressed_bppx16; if (intel_dp->force_dsc_fractional_bpp_en && - to_bpp_frac(compressed_bppx16)) + fxp_q4_to_frac(compressed_bppx16)) drm_dbg_kms(&i915->drm, "Forcing DSC fractional bpp\n"); return 0; @@ -2005,7 +2101,7 @@ static int dsc_compute_compressed_bpp(struct intel_dp *intel_dp, dsc_src_min_bpp = dsc_src_min_compressed_bpp(); dsc_sink_min_bpp = intel_dp_dsc_sink_min_compressed_bpp(pipe_config); dsc_min_bpp = max(dsc_src_min_bpp, dsc_sink_min_bpp); - dsc_min_bpp = max(dsc_min_bpp, to_bpp_int_roundup(limits->link.min_bpp_x16)); + dsc_min_bpp = max(dsc_min_bpp, fxp_q4_to_int_roundup(limits->link.min_bpp_x16)); dsc_src_max_bpp = dsc_src_max_compressed_bpp(intel_dp); dsc_sink_max_bpp = intel_dp_dsc_sink_max_compressed_bpp(connector, @@ -2017,7 +2113,7 @@ static int dsc_compute_compressed_bpp(struct intel_dp *intel_dp, adjusted_mode->hdisplay, pipe_config->joiner_pipes); dsc_max_bpp = min(dsc_max_bpp, dsc_joiner_max_bpp); - dsc_max_bpp = min(dsc_max_bpp, to_bpp_int(limits->link.max_bpp_x16)); + dsc_max_bpp = min(dsc_max_bpp, fxp_q4_to_int(limits->link.max_bpp_x16)); if (DISPLAY_VER(i915) >= 13) return xelpd_dsc_compute_link_config(intel_dp, connector, pipe_config, limits, @@ -2167,20 +2263,20 @@ static int intel_edp_dsc_compute_pipe_bpp(struct intel_dp *intel_dp, dsc_src_min_bpp = dsc_src_min_compressed_bpp(); dsc_sink_min_bpp = intel_dp_dsc_sink_min_compressed_bpp(pipe_config); dsc_min_bpp = max(dsc_src_min_bpp, dsc_sink_min_bpp); - dsc_min_bpp = max(dsc_min_bpp, to_bpp_int_roundup(limits->link.min_bpp_x16)); + dsc_min_bpp = max(dsc_min_bpp, fxp_q4_to_int_roundup(limits->link.min_bpp_x16)); dsc_src_max_bpp = dsc_src_max_compressed_bpp(intel_dp); dsc_sink_max_bpp = intel_dp_dsc_sink_max_compressed_bpp(connector, pipe_config, pipe_bpp / 3); dsc_max_bpp = dsc_sink_max_bpp ? min(dsc_sink_max_bpp, dsc_src_max_bpp) : dsc_src_max_bpp; - dsc_max_bpp = min(dsc_max_bpp, to_bpp_int(limits->link.max_bpp_x16)); + dsc_max_bpp = min(dsc_max_bpp, fxp_q4_to_int(limits->link.max_bpp_x16)); /* Compressed BPP should be less than the Input DSC bpp */ dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1); pipe_config->dsc.compressed_bpp_x16 = - to_bpp_x16(max(dsc_min_bpp, dsc_max_bpp)); + fxp_q4_from_int(max(dsc_min_bpp, dsc_max_bpp)); pipe_config->pipe_bpp = pipe_bpp; @@ -2270,17 +2366,17 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, if (ret < 0) { drm_dbg_kms(&dev_priv->drm, "Cannot compute valid DSC parameters for Input Bpp = %d" - "Compressed BPP = " BPP_X16_FMT "\n", + "Compressed BPP = " FXP_Q4_FMT "\n", pipe_config->pipe_bpp, - BPP_X16_ARGS(pipe_config->dsc.compressed_bpp_x16)); + FXP_Q4_ARGS(pipe_config->dsc.compressed_bpp_x16)); return ret; } pipe_config->dsc.compression_enable = true; drm_dbg_kms(&dev_priv->drm, "DP DSC computed with Input Bpp = %d " - "Compressed Bpp = " BPP_X16_FMT " Slice Count = %d\n", + "Compressed Bpp = " FXP_Q4_FMT " Slice Count = %d\n", pipe_config->pipe_bpp, - BPP_X16_ARGS(pipe_config->dsc.compressed_bpp_x16), + FXP_Q4_ARGS(pipe_config->dsc.compressed_bpp_x16), pipe_config->dsc.slice_count); return 0; @@ -2312,15 +2408,15 @@ intel_dp_compute_config_link_bpp_limits(struct intel_dp *intel_dp, int max_link_bpp_x16; max_link_bpp_x16 = min(crtc_state->max_link_bpp_x16, - to_bpp_x16(limits->pipe.max_bpp)); + fxp_q4_from_int(limits->pipe.max_bpp)); if (!dsc) { - max_link_bpp_x16 = rounddown(max_link_bpp_x16, to_bpp_x16(2 * 3)); + max_link_bpp_x16 = rounddown(max_link_bpp_x16, fxp_q4_from_int(2 * 3)); - if (max_link_bpp_x16 < to_bpp_x16(limits->pipe.min_bpp)) + if (max_link_bpp_x16 < fxp_q4_from_int(limits->pipe.min_bpp)) return false; - limits->link.min_bpp_x16 = to_bpp_x16(limits->pipe.min_bpp); + limits->link.min_bpp_x16 = fxp_q4_from_int(limits->pipe.min_bpp); } else { /* * TODO: set the DSC link limits already here, atm these are @@ -2333,7 +2429,7 @@ intel_dp_compute_config_link_bpp_limits(struct intel_dp *intel_dp, limits->link.max_bpp_x16 = max_link_bpp_x16; drm_dbg_kms(&i915->drm, - "[ENCODER:%d:%s][CRTC:%d:%s] DP link limits: pixel clock %d kHz DSC %s max lanes %d max rate %d max pipe_bpp %d max link_bpp " BPP_X16_FMT "\n", + "[ENCODER:%d:%s][CRTC:%d:%s] DP link limits: pixel clock %d kHz DSC %s max lanes %d max rate %d max pipe_bpp %d max link_bpp " FXP_Q4_FMT "\n", encoder->base.base.id, encoder->base.name, crtc->base.base.id, crtc->base.name, adjusted_mode->crtc_clock, @@ -2341,7 +2437,7 @@ intel_dp_compute_config_link_bpp_limits(struct intel_dp *intel_dp, limits->max_lane_count, limits->max_rate, limits->pipe.max_bpp, - BPP_X16_ARGS(limits->link.max_bpp_x16)); + FXP_Q4_ARGS(limits->link.max_bpp_x16)); return true; } @@ -2393,7 +2489,7 @@ int intel_dp_config_required_rate(const struct intel_crtc_state *crtc_state) const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; int bpp = crtc_state->dsc.compression_enable ? - to_bpp_int_roundup(crtc_state->dsc.compressed_bpp_x16) : + fxp_q4_to_int_roundup(crtc_state->dsc.compressed_bpp_x16) : crtc_state->pipe_bpp; return intel_dp_link_required(adjusted_mode->crtc_clock, bpp); @@ -2472,10 +2568,10 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, } drm_dbg_kms(&i915->drm, - "DP lane count %d clock %d bpp input %d compressed " BPP_X16_FMT " link rate required %d available %d\n", + "DP lane count %d clock %d bpp input %d compressed " FXP_Q4_FMT " link rate required %d available %d\n", pipe_config->lane_count, pipe_config->port_clock, pipe_config->pipe_bpp, - BPP_X16_ARGS(pipe_config->dsc.compressed_bpp_x16), + FXP_Q4_ARGS(pipe_config->dsc.compressed_bpp_x16), intel_dp_config_required_rate(pipe_config), intel_dp_max_link_data_rate(intel_dp, pipe_config->port_clock, @@ -2625,8 +2721,7 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp, const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - if (!crtc_state->vrr.enable || - !intel_dp_as_sdp_supported(intel_dp)) + if (!crtc_state->vrr.enable || !intel_dp->as_sdp_supported) return; crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_ADAPTIVE_SYNC); @@ -2875,7 +2970,6 @@ static void intel_dp_queue_modeset_retry_work(struct intel_connector *connector) drm_connector_put(&connector->base); } -/* NOTE: @state is only valid for MST links and can be %NULL for SST. */ void intel_dp_queue_modeset_retry_for_link(struct intel_atomic_state *state, struct intel_encoder *encoder, @@ -2884,18 +2978,19 @@ intel_dp_queue_modeset_retry_for_link(struct intel_atomic_state *state, struct intel_connector *connector; struct intel_digital_connector_state *conn_state; struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); int i; + if (intel_dp->needs_modeset_retry) + return; + + intel_dp->needs_modeset_retry = true; + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) { intel_dp_queue_modeset_retry_work(intel_dp->attached_connector); return; } - if (drm_WARN_ON(&i915->drm, !state)) - return; - for_each_new_intel_connector_in_state(state, connector, conn_state, i) { if (!conn_state->base.crtc) continue; @@ -2967,8 +3062,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (pipe_config->dsc.compression_enable) link_bpp_x16 = pipe_config->dsc.compressed_bpp_x16; else - link_bpp_x16 = to_bpp_x16(intel_dp_output_bpp(pipe_config->output_format, - pipe_config->pipe_bpp)); + link_bpp_x16 = fxp_q4_from_int(intel_dp_output_bpp(pipe_config->output_format, + pipe_config->pipe_bpp)); if (intel_dp->mso_link_count) { int n = intel_dp->mso_link_count; @@ -3023,6 +3118,7 @@ void intel_dp_set_link_params(struct intel_dp *intel_dp, { memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set)); intel_dp->link_trained = false; + intel_dp->needs_modeset_retry = false; intel_dp->link_rate = link_rate; intel_dp->lane_count = lane_count; } @@ -3031,6 +3127,8 @@ void intel_dp_reset_link_params(struct intel_dp *intel_dp) { intel_dp->link.max_lane_count = intel_dp_max_common_lane_count(intel_dp); intel_dp->link.max_rate = intel_dp_max_common_rate(intel_dp); + intel_dp->link.mst_probed_lane_count = 0; + intel_dp->link.mst_probed_rate = 0; intel_dp->link.retrain_disabled = false; intel_dp->link.seq_train_failures = 0; } @@ -3366,8 +3464,11 @@ void intel_dp_sync_state(struct intel_encoder *encoder, intel_dp_tunnel_resume(intel_dp, crtc_state, dpcd_updated); - if (crtc_state) + if (crtc_state) { intel_dp_reset_link_params(intel_dp); + intel_dp_set_link_params(intel_dp, crtc_state->port_clock, crtc_state->lane_count); + intel_dp->link_trained = true; + } } bool intel_dp_initial_fastset_check(struct intel_encoder *encoder, @@ -3434,7 +3535,7 @@ static void intel_dp_get_pcon_dsc_cap(struct intel_dp *intel_dp) static int intel_dp_pcon_get_frl_mask(u8 frl_bw_mask) { - int bw_gbps[] = {9, 18, 24, 32, 40, 48}; + static const int bw_gbps[] = {9, 18, 24, 32, 40, 48}; int i; for (i = ARRAY_SIZE(bw_gbps) - 1; i >= 0; i--) { @@ -4154,6 +4255,9 @@ intel_dp_mst_configure(struct intel_dp *intel_dp) intel_dp->is_mst = intel_dp->mst_detect != DRM_DP_SST; + if (intel_dp->is_mst) + intel_dp_mst_prepare_probe(intel_dp); + drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, intel_dp->is_mst); /* Avoid stale info on the next detect cycle. */ @@ -4383,8 +4487,11 @@ void intel_dp_set_infoframes(struct intel_encoder *encoder, if (!enable && HAS_DSC(dev_priv)) val &= ~VDIP_ENABLE_PPS; - /* When PSR is enabled, this routine doesn't disable VSC DIP */ - if (!crtc_state->has_psr) + /* + * This routine disables VSC DIP if the function is called + * to disable SDP or if it does not have PSR + */ + if (!enable || !crtc_state->has_psr) val &= ~VIDEO_DIP_ENABLE_VSC_HSW; intel_de_write(dev_priv, reg, val); @@ -5251,8 +5358,6 @@ static int intel_dp_retrain_link(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - struct intel_crtc *crtc; - bool mst_output = false; u8 pipe_mask; int ret; @@ -5281,78 +5386,28 @@ static int intel_dp_retrain_link(struct intel_encoder *encoder, encoder->base.base.id, encoder->base.name, str_yes_no(intel_dp->link.force_retrain)); - for_each_intel_crtc_in_pipe_mask(&dev_priv->drm, crtc, pipe_mask) { - const struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->base.state); - - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) { - mst_output = true; - break; - } - - /* Suppress underruns caused by re-training */ - intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); - if (crtc_state->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, - intel_crtc_pch_transcoder(crtc), false); - } - - /* TODO: use a modeset for SST as well. */ - if (mst_output) { - ret = intel_modeset_commit_pipes(dev_priv, pipe_mask, ctx); - - if (ret && ret != -EDEADLK) - drm_dbg_kms(&dev_priv->drm, - "[ENCODER:%d:%s] link retraining failed: %pe\n", - encoder->base.base.id, encoder->base.name, - ERR_PTR(ret)); - - goto out; - } - - for_each_intel_crtc_in_pipe_mask(&dev_priv->drm, crtc, pipe_mask) { - const struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->base.state); - - intel_dp->link_trained = false; - - intel_dp_check_frl_training(intel_dp); - intel_dp_pcon_dsc_configure(intel_dp, crtc_state); - intel_dp_start_link_train(NULL, intel_dp, crtc_state); - intel_dp_stop_link_train(intel_dp, crtc_state); - break; - } - - for_each_intel_crtc_in_pipe_mask(&dev_priv->drm, crtc, pipe_mask) { - const struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->base.state); + ret = intel_modeset_commit_pipes(dev_priv, pipe_mask, ctx); + if (ret == -EDEADLK) + return ret; - /* Keep underrun reporting disabled until things are stable */ - intel_crtc_wait_for_next_vblank(crtc); + intel_dp->link.force_retrain = false; - intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); - if (crtc_state->has_pch_encoder) - intel_set_pch_fifo_underrun_reporting(dev_priv, - intel_crtc_pch_transcoder(crtc), true); - } - -out: - if (ret != -EDEADLK) - intel_dp->link.force_retrain = false; + if (ret) + drm_dbg_kms(&dev_priv->drm, + "[ENCODER:%d:%s] link retraining failed: %pe\n", + encoder->base.base.id, encoder->base.name, + ERR_PTR(ret)); return ret; } void intel_dp_link_check(struct intel_encoder *encoder) { - struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct drm_modeset_acquire_ctx ctx; int ret; intel_modeset_lock_ctx_retry(&ctx, NULL, 0, ret) ret = intel_dp_retrain_link(encoder, &ctx); - - drm_WARN_ON(&i915->drm, ret); } void intel_dp_check_link_state(struct intel_dp *intel_dp) @@ -5902,6 +5957,15 @@ intel_dp_detect_dsc_caps(struct intel_dp *intel_dp, struct intel_connector *conn connector); } +static void +intel_dp_detect_sdp_caps(struct intel_dp *intel_dp) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + + intel_dp->as_sdp_supported = HAS_AS_SDP(i915) && + drm_dp_as_sdp_supported(&intel_dp->aux, intel_dp->dpcd); +} + static int intel_dp_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, @@ -5935,6 +5999,18 @@ intel_dp_detect(struct drm_connector *connector, else status = connector_status_disconnected; + if (status != connector_status_disconnected && + !intel_dp_mst_verify_dpcd_state(intel_dp)) + /* + * This requires retrying detection for instance to re-enable + * the MST mode that got reset via a long HPD pulse. The retry + * will happen either via the hotplug handler's retry logic, + * ensured by setting the connector here to SST/disconnected, + * or via a userspace connector probing in response to the + * hotplug uevent sent when removing the MST connectors. + */ + status = connector_status_disconnected; + if (status == connector_status_disconnected) { memset(&intel_dp->compliance, 0, sizeof(intel_dp->compliance)); memset(intel_connector->dp.dsc_dpcd, 0, sizeof(intel_connector->dp.dsc_dpcd)); @@ -5960,13 +6036,15 @@ intel_dp_detect(struct drm_connector *connector, intel_dp_detect_dsc_caps(intel_dp, intel_connector); - intel_dp_mst_configure(intel_dp); + intel_dp_detect_sdp_caps(intel_dp); if (intel_dp->reset_link_params) { intel_dp_reset_link_params(intel_dp); intel_dp->reset_link_params = false; } + intel_dp_mst_configure(intel_dp); + intel_dp_print_rates(intel_dp); if (intel_dp->is_mst) { @@ -6437,8 +6515,9 @@ static bool _intel_dp_is_port_edp(struct drm_i915_private *dev_priv, bool intel_dp_is_port_edp(struct drm_i915_private *i915, enum port port) { + struct intel_display *display = &i915->display; const struct intel_bios_encoder_data *devdata = - intel_bios_encoder_data_lookup(i915, port); + intel_bios_encoder_data_lookup(display, port); return _intel_dp_is_port_edp(i915, devdata, port); } @@ -6541,6 +6620,7 @@ static void intel_edp_backlight_setup(struct intel_dp *intel_dp, static bool intel_edp_init_connector(struct intel_dp *intel_dp, struct intel_connector *intel_connector) { + struct intel_display *display = to_intel_display(intel_dp); struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct drm_connector *connector = &intel_connector->base; struct drm_display_mode *fixed_mode; @@ -6566,7 +6646,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, return false; } - intel_bios_init_panel_early(dev_priv, &intel_connector->panel, + intel_bios_init_panel_early(display, &intel_connector->panel, encoder->devdata); if (!intel_pps_init(intel_dp)) { @@ -6663,7 +6743,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, drm_edid = ERR_PTR(-ENOENT); } - intel_bios_init_panel_late(dev_priv, &intel_connector->panel, encoder->devdata, + intel_bios_init_panel_late(display, &intel_connector->panel, encoder->devdata, IS_ERR(drm_edid) ? NULL : drm_edid); intel_panel_add_edid_fixed_modes(intel_connector, true); diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index a0f990a95ecc..1b9aaddd8c35 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -85,7 +85,6 @@ void intel_dp_audio_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state); bool intel_dp_has_hdmi_sink(struct intel_dp *intel_dp); bool intel_dp_is_edp(struct intel_dp *intel_dp); -bool intel_dp_as_sdp_supported(struct intel_dp *intel_dp); bool intel_dp_is_uhbr(const struct intel_crtc_state *crtc_state); bool intel_dp_has_dsc(const struct intel_connector *connector); int intel_dp_link_symbol_size(int rate); @@ -108,6 +107,8 @@ int intel_dp_max_common_rate(struct intel_dp *intel_dp); int intel_dp_max_common_lane_count(struct intel_dp *intel_dp); int intel_dp_common_rate(struct intel_dp *intel_dp, int index); int intel_dp_rate_index(const int *rates, int len, int rate); +int intel_dp_link_config_index(struct intel_dp *intel_dp, int link_rate, int lane_count); +void intel_dp_link_config_get(struct intel_dp *intel_dp, int idx, int *link_rate, int *lane_count); void intel_dp_update_sink_caps(struct intel_dp *intel_dp); void intel_dp_reset_link_params(struct intel_dp *intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index b8a53bb174da..cbc817bb0cc3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -83,7 +83,7 @@ static u32 g4x_get_aux_clock_divider(struct intel_dp *intel_dp, int index) * The clock divider is based off the hrawclk, and would like to run at * 2MHz. So, take the hrawclk value and divide by 2000 and use that */ - return DIV_ROUND_CLOSEST(RUNTIME_INFO(i915)->rawclk_freq, 2000); + return DIV_ROUND_CLOSEST(DISPLAY_RUNTIME_INFO(i915)->rawclk_freq, 2000); } static u32 ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) @@ -103,7 +103,7 @@ static u32 ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) if (dig_port->aux_ch == AUX_CH_A) freq = i915->display.cdclk.hw.cdclk; else - freq = RUNTIME_INFO(i915)->rawclk_freq; + freq = DISPLAY_RUNTIME_INFO(i915)->rawclk_freq; return DIV_ROUND_CLOSEST(freq, 2000); } diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 8ce60d53dcde..33f72db99b58 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -109,7 +109,7 @@ static bool is_intel_tcon_cap(const u8 tcon_cap[4]) static bool intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); struct drm_dp_aux *aux = &intel_dp->aux; struct intel_panel *panel = &connector->panel; @@ -122,7 +122,8 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) if (ret != sizeof(tcon_cap)) return false; - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] Detected %s HDR backlight interface version %d\n", + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] Detected %s HDR backlight interface version %d\n", connector->base.base.id, connector->base.name, is_intel_tcon_cap(tcon_cap) ? "Intel" : "unsupported", tcon_cap[0]); @@ -141,10 +142,10 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) * HDR static metadata we need to start maintaining table of * ranges for such panels. */ - if (i915->display.params.enable_dpcd_backlight != INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL && + if (display->params.enable_dpcd_backlight != INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL && !(connector->base.hdr_sink_metadata.hdmi_type1.metadata_type & BIT(HDMI_STATIC_METADATA_TYPE1))) { - drm_info(&i915->drm, + drm_info(display->drm, "[CONNECTOR:%d:%s] Panel is missing HDR static metadata. Possible support for Intel HDR backlight interface is not used. If your backlight controls don't work try booting with i915.enable_dpcd_backlight=%d. needs this, please file a _new_ bug report on drm/i915, see " FDO_BUG_URL " for details.\n", connector->base.base.id, connector->base.name, INTEL_DP_AUX_BACKLIGHT_FORCE_INTEL); @@ -170,14 +171,15 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) static u32 intel_dp_aux_hdr_get_backlight(struct intel_connector *connector, enum pipe pipe) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct intel_panel *panel = &connector->panel; struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); u8 tmp; u8 buf[2] = {}; if (drm_dp_dpcd_readb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, &tmp) != 1) { - drm_err(&i915->drm, "[CONNECTOR:%d:%s] Failed to read current backlight mode from DPCD\n", + drm_err(display->drm, + "[CONNECTOR:%d:%s] Failed to read current backlight mode from DPCD\n", connector->base.base.id, connector->base.name); return 0; } @@ -195,7 +197,8 @@ intel_dp_aux_hdr_get_backlight(struct intel_connector *connector, enum pipe pipe if (drm_dp_dpcd_read(&intel_dp->aux, INTEL_EDP_BRIGHTNESS_NITS_LSB, buf, sizeof(buf)) != sizeof(buf)) { - drm_err(&i915->drm, "[CONNECTOR:%d:%s] Failed to read brightness from DPCD\n", + drm_err(display->drm, + "[CONNECTOR:%d:%s] Failed to read brightness from DPCD\n", connector->base.base.id, connector->base.name); return 0; } @@ -253,8 +256,8 @@ static void intel_dp_aux_write_content_luminance(struct intel_connector *connector, struct hdr_output_metadata *hdr_metadata) { + struct intel_display *display = to_intel_display(connector); struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - struct drm_i915_private *i915 = to_i915(connector->base.dev); int ret; u8 buf[4]; @@ -270,7 +273,7 @@ intel_dp_aux_write_content_luminance(struct intel_connector *connector, INTEL_EDP_HDR_CONTENT_LUMINANCE, buf, sizeof(buf)); if (ret < 0) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Content Luminance DPCD reg write failed, err:-%d\n", ret); } @@ -280,7 +283,7 @@ intel_dp_aux_fill_hdr_tcon_params(const struct drm_connector_state *conn_state, { struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_panel *panel = &connector->panel; - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); /* * According to spec segmented backlight needs to be set whenever panel is in @@ -291,7 +294,7 @@ intel_dp_aux_fill_hdr_tcon_params(const struct drm_connector_state *conn_state, *ctrl |= INTEL_EDP_HDR_TCON_2084_DECODE_ENABLE; } - if (DISPLAY_VER(i915) < 11) + if (DISPLAY_VER(display) < 11) *ctrl &= ~INTEL_EDP_HDR_TCON_TONE_MAPPING_ENABLE; if (panel->backlight.edp.intel_cap.supports_2020_gamut && @@ -311,9 +314,9 @@ static void intel_dp_aux_hdr_enable_backlight(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state, u32 level) { + struct intel_display *display = to_intel_display(crtc_state); struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_panel *panel = &connector->panel; - struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); struct hdr_output_metadata *hdr_metadata; int ret; @@ -323,7 +326,8 @@ intel_dp_aux_hdr_enable_backlight(const struct intel_crtc_state *crtc_state, ret = drm_dp_dpcd_readb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, &old_ctrl); if (ret != 1) { - drm_err(&i915->drm, "[CONNECTOR:%d:%s] Failed to read current backlight control mode: %d\n", + drm_err(display->drm, + "[CONNECTOR:%d:%s] Failed to read current backlight control mode: %d\n", connector->base.base.id, connector->base.name, ret); return; } @@ -346,7 +350,8 @@ intel_dp_aux_hdr_enable_backlight(const struct intel_crtc_state *crtc_state, if (ctrl != old_ctrl && drm_dp_dpcd_writeb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, ctrl) != 1) - drm_err(&i915->drm, "[CONNECTOR:%d:%s] Failed to configure DPCD brightness controls\n", + drm_err(display->drm, + "[CONNECTOR:%d:%s] Failed to configure DPCD brightness controls\n", connector->base.base.id, connector->base.name); if (intel_dp_in_hdr_mode(conn_state)) { @@ -377,7 +382,7 @@ static const char *dpcd_vs_pwm_str(bool aux) static void intel_dp_aux_write_panel_luminance_override(struct intel_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct intel_panel *panel = &connector->panel; struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); int ret; @@ -392,7 +397,7 @@ intel_dp_aux_write_panel_luminance_override(struct intel_connector *connector) INTEL_EDP_HDR_PANEL_LUMINANCE_OVERRIDE, buf, sizeof(buf)); if (ret < 0) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Panel Luminance DPCD reg write failed, err:-%d\n", ret); } @@ -400,20 +405,21 @@ intel_dp_aux_write_panel_luminance_override(struct intel_connector *connector) static int intel_dp_aux_hdr_setup_backlight(struct intel_connector *connector, enum pipe pipe) { - struct drm_i915_private *i915 = to_i915(connector->base.dev); + struct intel_display *display = to_intel_display(connector); struct intel_panel *panel = &connector->panel; struct drm_luminance_range_info *luminance_range = &connector->base.display_info.luminance_range; int ret; - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] SDR backlight is controlled through %s\n", + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] SDR backlight is controlled through %s\n", connector->base.base.id, connector->base.name, dpcd_vs_pwm_str(panel->backlight.edp.intel_cap.sdr_uses_aux)); if (!panel->backlight.edp.intel_cap.sdr_uses_aux) { ret = panel->backlight.pwm_funcs->setup(connector, pipe); if (ret < 0) { - drm_err(&i915->drm, + drm_err(display->drm, "[CONNECTOR:%d:%s] Failed to setup SDR backlight controls through PWM: %d\n", connector->base.base.id, connector->base.name, ret); return ret; @@ -430,7 +436,8 @@ intel_dp_aux_hdr_setup_backlight(struct intel_connector *connector, enum pipe pi intel_dp_aux_write_panel_luminance_override(connector); - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] Using AUX HDR interface for backlight control (range %d..%d)\n", + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] Using AUX HDR interface for backlight control (range %d..%d)\n", connector->base.base.id, connector->base.name, panel->backlight.min, panel->backlight.max); @@ -501,9 +508,9 @@ static void intel_dp_aux_vesa_disable_backlight(const struct drm_connector_state static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, enum pipe pipe) { + struct intel_display *display = to_intel_display(connector); struct intel_dp *intel_dp = intel_attached_dp(connector); struct intel_panel *panel = &connector->panel; - struct drm_i915_private *i915 = dp_to_i915(intel_dp); u16 current_level; u8 current_mode; int ret; @@ -514,17 +521,19 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, if (ret < 0) return ret; - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] AUX VESA backlight enable is controlled through %s\n", + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] AUX VESA backlight enable is controlled through %s\n", connector->base.base.id, connector->base.name, dpcd_vs_pwm_str(panel->backlight.edp.vesa.info.aux_enable)); - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] AUX VESA backlight level is controlled through %s\n", + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] AUX VESA backlight level is controlled through %s\n", connector->base.base.id, connector->base.name, dpcd_vs_pwm_str(panel->backlight.edp.vesa.info.aux_set)); if (!panel->backlight.edp.vesa.info.aux_set || !panel->backlight.edp.vesa.info.aux_enable) { ret = panel->backlight.pwm_funcs->setup(connector, pipe); if (ret < 0) { - drm_err(&i915->drm, + drm_err(display->drm, "[CONNECTOR:%d:%s] Failed to setup PWM backlight controls for eDP backlight: %d\n", connector->base.base.id, connector->base.name, ret); return ret; @@ -553,7 +562,8 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, } } - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] Using AUX VESA interface for backlight control\n", + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] Using AUX VESA interface for backlight control\n", connector->base.base.id, connector->base.name); return 0; @@ -562,11 +572,12 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, static bool intel_dp_aux_supports_vesa_backlight(struct intel_connector *connector) { + struct intel_display *display = to_intel_display(connector); struct intel_dp *intel_dp = intel_attached_dp(connector); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); if (drm_edp_backlight_supported(intel_dp->edp_dpcd)) { - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] AUX Backlight Control Supported!\n", + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] AUX Backlight Control Supported!\n", connector->base.base.id, connector->base.name); return true; } @@ -591,16 +602,15 @@ static const struct intel_panel_bl_funcs intel_dp_vesa_bl_funcs = { int intel_dp_aux_init_backlight_funcs(struct intel_connector *connector) { + struct intel_display *display = to_intel_display(connector); struct drm_device *dev = connector->base.dev; struct intel_panel *panel = &connector->panel; - struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); bool try_intel_interface = false, try_vesa_interface = false; /* Check the VBT and user's module parameters to figure out which * interfaces to probe */ - switch (i915->display.params.enable_dpcd_backlight) { + switch (display->params.enable_dpcd_backlight) { case INTEL_DP_AUX_BACKLIGHT_OFF: return -ENODEV; case INTEL_DP_AUX_BACKLIGHT_AUTO: diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index 2edffe62f360..3425b3643143 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -39,7 +39,9 @@ static u32 transcoder_to_stream_enc_status(enum transcoder cpu_transcoder) static void intel_dp_hdcp_wait_for_cp_irq(struct intel_connector *connector, int timeout) { - struct intel_hdcp *hdcp = &connector->hdcp; + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct intel_dp *dp = &dig_port->dp; + struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; long ret; #define C (hdcp->cp_irq_count_cached != atomic_read(&hdcp->cp_irq_count)) @@ -150,7 +152,7 @@ int intel_dp_hdcp_repeater_present(struct intel_digital_port *dig_port, ssize_t ret; u8 bcaps; - ret = intel_dp_hdcp_read_bcaps(&dig_port->dp.aux, i915, &bcaps); + ret = intel_dp_hdcp_read_bcaps(&dig_port->dp.aux, i915, &bcaps); if (ret) return ret; @@ -675,8 +677,15 @@ static int intel_dp_hdcp2_get_capability(struct intel_connector *connector, bool *capable) { - struct intel_digital_port *dig_port = intel_attached_dig_port(connector); - struct drm_dp_aux *aux = &dig_port->dp.aux; + struct intel_digital_port *dig_port; + struct drm_dp_aux *aux; + + *capable = false; + if (!intel_attached_encoder(connector)) + return -EINVAL; + + dig_port = intel_attached_dig_port(connector); + aux = &dig_port->dp.aux; return _intel_dp_hdcp2_get_capability(aux, capable); } diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index d044c8e36bb3..9c8738295106 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -21,6 +21,8 @@ * IN THE SOFTWARE. */ +#include <drm/display/drm_dp_helper.h> + #include "i915_drv.h" #include "intel_display_types.h" #include "intel_dp.h" @@ -114,7 +116,13 @@ intel_dp_set_lttpr_transparent_mode(struct intel_dp *intel_dp, bool enable) u8 val = enable ? DP_PHY_REPEATER_MODE_TRANSPARENT : DP_PHY_REPEATER_MODE_NON_TRANSPARENT; - return drm_dp_dpcd_write(&intel_dp->aux, DP_PHY_REPEATER_MODE, &val, 1) == 1; + if (drm_dp_dpcd_write(&intel_dp->aux, DP_PHY_REPEATER_MODE, &val, 1) != 1) + return false; + + intel_dp->lttpr_common_caps[DP_PHY_REPEATER_MODE - + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV] = val; + + return true; } static bool intel_dp_lttpr_transparent_mode_enabled(struct intel_dp *intel_dp) @@ -174,7 +182,7 @@ static int intel_dp_init_lttpr_phys(struct intel_dp *intel_dp, const u8 dpcd[DP_ * still taking into account any LTTPR common lane- rate/count limits. */ if (lttpr_count < 0) - return 0; + goto out_reset_lttpr_count; if (!intel_dp_set_lttpr_transparent_mode(intel_dp, false)) { lt_dbg(intel_dp, DP_PHY_DPRX, @@ -697,26 +705,28 @@ static bool intel_dp_link_max_vswing_reached(struct intel_dp *intel_dp, return true; } -static void -intel_dp_update_downspread_ctrl(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) +void intel_dp_link_training_set_mode(struct intel_dp *intel_dp, int link_rate, bool is_vrr) { u8 link_config[2]; - link_config[0] = crtc_state->vrr.flipline ? DP_MSA_TIMING_PAR_IGNORE_EN : 0; - link_config[1] = intel_dp_is_uhbr(crtc_state) ? + link_config[0] = is_vrr ? DP_MSA_TIMING_PAR_IGNORE_EN : 0; + link_config[1] = drm_dp_is_uhbr_rate(link_rate) ? DP_SET_ANSI_128B132B : DP_SET_ANSI_8B10B; drm_dp_dpcd_write(&intel_dp->aux, DP_DOWNSPREAD_CTRL, link_config, 2); } -static void -intel_dp_update_link_bw_set(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - u8 link_bw, u8 rate_select) +static void intel_dp_update_downspread_ctrl(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) { - u8 lane_count = crtc_state->lane_count; + intel_dp_link_training_set_mode(intel_dp, + crtc_state->port_clock, crtc_state->vrr.flipline); +} - if (crtc_state->enhanced_framing) +void intel_dp_link_training_set_bw(struct intel_dp *intel_dp, + int link_bw, int rate_select, int lane_count, + bool enhanced_framing) +{ + if (enhanced_framing) lane_count |= DP_LANE_COUNT_ENHANCED_FRAME_EN; if (link_bw) { @@ -740,6 +750,14 @@ intel_dp_update_link_bw_set(struct intel_dp *intel_dp, } } +static void intel_dp_update_link_bw_set(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + u8 link_bw, u8 rate_select) +{ + intel_dp_link_training_set_bw(intel_dp, link_bw, rate_select, crtc_state->lane_count, + crtc_state->enhanced_framing); +} + /* * Prepare link training by configuring the link parameters. On DDI platforms * also enable the port here. @@ -1152,6 +1170,36 @@ static bool intel_dp_can_link_train_fallback_for_edp(struct intel_dp *intel_dp, return true; } +static bool reduce_link_params_in_bw_order(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + int *new_link_rate, int *new_lane_count) +{ + int link_rate; + int lane_count; + int i; + + i = intel_dp_link_config_index(intel_dp, crtc_state->port_clock, crtc_state->lane_count); + for (i--; i >= 0; i--) { + intel_dp_link_config_get(intel_dp, i, &link_rate, &lane_count); + + if ((intel_dp->link.force_rate && + intel_dp->link.force_rate != link_rate) || + (intel_dp->link.force_lane_count && + intel_dp->link.force_lane_count != lane_count)) + continue; + + break; + } + + if (i < 0) + return false; + + *new_link_rate = link_rate; + *new_lane_count = lane_count; + + return true; +} + static int reduce_link_rate(struct intel_dp *intel_dp, int current_rate) { int rate_index; @@ -1187,6 +1235,41 @@ static int reduce_lane_count(struct intel_dp *intel_dp, int current_lane_count) return current_lane_count >> 1; } +static bool reduce_link_params_in_rate_lane_order(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + int *new_link_rate, int *new_lane_count) +{ + int link_rate; + int lane_count; + + lane_count = crtc_state->lane_count; + link_rate = reduce_link_rate(intel_dp, crtc_state->port_clock); + if (link_rate < 0) { + lane_count = reduce_lane_count(intel_dp, crtc_state->lane_count); + link_rate = intel_dp_max_common_rate(intel_dp); + } + + if (lane_count < 0) + return false; + + *new_link_rate = link_rate; + *new_lane_count = lane_count; + + return true; +} + +static bool reduce_link_params(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, + int *new_link_rate, int *new_lane_count) +{ + /* TODO: Use the same fallback logic on SST as on MST. */ + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) + return reduce_link_params_in_bw_order(intel_dp, crtc_state, + new_link_rate, new_lane_count); + else + return reduce_link_params_in_rate_lane_order(intel_dp, crtc_state, + new_link_rate, new_lane_count); +} + static int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { @@ -1200,14 +1283,7 @@ static int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, return 0; } - new_lane_count = crtc_state->lane_count; - new_link_rate = reduce_link_rate(intel_dp, crtc_state->port_clock); - if (new_link_rate < 0) { - new_lane_count = reduce_lane_count(intel_dp, crtc_state->lane_count); - new_link_rate = intel_dp_max_common_rate(intel_dp); - } - - if (new_lane_count < 0) + if (!reduce_link_params(intel_dp, crtc_state, &new_link_rate, &new_lane_count)) return -1; if (intel_dp_is_edp(intel_dp) && @@ -1228,12 +1304,10 @@ static int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, return 0; } -/* NOTE: @state is only valid for MST links and can be %NULL for SST. */ static bool intel_dp_schedule_fallback_link_training(struct intel_atomic_state *state, struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = dp_to_i915(intel_dp); struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; if (!intel_digital_port_connected(&dp_to_dig_port(intel_dp)->base)) { @@ -1249,11 +1323,6 @@ static bool intel_dp_schedule_fallback_link_training(struct intel_atomic_state * return false; } - if (drm_WARN_ON(&i915->drm, - intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST) && - !state)) - return false; - /* Schedule a Hotplug Uevent to userspace to start modeset */ intel_dp_queue_modeset_retry_for_link(state, encoder, crtc_state); @@ -1512,8 +1581,6 @@ intel_dp_128b132b_link_train(struct intel_dp *intel_dp, * retraining with reduced link rate/lane parameters if the link training * fails. * After calling this function intel_dp_stop_link_train() must be called. - * - * NOTE: @state is only valid for MST links and can be %NULL for SST. */ void intel_dp_start_link_train(struct intel_atomic_state *state, struct intel_dp *intel_dp, @@ -1530,11 +1597,6 @@ void intel_dp_start_link_train(struct intel_atomic_state *state, */ int lttpr_count = intel_dp_init_lttpr_and_dprx_caps(intel_dp); - if (drm_WARN_ON(&i915->drm, - intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST) && - !state)) - return; - if (lttpr_count < 0) /* Still continue with enabling the port and link training. */ lttpr_count = 0; diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.h b/drivers/gpu/drm/i915/display/intel_dp_link_training.h index 42e7fc6cb171..2066b9146762 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.h +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.h @@ -16,6 +16,12 @@ struct intel_dp; int intel_dp_read_dprx_caps(struct intel_dp *intel_dp, u8 dpcd[DP_RECEIVER_CAP_SIZE]); int intel_dp_init_lttpr_and_dprx_caps(struct intel_dp *intel_dp); +void intel_dp_link_training_set_mode(struct intel_dp *intel_dp, + int link_rate, bool is_vrr); +void intel_dp_link_training_set_bw(struct intel_dp *intel_dp, + int link_bw, int rate_select, int lane_count, + bool enhanced_framing); + void intel_dp_get_adjust_train(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, enum drm_dp_phy dp_phy, diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 27ce5c3f5951..15541932b809 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -43,6 +43,7 @@ #include "intel_dp_hdcp.h" #include "intel_dp_mst.h" #include "intel_dp_tunnel.h" +#include "intel_dp_link_training.h" #include "intel_dpio_phy.h" #include "intel_hdcp.h" #include "intel_hotplug.h" @@ -211,8 +212,8 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder, drm_dbg_kms(&i915->drm, "Trying bpp %d\n", bpp); - link_bpp_x16 = to_bpp_x16(dsc ? bpp : - intel_dp_output_bpp(crtc_state->output_format, bpp)); + link_bpp_x16 = fxp_q4_from_int(dsc ? bpp : + intel_dp_output_bpp(crtc_state->output_format, bpp)); local_bw_overhead = intel_dp_mst_bw_overhead(crtc_state, connector, false, dsc, link_bpp_x16); @@ -289,7 +290,7 @@ static int intel_dp_mst_find_vcpi_slots_for_bpp(struct intel_encoder *encoder, if (!dsc) crtc_state->pipe_bpp = bpp; else - crtc_state->dsc.compressed_bpp_x16 = to_bpp_x16(bpp); + crtc_state->dsc.compressed_bpp_x16 = fxp_q4_from_int(bpp); drm_dbg_kms(&i915->drm, "Got %d slots for pipe bpp %d dsc %d\n", slots, bpp, dsc); } @@ -308,8 +309,8 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, * YUV420 is only half of the pipe bpp value. */ slots = intel_dp_mst_find_vcpi_slots_for_bpp(encoder, crtc_state, - to_bpp_int(limits->link.max_bpp_x16), - to_bpp_int(limits->link.min_bpp_x16), + fxp_q4_to_int(limits->link.max_bpp_x16), + fxp_q4_to_int(limits->link.min_bpp_x16), limits, conn_state, 2 * 3, false); @@ -374,11 +375,11 @@ static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder, crtc_state, max_bpp / 3); max_compressed_bpp = min(max_compressed_bpp, - to_bpp_int(limits->link.max_bpp_x16)); + fxp_q4_to_int(limits->link.max_bpp_x16)); min_compressed_bpp = intel_dp_dsc_sink_min_compressed_bpp(crtc_state); min_compressed_bpp = max(min_compressed_bpp, - to_bpp_int_roundup(limits->link.min_bpp_x16)); + fxp_q4_to_int_roundup(limits->link.min_bpp_x16)); drm_dbg_kms(&i915->drm, "DSC Sink supported compressed min bpp %d compressed max bpp %d\n", min_compressed_bpp, max_compressed_bpp); @@ -478,10 +479,10 @@ adjust_limits_for_dsc_hblank_expansion_quirk(const struct intel_connector *conne crtc->base.base.id, crtc->base.name, connector->base.base.id, connector->base.name); - if (limits->link.max_bpp_x16 < to_bpp_x16(24)) + if (limits->link.max_bpp_x16 < fxp_q4_from_int(24)) return false; - limits->link.min_bpp_x16 = to_bpp_x16(24); + limits->link.min_bpp_x16 = fxp_q4_from_int(24); return true; } @@ -489,18 +490,18 @@ adjust_limits_for_dsc_hblank_expansion_quirk(const struct intel_connector *conne drm_WARN_ON(&i915->drm, limits->min_rate != limits->max_rate); if (limits->max_rate < 540000) - min_bpp_x16 = to_bpp_x16(13); + min_bpp_x16 = fxp_q4_from_int(13); else if (limits->max_rate < 810000) - min_bpp_x16 = to_bpp_x16(10); + min_bpp_x16 = fxp_q4_from_int(10); if (limits->link.min_bpp_x16 >= min_bpp_x16) return true; drm_dbg_kms(&i915->drm, - "[CRTC:%d:%s][CONNECTOR:%d:%s] Increasing link min bpp to " BPP_X16_FMT " in DSC mode due to hblank expansion quirk\n", + "[CRTC:%d:%s][CONNECTOR:%d:%s] Increasing link min bpp to " FXP_Q4_FMT " in DSC mode due to hblank expansion quirk\n", crtc->base.base.id, crtc->base.name, connector->base.base.id, connector->base.name, - BPP_X16_ARGS(min_bpp_x16)); + FXP_Q4_ARGS(min_bpp_x16)); if (limits->link.max_bpp_x16 < min_bpp_x16) return false; @@ -1113,6 +1114,33 @@ static void intel_mst_pre_pll_enable_dp(struct intel_atomic_state *state, to_intel_crtc(pipe_config->uapi.crtc)); } +static bool intel_mst_probed_link_params_valid(struct intel_dp *intel_dp, + int link_rate, int lane_count) +{ + return intel_dp->link.mst_probed_rate == link_rate && + intel_dp->link.mst_probed_lane_count == lane_count; +} + +static void intel_mst_set_probed_link_params(struct intel_dp *intel_dp, + int link_rate, int lane_count) +{ + intel_dp->link.mst_probed_rate = link_rate; + intel_dp->link.mst_probed_lane_count = lane_count; +} + +static void intel_mst_reprobe_topology(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + if (intel_mst_probed_link_params_valid(intel_dp, + crtc_state->port_clock, crtc_state->lane_count)) + return; + + drm_dp_mst_topology_queue_probe(&intel_dp->mst_mgr); + + intel_mst_set_probed_link_params(intel_dp, + crtc_state->port_clock, crtc_state->lane_count); +} + static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config, @@ -1149,17 +1177,19 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state, intel_dp_sink_enable_decompression(state, connector, pipe_config); - if (first_mst_stream) + if (first_mst_stream) { dig_port->base.pre_enable(state, &dig_port->base, pipe_config, NULL); + intel_mst_reprobe_topology(intel_dp, pipe_config); + } + intel_dp->active_mst_links++; ret = drm_dp_add_payload_part1(&intel_dp->mst_mgr, mst_state, drm_atomic_get_mst_payload_state(mst_state, connector->port)); if (ret < 0) - drm_dbg_kms(&dev_priv->drm, "Failed to create MST payload for %s: %d\n", - connector->base.name, ret); + intel_dp_queue_modeset_retry_for_link(state, &dig_port->base, pipe_config); /* * Before Gen 12 this is not done as part of @@ -1223,6 +1253,7 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, enum transcoder trans = pipe_config->cpu_transcoder; bool first_mst_stream = intel_dp->active_mst_links == 1; struct intel_crtc *pipe_crtc; + int ret; drm_WARN_ON(&dev_priv->drm, pipe_config->has_pch_encoder); @@ -1254,8 +1285,11 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, if (first_mst_stream) intel_ddi_wait_for_fec_status(encoder, pipe_config, true); - drm_dp_add_payload_part2(&intel_dp->mst_mgr, - drm_atomic_get_mst_payload_state(mst_state, connector->port)); + ret = drm_dp_add_payload_part2(&intel_dp->mst_mgr, + drm_atomic_get_mst_payload_state(mst_state, + connector->port)); + if (ret < 0) + intel_dp_queue_modeset_retry_for_link(state, &dig_port->base, pipe_config); if (DISPLAY_VER(dev_priv) >= 12) intel_de_rmw(dev_priv, hsw_chicken_trans_reg(dev_priv, trans), @@ -1998,3 +2032,73 @@ bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state, return false; } + +/** + * intel_dp_mst_prepare_probe - Prepare an MST link for topology probing + * @intel_dp: DP port object + * + * Prepare an MST link for topology probing, programming the target + * link parameters to DPCD. This step is a requirement of the enumaration + * of path resources during probing. + */ +void intel_dp_mst_prepare_probe(struct intel_dp *intel_dp) +{ + int link_rate = intel_dp_max_link_rate(intel_dp); + int lane_count = intel_dp_max_lane_count(intel_dp); + u8 rate_select; + u8 link_bw; + + if (intel_dp->link_trained) + return; + + if (intel_mst_probed_link_params_valid(intel_dp, link_rate, lane_count)) + return; + + intel_dp_compute_rate(intel_dp, link_rate, &link_bw, &rate_select); + + intel_dp_link_training_set_mode(intel_dp, link_rate, false); + intel_dp_link_training_set_bw(intel_dp, link_bw, rate_select, lane_count, + drm_dp_enhanced_frame_cap(intel_dp->dpcd)); + + intel_mst_set_probed_link_params(intel_dp, link_rate, lane_count); +} + +/* + * intel_dp_mst_verify_dpcd_state - verify the MST SW enabled state wrt. the DPCD + * @intel_dp: DP port object + * + * Verify if @intel_dp's MST enabled SW state matches the corresponding DPCD + * state. A long HPD pulse - not long enough to be detected as a disconnected + * state - could've reset the DPCD state, which requires tearing + * down/recreating the MST topology. + * + * Returns %true if the SW MST enabled and DPCD states match, %false + * otherwise. + */ +bool intel_dp_mst_verify_dpcd_state(struct intel_dp *intel_dp) +{ + struct intel_display *display = to_intel_display(intel_dp); + struct intel_connector *connector = intel_dp->attached_connector; + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct intel_encoder *encoder = &dig_port->base; + int ret; + u8 val; + + if (!intel_dp->is_mst) + return true; + + ret = drm_dp_dpcd_readb(intel_dp->mst_mgr.aux, DP_MSTM_CTRL, &val); + + /* Adjust the expected register value for SST + SideBand. */ + if (ret < 0 || val != (DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC)) { + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s][ENCODER:%d:%s] MST mode got reset, removing topology (ret=%d, ctrl=0x%02x)\n", + connector->base.base.id, connector->base.name, + encoder->base.base.id, encoder->base.name, + ret, val); + + return false; + } + + return true; +} diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.h b/drivers/gpu/drm/i915/display/intel_dp_mst.h index 8ca1d599091c..8343804ce3f8 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.h +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.h @@ -27,5 +27,7 @@ int intel_dp_mst_atomic_check_link(struct intel_atomic_state *state, struct intel_link_bw_limits *limits); bool intel_dp_mst_crtc_needs_modeset(struct intel_atomic_state *state, struct intel_crtc *crtc); +void intel_dp_mst_prepare_probe(struct intel_dp *intel_dp); +bool intel_dp_mst_verify_dpcd_state(struct intel_dp *intel_dp); #endif /* __INTEL_DP_MST_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 292d163036b1..f490b2157828 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -3339,6 +3339,7 @@ static int icl_get_combo_phy_dpll(struct intel_atomic_state *state, struct intel_crtc *crtc, struct intel_encoder *encoder) { + struct intel_display *display = to_intel_display(crtc); struct drm_i915_private *i915 = to_i915(crtc->base.dev); struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); @@ -3379,7 +3380,7 @@ static int icl_get_combo_phy_dpll(struct intel_atomic_state *state, } /* Eliminate DPLLs from consideration if reserved by HTI */ - dpll_mask &= ~intel_hti_dpll_mask(i915); + dpll_mask &= ~intel_hti_dpll_mask(display); port_dpll->pll = intel_find_shared_dpll(state, crtc, &port_dpll->hw_state, diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c index 73a1918e2537..3a6d99044828 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.c +++ b/drivers/gpu/drm/i915/display/intel_dpt.c @@ -317,3 +317,7 @@ void intel_dpt_destroy(struct i915_address_space *vm) i915_vm_put(&dpt->vm); } +u64 intel_dpt_offset(struct i915_vma *dpt_vma) +{ + return dpt_vma->node.start; +} diff --git a/drivers/gpu/drm/i915/display/intel_dpt.h b/drivers/gpu/drm/i915/display/intel_dpt.h index ff18a525bfbe..1f88b0ee17e7 100644 --- a/drivers/gpu/drm/i915/display/intel_dpt.h +++ b/drivers/gpu/drm/i915/display/intel_dpt.h @@ -6,6 +6,8 @@ #ifndef __INTEL_DPT_H__ #define __INTEL_DPT_H__ +#include <linux/types.h> + struct drm_i915_private; struct i915_address_space; @@ -20,5 +22,6 @@ void intel_dpt_suspend(struct drm_i915_private *i915); void intel_dpt_resume(struct drm_i915_private *i915); struct i915_address_space * intel_dpt_create(struct intel_framebuffer *fb); +u64 intel_dpt_offset(struct i915_vma *dpt_vma); #endif /* __INTEL_DPT_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 2ab3765f6c06..da24e041d269 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -6,6 +6,7 @@ #include "i915_drv.h" #include "i915_irq.h" +#include "i915_reg.h" #include "intel_crtc.h" #include "intel_de.h" #include "intel_display_types.h" @@ -42,7 +43,8 @@ struct intel_dsb { */ unsigned int ins_start_offset; - int dewake_scanline; + u32 chicken; + int hw_dewake_scanline; }; /** @@ -82,6 +84,93 @@ struct intel_dsb { #define DSB_OPCODE_POLL 0xA /* see DSB_REG_VALUE_MASK */ +static bool pre_commit_is_vrr_active(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + + /* VRR will be enabled afterwards, if necessary */ + if (intel_crtc_needs_modeset(new_crtc_state)) + return false; + + /* VRR will have been disabled during intel_pre_plane_update() */ + return old_crtc_state->vrr.enable && !intel_crtc_vrr_disabling(state, crtc); +} + +static const struct intel_crtc_state * +pre_commit_crtc_state(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + + /* + * During fastsets/etc. the transcoder is still + * running with the old timings at this point. + */ + if (intel_crtc_needs_modeset(new_crtc_state)) + return new_crtc_state; + else + return old_crtc_state; +} + +static int dsb_vtotal(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *crtc_state = pre_commit_crtc_state(state, crtc); + + if (pre_commit_is_vrr_active(state, crtc)) + return crtc_state->vrr.vmax; + else + return intel_mode_vtotal(&crtc_state->hw.adjusted_mode); +} + +static int dsb_dewake_scanline_start(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *crtc_state = pre_commit_crtc_state(state, crtc); + struct drm_i915_private *i915 = to_i915(state->base.dev); + unsigned int latency = skl_watermark_max_latency(i915, 0); + + return intel_mode_vdisplay(&crtc_state->hw.adjusted_mode) - + intel_usecs_to_scanlines(&crtc_state->hw.adjusted_mode, latency); +} + +static int dsb_dewake_scanline_end(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *crtc_state = pre_commit_crtc_state(state, crtc); + + return intel_mode_vdisplay(&crtc_state->hw.adjusted_mode); +} + +static int dsb_scanline_to_hw(struct intel_atomic_state *state, + struct intel_crtc *crtc, int scanline) +{ + const struct intel_crtc_state *crtc_state = pre_commit_crtc_state(state, crtc); + int vtotal = dsb_vtotal(state, crtc); + + return (scanline + vtotal - intel_crtc_scanline_offset(crtc_state)) % vtotal; +} + +static u32 dsb_chicken(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + if (pre_commit_is_vrr_active(state, crtc)) + return DSB_SKIP_WAITS_EN | + DSB_CTRL_WAIT_SAFE_WINDOW | + DSB_CTRL_NO_WAIT_VBLANK | + DSB_INST_WAIT_SAFE_WINDOW | + DSB_INST_NO_WAIT_VBLANK; + else + return DSB_SKIP_WAITS_EN; +} + static bool assert_dsb_has_room(struct intel_dsb *dsb) { struct intel_crtc *crtc = dsb->crtc; @@ -281,6 +370,79 @@ void intel_dsb_nonpost_end(struct intel_dsb *dsb) intel_dsb_noop(dsb, 4); } +static void intel_dsb_emit_wait_dsl(struct intel_dsb *dsb, + u32 opcode, int lower, int upper) +{ + u64 window = ((u64)upper << DSB_SCANLINE_UPPER_SHIFT) | + ((u64)lower << DSB_SCANLINE_LOWER_SHIFT); + + intel_dsb_emit(dsb, lower_32_bits(window), + (opcode << DSB_OPCODE_SHIFT) | + upper_32_bits(window)); +} + +static void intel_dsb_wait_dsl(struct intel_atomic_state *state, + struct intel_dsb *dsb, + int lower_in, int upper_in, + int lower_out, int upper_out) +{ + struct intel_crtc *crtc = dsb->crtc; + + lower_in = dsb_scanline_to_hw(state, crtc, lower_in); + upper_in = dsb_scanline_to_hw(state, crtc, upper_in); + + lower_out = dsb_scanline_to_hw(state, crtc, lower_out); + upper_out = dsb_scanline_to_hw(state, crtc, upper_out); + + if (upper_in >= lower_in) + intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_IN, + lower_in, upper_in); + else if (upper_out >= lower_out) + intel_dsb_emit_wait_dsl(dsb, DSB_OPCODE_WAIT_DSL_OUT, + lower_out, upper_out); + else + drm_WARN_ON(crtc->base.dev, 1); /* assert_dsl_ok() should have caught it already */ +} + +static void assert_dsl_ok(struct intel_atomic_state *state, + struct intel_dsb *dsb, + int start, int end) +{ + struct intel_crtc *crtc = dsb->crtc; + int vtotal = dsb_vtotal(state, crtc); + + /* + * Waiting for the entire frame doesn't make sense, + * (IN==don't wait, OUT=wait forever). + */ + drm_WARN(crtc->base.dev, (end - start + vtotal) % vtotal == vtotal - 1, + "[CRTC:%d:%s] DSB %d bad scanline window wait: %d-%d (vt=%d)\n", + crtc->base.base.id, crtc->base.name, dsb->id, + start, end, vtotal); +} + +void intel_dsb_wait_scanline_in(struct intel_atomic_state *state, + struct intel_dsb *dsb, + int start, int end) +{ + assert_dsl_ok(state, dsb, start, end); + + intel_dsb_wait_dsl(state, dsb, + start, end, + end + 1, start - 1); +} + +void intel_dsb_wait_scanline_out(struct intel_atomic_state *state, + struct intel_dsb *dsb, + int start, int end) +{ + assert_dsl_ok(state, dsb, start, end); + + intel_dsb_wait_dsl(state, dsb, + end + 1, start - 1, + start, end); +} + static void intel_dsb_align_tail(struct intel_dsb *dsb) { u32 aligned_tail, tail; @@ -302,8 +464,10 @@ void intel_dsb_finish(struct intel_dsb *dsb) /* * DSB_FORCE_DEWAKE remains active even after DSB is * disabled, so make sure to clear it (if set during - * intel_dsb_commit()). + * intel_dsb_commit()). And clear DSB_ENABLE_DEWAKE as + * well for good measure. */ + intel_dsb_reg_write(dsb, DSB_PMCTRL(crtc->pipe, dsb->id), 0); intel_dsb_reg_write_masked(dsb, DSB_PMCTRL_2(crtc->pipe, dsb->id), DSB_FORCE_DEWAKE, 0); @@ -312,35 +476,109 @@ void intel_dsb_finish(struct intel_dsb *dsb) intel_dsb_buffer_flush_map(&dsb->dsb_buf); } -static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state) +static u32 dsb_error_int_status(struct intel_display *display) { - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - unsigned int latency = skl_watermark_max_latency(i915, 0); - int vblank_start; + u32 errors; - if (crtc_state->vrr.enable) - vblank_start = intel_vrr_vmin_vblank_start(crtc_state); - else - vblank_start = intel_mode_vblank_start(adjusted_mode); + errors = DSB_GTT_FAULT_INT_STATUS | + DSB_RSPTIMEOUT_INT_STATUS | + DSB_POLL_ERR_INT_STATUS; + + /* + * All the non-existing status bits operate as + * normal r/w bits, so any attempt to clear them + * will just end up setting them. Never do that so + * we won't mistake them for actual error interrupts. + */ + if (DISPLAY_VER(display) >= 14) + errors |= DSB_ATS_FAULT_INT_STATUS; - return max(0, vblank_start - intel_usecs_to_scanlines(adjusted_mode, latency)); + return errors; } -static u32 dsb_chicken(struct intel_crtc *crtc) +static u32 dsb_error_int_en(struct intel_display *display) { - if (crtc->mode_flags & I915_MODE_FLAG_VRR) - return DSB_SKIP_WAITS_EN | - DSB_CTRL_WAIT_SAFE_WINDOW | - DSB_CTRL_NO_WAIT_VBLANK | - DSB_INST_WAIT_SAFE_WINDOW | - DSB_INST_NO_WAIT_VBLANK; - else - return DSB_SKIP_WAITS_EN; + u32 errors; + + errors = DSB_GTT_FAULT_INT_EN | + DSB_RSPTIMEOUT_INT_EN | + DSB_POLL_ERR_INT_EN; + + if (DISPLAY_VER(display) >= 14) + errors |= DSB_ATS_FAULT_INT_EN; + + return errors; +} + +static void _intel_dsb_chain(struct intel_atomic_state *state, + struct intel_dsb *dsb, + struct intel_dsb *chained_dsb, + u32 ctrl) +{ + struct intel_display *display = to_intel_display(state->base.dev); + struct intel_crtc *crtc = dsb->crtc; + enum pipe pipe = crtc->pipe; + u32 tail; + + if (drm_WARN_ON(display->drm, dsb->id == chained_dsb->id)) + return; + + tail = chained_dsb->free_pos * 4; + if (drm_WARN_ON(display->drm, !IS_ALIGNED(tail, CACHELINE_BYTES))) + return; + + intel_dsb_reg_write(dsb, DSB_CTRL(pipe, chained_dsb->id), + ctrl | DSB_ENABLE); + + intel_dsb_reg_write(dsb, DSB_CHICKEN(pipe, chained_dsb->id), + dsb_chicken(state, crtc)); + + intel_dsb_reg_write(dsb, DSB_INTERRUPT(pipe, chained_dsb->id), + dsb_error_int_status(display) | DSB_PROG_INT_STATUS | + dsb_error_int_en(display)); + + if (ctrl & DSB_WAIT_FOR_VBLANK) { + int dewake_scanline = dsb_dewake_scanline_start(state, crtc); + int hw_dewake_scanline = dsb_scanline_to_hw(state, crtc, dewake_scanline); + + intel_dsb_reg_write(dsb, DSB_PMCTRL(pipe, chained_dsb->id), + DSB_ENABLE_DEWAKE | + DSB_SCANLINE_FOR_DEWAKE(hw_dewake_scanline)); + } + + intel_dsb_reg_write(dsb, DSB_HEAD(pipe, chained_dsb->id), + intel_dsb_buffer_ggtt_offset(&chained_dsb->dsb_buf)); + + intel_dsb_reg_write(dsb, DSB_TAIL(pipe, chained_dsb->id), + intel_dsb_buffer_ggtt_offset(&chained_dsb->dsb_buf) + tail); + + if (ctrl & DSB_WAIT_FOR_VBLANK) { + /* + * Keep DEwake alive via the first DSB, in + * case we're already past dewake_scanline, + * and thus DSB_ENABLE_DEWAKE on the second + * DSB won't do its job. + */ + intel_dsb_reg_write_masked(dsb, DSB_PMCTRL_2(pipe, dsb->id), + DSB_FORCE_DEWAKE, DSB_FORCE_DEWAKE); + + intel_dsb_wait_scanline_out(state, dsb, + dsb_dewake_scanline_start(state, crtc), + dsb_dewake_scanline_end(state, crtc)); + } +} + +void intel_dsb_chain(struct intel_atomic_state *state, + struct intel_dsb *dsb, + struct intel_dsb *chained_dsb, + bool wait_for_vblank) +{ + _intel_dsb_chain(state, dsb, chained_dsb, + wait_for_vblank ? DSB_WAIT_FOR_VBLANK : 0); } static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, - int dewake_scanline) + int hw_dewake_scanline) { struct intel_crtc *crtc = dsb->crtc; struct intel_display *display = to_intel_display(crtc->base.dev); @@ -361,15 +599,17 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, ctrl | DSB_ENABLE); intel_de_write_fw(display, DSB_CHICKEN(pipe, dsb->id), - dsb_chicken(crtc)); + dsb->chicken); + + intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb->id), + dsb_error_int_status(display) | DSB_PROG_INT_STATUS | + dsb_error_int_en(display)); intel_de_write_fw(display, DSB_HEAD(pipe, dsb->id), intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf)); - if (dewake_scanline >= 0) { - int diff, hw_dewake_scanline; - - hw_dewake_scanline = intel_crtc_scanline_to_hw(crtc, dewake_scanline); + if (hw_dewake_scanline >= 0) { + int diff, position; intel_de_write_fw(display, DSB_PMCTRL(pipe, dsb->id), DSB_ENABLE_DEWAKE | @@ -379,7 +619,9 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl, * Force DEwake immediately if we're already past * or close to racing past the target scanline. */ - diff = dewake_scanline - intel_get_crtc_scanline(crtc); + position = intel_de_read_fw(display, PIPEDSL(display, pipe)) & PIPEDSL_LINE_MASK; + + diff = hw_dewake_scanline - position; intel_de_write_fw(display, DSB_PMCTRL_2(pipe, dsb->id), (diff >= 0 && diff < 5 ? DSB_FORCE_DEWAKE : 0) | DSB_BLOCK_DEWAKE_EXTENSION); @@ -401,7 +643,7 @@ void intel_dsb_commit(struct intel_dsb *dsb, { _intel_dsb_commit(dsb, wait_for_vblank ? DSB_WAIT_FOR_VBLANK : 0, - wait_for_vblank ? dsb->dewake_scanline : -1); + wait_for_vblank ? dsb->hw_dewake_scanline : -1); } void intel_dsb_wait(struct intel_dsb *dsb) @@ -430,6 +672,9 @@ void intel_dsb_wait(struct intel_dsb *dsb) dsb->free_pos = 0; dsb->ins_start_offset = 0; intel_de_write_fw(display, DSB_CTRL(pipe, dsb->id), 0); + + intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb->id), + dsb_error_int_status(display) | DSB_PROG_INT_STATUS); } /** @@ -451,8 +696,6 @@ struct intel_dsb *intel_dsb_prepare(struct intel_atomic_state *state, unsigned int max_cmds) { struct drm_i915_private *i915 = to_i915(state->base.dev); - const struct intel_crtc_state *crtc_state = - intel_atomic_get_new_crtc_state(state, crtc); intel_wakeref_t wakeref; struct intel_dsb *dsb; unsigned int size; @@ -486,7 +729,10 @@ struct intel_dsb *intel_dsb_prepare(struct intel_atomic_state *state, dsb->size = size / 4; /* in dwords */ dsb->free_pos = 0; dsb->ins_start_offset = 0; - dsb->dewake_scanline = intel_dsb_dewake_scanline(crtc_state); + + dsb->chicken = dsb_chicken(state, crtc); + dsb->hw_dewake_scanline = + dsb_scanline_to_hw(state, crtc, dsb_dewake_scanline_start(state, crtc)); return dsb; @@ -513,3 +759,18 @@ void intel_dsb_cleanup(struct intel_dsb *dsb) intel_dsb_buffer_cleanup(&dsb->dsb_buf); kfree(dsb); } + +void intel_dsb_irq_handler(struct intel_display *display, + enum pipe pipe, enum intel_dsb_id dsb_id) +{ + struct intel_crtc *crtc = intel_crtc_for_pipe(to_i915(display->drm), pipe); + u32 tmp, errors; + + tmp = intel_de_read_fw(display, DSB_INTERRUPT(pipe, dsb_id)); + intel_de_write_fw(display, DSB_INTERRUPT(pipe, dsb_id), tmp); + + errors = tmp & dsb_error_int_status(display); + if (errors) + drm_err(display->drm, "[CRTC:%d:%s] DSB %d error interrupt: 0x%x\n", + crtc->base.base.id, crtc->base.name, dsb_id, errors); +} diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h index bb42749f2ea4..c352c12aa59f 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.h +++ b/drivers/gpu/drm/i915/display/intel_dsb.h @@ -13,8 +13,11 @@ struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; +struct intel_display; struct intel_dsb; +enum pipe; + enum intel_dsb_id { INTEL_DSB_0, INTEL_DSB_1, @@ -36,9 +39,22 @@ void intel_dsb_reg_write_masked(struct intel_dsb *dsb, void intel_dsb_noop(struct intel_dsb *dsb, int count); void intel_dsb_nonpost_start(struct intel_dsb *dsb); void intel_dsb_nonpost_end(struct intel_dsb *dsb); +void intel_dsb_wait_scanline_in(struct intel_atomic_state *state, + struct intel_dsb *dsb, + int lower, int upper); +void intel_dsb_wait_scanline_out(struct intel_atomic_state *state, + struct intel_dsb *dsb, + int lower, int upper); +void intel_dsb_chain(struct intel_atomic_state *state, + struct intel_dsb *dsb, + struct intel_dsb *chained_dsb, + bool wait_for_vblank); void intel_dsb_commit(struct intel_dsb *dsb, bool wait_for_vblank); void intel_dsb_wait(struct intel_dsb *dsb); +void intel_dsb_irq_handler(struct intel_display *display, + enum pipe pipe, enum intel_dsb_id dsb_id); + #endif diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h b/drivers/gpu/drm/i915/display/intel_dsi.h index e99c94edfaae..e8ba4ccd99d3 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi.h +++ b/drivers/gpu/drm/i915/display/intel_dsi.h @@ -66,7 +66,7 @@ struct intel_dsi { /* number of DSI lanes */ unsigned int lane_count; - /* i2c bus associated with the slave device */ + /* i2c bus associated with the target device */ int i2c_bus_num; /* diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 072ef1d62bda..d8951464bd2b 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -56,7 +56,7 @@ #define MIPI_PORT_SHIFT 3 struct i2c_adapter_lookup { - u16 slave_addr; + u16 target_addr; struct intel_dsi *intel_dsi; acpi_handle dev_handle; }; @@ -443,7 +443,7 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) if (!i2c_acpi_get_i2c_resource(ares, &sb)) return 1; - if (lookup->slave_addr != sb->slave_address) + if (lookup->target_addr != sb->slave_address) return 1; status = acpi_get_handle(lookup->dev_handle, @@ -460,12 +460,12 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) } static void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi, - const u16 slave_addr) + const u16 target_addr) { struct drm_device *drm_dev = intel_dsi->base.base.dev; struct acpi_device *adev = ACPI_COMPANION(drm_dev->dev); struct i2c_adapter_lookup lookup = { - .slave_addr = slave_addr, + .target_addr = target_addr, .intel_dsi = intel_dsi, .dev_handle = acpi_device_handle(adev), }; @@ -476,7 +476,7 @@ static void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi, } #else static inline void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi, - const u16 slave_addr) + const u16 target_addr) { } #endif @@ -488,17 +488,17 @@ static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) struct i2c_msg msg; int ret; u8 vbt_i2c_bus_num = *(data + 2); - u16 slave_addr = *(u16 *)(data + 3); + u16 target_addr = *(u16 *)(data + 3); u8 reg_offset = *(data + 5); u8 payload_size = *(data + 6); u8 *payload_data; - drm_dbg_kms(&i915->drm, "bus %d client-addr 0x%02x reg 0x%02x data %*ph\n", - vbt_i2c_bus_num, slave_addr, reg_offset, payload_size, data + 7); + drm_dbg_kms(&i915->drm, "bus %d target-addr 0x%02x reg 0x%02x data %*ph\n", + vbt_i2c_bus_num, target_addr, reg_offset, payload_size, data + 7); if (intel_dsi->i2c_bus_num < 0) { intel_dsi->i2c_bus_num = vbt_i2c_bus_num; - i2c_acpi_find_adapter(intel_dsi, slave_addr); + i2c_acpi_find_adapter(intel_dsi, target_addr); } adapter = i2c_get_adapter(intel_dsi->i2c_bus_num); @@ -514,7 +514,7 @@ static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) payload_data[0] = reg_offset; memcpy(&payload_data[1], (data + 7), payload_size); - msg.addr = slave_addr; + msg.addr = target_addr; msg.flags = 0; msg.len = payload_size + 1; msg.buf = payload_data; diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index 091824334f26..12e7628cbecf 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -60,42 +60,42 @@ static const struct intel_dvo_device intel_dvo_devices[] = { .type = INTEL_DVO_CHIP_TMDS, .name = "sil164", .port = PORT_C, - .slave_addr = SIL164_ADDR, + .target_addr = SIL164_ADDR, .dev_ops = &sil164_ops, }, { .type = INTEL_DVO_CHIP_TMDS, .name = "ch7xxx", .port = PORT_C, - .slave_addr = CH7xxx_ADDR, + .target_addr = CH7xxx_ADDR, .dev_ops = &ch7xxx_ops, }, { .type = INTEL_DVO_CHIP_TMDS, .name = "ch7xxx", .port = PORT_C, - .slave_addr = 0x75, /* For some ch7010 */ + .target_addr = 0x75, /* For some ch7010 */ .dev_ops = &ch7xxx_ops, }, { .type = INTEL_DVO_CHIP_LVDS, .name = "ivch", .port = PORT_A, - .slave_addr = 0x02, /* Might also be 0x44, 0x84, 0xc4 */ + .target_addr = 0x02, /* Might also be 0x44, 0x84, 0xc4 */ .dev_ops = &ivch_ops, }, { .type = INTEL_DVO_CHIP_TMDS, .name = "tfp410", .port = PORT_C, - .slave_addr = TFP410_ADDR, + .target_addr = TFP410_ADDR, .dev_ops = &tfp410_ops, }, { .type = INTEL_DVO_CHIP_LVDS, .name = "ch7017", .port = PORT_C, - .slave_addr = 0x75, + .target_addr = 0x75, .gpio = GMBUS_PIN_DPB, .dev_ops = &ch7017_ops, }, @@ -103,7 +103,7 @@ static const struct intel_dvo_device intel_dvo_devices[] = { .type = INTEL_DVO_CHIP_LVDS_NO_FIXED, .name = "ns2501", .port = PORT_B, - .slave_addr = NS2501_ADDR, + .target_addr = NS2501_ADDR, .dev_ops = &ns2501_ops, }, }; diff --git a/drivers/gpu/drm/i915/display/intel_dvo_dev.h b/drivers/gpu/drm/i915/display/intel_dvo_dev.h index af7b04539b93..4bf476656b8c 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo_dev.h +++ b/drivers/gpu/drm/i915/display/intel_dvo_dev.h @@ -38,7 +38,7 @@ struct intel_dvo_device { enum port port; /* GPIO register used for i2c bus to control this device */ u32 gpio; - int slave_addr; + int target_addr; const struct intel_dvo_dev_ops *dev_ops; void *dev_priv; diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index f23547a88b1f..5be7bb43e2e0 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -163,6 +163,14 @@ struct intel_modifier_desc { static const struct intel_modifier_desc intel_modifiers[] = { { + .modifier = I915_FORMAT_MOD_4_TILED_LNL_CCS, + .display_ver = { 20, -1 }, + .plane_caps = INTEL_PLANE_CAP_TILING_4, + }, { + .modifier = I915_FORMAT_MOD_4_TILED_BMG_CCS, + .display_ver = { 14, -1 }, + .plane_caps = INTEL_PLANE_CAP_TILING_4 | INTEL_PLANE_CAP_NEED64K_PHYS, + }, { .modifier = I915_FORMAT_MOD_4_TILED_MTL_MC_CCS, .display_ver = { 14, 14 }, .plane_caps = INTEL_PLANE_CAP_TILING_4 | INTEL_PLANE_CAP_CCS_MC, @@ -412,6 +420,24 @@ bool intel_fb_is_mc_ccs_modifier(u64 modifier) INTEL_PLANE_CAP_CCS_MC); } +/** + * intel_fb_needs_64k_phys: Check if modifier requires 64k physical placement. + * @modifier: Modifier to check + * + * Returns: + * Returns %true if @modifier requires 64k aligned physical pages. + */ +bool intel_fb_needs_64k_phys(u64 modifier) +{ + const struct intel_modifier_desc *md = lookup_modifier_or_null(modifier); + + if (!md) + return false; + + return plane_caps_contain_any(md->plane_caps, + INTEL_PLANE_CAP_NEED64K_PHYS); +} + static bool check_modifier_display_ver_range(const struct intel_modifier_desc *md, u8 display_ver_from, u8 display_ver_until) { @@ -437,6 +463,14 @@ static bool plane_has_modifier(struct drm_i915_private *i915, HAS_FLAT_CCS(i915) != !md->ccs.packed_aux_planes) return false; + if (md->modifier == I915_FORMAT_MOD_4_TILED_BMG_CCS && + (GRAPHICS_VER(i915) < 20 || !IS_DGFX(i915))) + return false; + + if (md->modifier == I915_FORMAT_MOD_4_TILED_LNL_CCS && + (GRAPHICS_VER(i915) < 20 || IS_DGFX(i915))) + return false; + return true; } @@ -653,6 +687,8 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) return 128; else return 512; + case I915_FORMAT_MOD_4_TILED_BMG_CCS: + case I915_FORMAT_MOD_4_TILED_LNL_CCS: case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS: case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC: case I915_FORMAT_MOD_4_TILED_DG2_MC_CCS: diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index 6dee0c8b7f22..10de437e8ef8 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -28,11 +28,13 @@ struct intel_plane_state; #define INTEL_PLANE_CAP_TILING_Y BIT(4) #define INTEL_PLANE_CAP_TILING_Yf BIT(5) #define INTEL_PLANE_CAP_TILING_4 BIT(6) +#define INTEL_PLANE_CAP_NEED64K_PHYS BIT(7) bool intel_fb_is_tiled_modifier(u64 modifier); bool intel_fb_is_ccs_modifier(u64 modifier); bool intel_fb_is_rc_ccs_cc_modifier(u64 modifier); bool intel_fb_is_mc_ccs_modifier(u64 modifier); +bool intel_fb_needs_64k_phys(u64 modifier); bool intel_fb_is_ccs_aux_plane(const struct drm_framebuffer *fb, int color_plane); int intel_fb_rc_ccs_cc_plane(const struct drm_framebuffer *fb); diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 8488f82143a4..52b79bacef4d 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -61,13 +61,13 @@ #include "intel_fbc_regs.h" #include "intel_frontbuffer.h" -#define for_each_fbc_id(__dev_priv, __fbc_id) \ +#define for_each_fbc_id(__display, __fbc_id) \ for ((__fbc_id) = INTEL_FBC_A; (__fbc_id) < I915_MAX_FBCS; (__fbc_id)++) \ - for_each_if(DISPLAY_RUNTIME_INFO(__dev_priv)->fbc_mask & BIT(__fbc_id)) + for_each_if(DISPLAY_RUNTIME_INFO(__display)->fbc_mask & BIT(__fbc_id)) -#define for_each_intel_fbc(__dev_priv, __fbc, __fbc_id) \ - for_each_fbc_id((__dev_priv), (__fbc_id)) \ - for_each_if((__fbc) = (__dev_priv)->display.fbc[(__fbc_id)]) +#define for_each_intel_fbc(__display, __fbc, __fbc_id) \ + for_each_fbc_id((__display), (__fbc_id)) \ + for_each_if((__fbc) = (__display)->fbc[(__fbc_id)]) struct intel_fbc_funcs { void (*activate)(struct intel_fbc *fbc); @@ -90,7 +90,7 @@ struct intel_fbc_state { }; struct intel_fbc { - struct drm_i915_private *i915; + struct intel_display *display; const struct intel_fbc_funcs *funcs; /* @@ -140,21 +140,24 @@ static unsigned int intel_fbc_plane_stride(const struct intel_plane_state *plane return stride; } +static unsigned int intel_fbc_cfb_cpp(void) +{ + return 4; /* FBC always 4 bytes per pixel */ +} + /* plane stride based cfb stride in bytes, assuming 1:1 compression limit */ -static unsigned int _intel_fbc_cfb_stride(const struct intel_plane_state *plane_state) +static unsigned int intel_fbc_plane_cfb_stride(const struct intel_plane_state *plane_state) { - unsigned int cpp = 4; /* FBC always 4 bytes per pixel */ + unsigned int cpp = intel_fbc_cfb_cpp(); return intel_fbc_plane_stride(plane_state) * cpp; } /* minimum acceptable cfb stride in bytes, assuming 1:1 compression limit */ -static unsigned int skl_fbc_min_cfb_stride(const struct intel_plane_state *plane_state) +static unsigned int skl_fbc_min_cfb_stride(struct intel_display *display, + unsigned int cpp, unsigned int width) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); unsigned int limit = 4; /* 1:4 compression limit is the worst case */ - unsigned int cpp = 4; /* FBC always 4 bytes per pixel */ - unsigned int width = drm_rect_width(&plane_state->uapi.src) >> 16; unsigned int height = 4; /* FBC segment is 4 lines */ unsigned int stride; @@ -165,7 +168,7 @@ static unsigned int skl_fbc_min_cfb_stride(const struct intel_plane_state *plane * Wa_16011863758: icl+ * Avoid some hardware segment address miscalculation. */ - if (DISPLAY_VER(i915) >= 11) + if (DISPLAY_VER(display) >= 11) stride += 64; /* @@ -179,40 +182,67 @@ static unsigned int skl_fbc_min_cfb_stride(const struct intel_plane_state *plane } /* properly aligned cfb stride in bytes, assuming 1:1 compression limit */ -static unsigned int intel_fbc_cfb_stride(const struct intel_plane_state *plane_state) +static unsigned int _intel_fbc_cfb_stride(struct intel_display *display, + unsigned int cpp, unsigned int width, + unsigned int stride) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); - unsigned int stride = _intel_fbc_cfb_stride(plane_state); - /* * At least some of the platforms require each 4 line segment to * be 512 byte aligned. Aligning each line to 512 bytes guarantees * that regardless of the compression limit we choose later. */ - if (DISPLAY_VER(i915) >= 9) - return max(ALIGN(stride, 512), skl_fbc_min_cfb_stride(plane_state)); + if (DISPLAY_VER(display) >= 9) + return max(ALIGN(stride, 512), skl_fbc_min_cfb_stride(display, cpp, width)); else return stride; } -static unsigned int intel_fbc_cfb_size(const struct intel_plane_state *plane_state) +static unsigned int intel_fbc_cfb_stride(const struct intel_plane_state *plane_state) +{ + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); + unsigned int stride = intel_fbc_plane_cfb_stride(plane_state); + unsigned int width = drm_rect_width(&plane_state->uapi.src) >> 16; + unsigned int cpp = intel_fbc_cfb_cpp(); + + return _intel_fbc_cfb_stride(display, cpp, width, stride); +} + +/* + * Maximum height the hardware will compress, on HSW+ + * additional lines (up to the actual plane height) will + * remain uncompressed. + */ +static unsigned int intel_fbc_max_cfb_height(struct intel_display *display) +{ + struct drm_i915_private *i915 = to_i915(display->drm); + + if (DISPLAY_VER(display) >= 8) + return 2560; + else if (DISPLAY_VER(display) >= 5 || IS_G4X(i915)) + return 2048; + else + return 1536; +} + +static unsigned int _intel_fbc_cfb_size(struct intel_display *display, + unsigned int height, unsigned int stride) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); - int lines = drm_rect_height(&plane_state->uapi.src) >> 16; + return min(height, intel_fbc_max_cfb_height(display)) * stride; +} - if (DISPLAY_VER(i915) == 7) - lines = min(lines, 2048); - else if (DISPLAY_VER(i915) >= 8) - lines = min(lines, 2560); +static unsigned int intel_fbc_cfb_size(const struct intel_plane_state *plane_state) +{ + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); + unsigned int height = drm_rect_height(&plane_state->uapi.src) >> 16; - return lines * intel_fbc_cfb_stride(plane_state); + return _intel_fbc_cfb_size(display, height, intel_fbc_cfb_stride(plane_state)); } static u16 intel_fbc_override_cfb_stride(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); unsigned int stride_aligned = intel_fbc_cfb_stride(plane_state); - unsigned int stride = _intel_fbc_cfb_stride(plane_state); + unsigned int stride = intel_fbc_plane_cfb_stride(plane_state); const struct drm_framebuffer *fb = plane_state->hw.fb; /* @@ -223,23 +253,31 @@ static u16 intel_fbc_override_cfb_stride(const struct intel_plane_state *plane_s * we always need to use the override there. */ if (stride != stride_aligned || - (DISPLAY_VER(i915) == 9 && fb->modifier == DRM_FORMAT_MOD_LINEAR)) + (DISPLAY_VER(display) == 9 && fb->modifier == DRM_FORMAT_MOD_LINEAR)) return stride_aligned * 4 / 64; return 0; } +static bool intel_fbc_has_fences(struct intel_display *display) +{ + struct drm_i915_private __maybe_unused *i915 = to_i915(display->drm); + + return intel_gt_support_legacy_fencing(to_gt(i915)); +} + static u32 i8xx_fbc_ctl(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); unsigned int cfb_stride; u32 fbc_ctl; cfb_stride = fbc_state->cfb_stride / fbc->limit; /* FBC_CTL wants 32B or 64B units */ - if (DISPLAY_VER(i915) == 2) + if (DISPLAY_VER(display) == 2) cfb_stride = (cfb_stride / 32) - 1; else cfb_stride = (cfb_stride / 64) - 1; @@ -273,21 +311,21 @@ static u32 i965_fbc_ctl2(struct intel_fbc *fbc) static void i8xx_fbc_deactivate(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; u32 fbc_ctl; /* Disable compression */ - fbc_ctl = intel_de_read(i915, FBC_CONTROL); + fbc_ctl = intel_de_read(display, FBC_CONTROL); if ((fbc_ctl & FBC_CTL_EN) == 0) return; fbc_ctl &= ~FBC_CTL_EN; - intel_de_write(i915, FBC_CONTROL, fbc_ctl); + intel_de_write(display, FBC_CONTROL, fbc_ctl); /* Wait for compressing bit to clear */ - if (intel_de_wait_for_clear(i915, FBC_STATUS, + if (intel_de_wait_for_clear(display, FBC_STATUS, FBC_STAT_COMPRESSING, 10)) { - drm_dbg_kms(&i915->drm, "FBC idle timed out\n"); + drm_dbg_kms(display->drm, "FBC idle timed out\n"); return; } } @@ -295,32 +333,32 @@ static void i8xx_fbc_deactivate(struct intel_fbc *fbc) static void i8xx_fbc_activate(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; int i; /* Clear old tags */ for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++) - intel_de_write(i915, FBC_TAG(i), 0); + intel_de_write(display, FBC_TAG(i), 0); - if (DISPLAY_VER(i915) == 4) { - intel_de_write(i915, FBC_CONTROL2, + if (DISPLAY_VER(display) == 4) { + intel_de_write(display, FBC_CONTROL2, i965_fbc_ctl2(fbc)); - intel_de_write(i915, FBC_FENCE_OFF, + intel_de_write(display, FBC_FENCE_OFF, fbc_state->fence_y_offset); } - intel_de_write(i915, FBC_CONTROL, + intel_de_write(display, FBC_CONTROL, FBC_CTL_EN | i8xx_fbc_ctl(fbc)); } static bool i8xx_fbc_is_active(struct intel_fbc *fbc) { - return intel_de_read(fbc->i915, FBC_CONTROL) & FBC_CTL_EN; + return intel_de_read(fbc->display, FBC_CONTROL) & FBC_CTL_EN; } static bool i8xx_fbc_is_compressing(struct intel_fbc *fbc) { - return intel_de_read(fbc->i915, FBC_STATUS) & + return intel_de_read(fbc->display, FBC_STATUS) & (FBC_STAT_COMPRESSING | FBC_STAT_COMPRESSED); } @@ -328,7 +366,7 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc) { struct intel_fbc_state *fbc_state = &fbc->state; enum i9xx_plane_id i9xx_plane = fbc_state->plane->i9xx_plane; - struct drm_i915_private *dev_priv = fbc->i915; + struct drm_i915_private *dev_priv = to_i915(fbc->display->drm); intel_de_write_fw(dev_priv, DSPADDR(dev_priv, i9xx_plane), intel_de_read_fw(dev_priv, DSPADDR(dev_priv, i9xx_plane))); @@ -336,13 +374,14 @@ static void i8xx_fbc_nuke(struct intel_fbc *fbc) static void i8xx_fbc_program_cfb(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); - drm_WARN_ON(&i915->drm, + drm_WARN_ON(display->drm, range_overflows_end_t(u64, i915_gem_stolen_area_address(i915), i915_gem_stolen_node_offset(&fbc->compressed_fb), U32_MAX)); - drm_WARN_ON(&i915->drm, + drm_WARN_ON(display->drm, range_overflows_end_t(u64, i915_gem_stolen_area_address(i915), i915_gem_stolen_node_offset(&fbc->compressed_llb), U32_MAX)); @@ -365,7 +404,7 @@ static void i965_fbc_nuke(struct intel_fbc *fbc) { struct intel_fbc_state *fbc_state = &fbc->state; enum i9xx_plane_id i9xx_plane = fbc_state->plane->i9xx_plane; - struct drm_i915_private *dev_priv = fbc->i915; + struct drm_i915_private *dev_priv = to_i915(fbc->display->drm); intel_de_write_fw(dev_priv, DSPSURF(dev_priv, i9xx_plane), intel_de_read_fw(dev_priv, DSPSURF(dev_priv, i9xx_plane))); @@ -398,7 +437,8 @@ static u32 g4x_dpfc_ctl_limit(struct intel_fbc *fbc) static u32 g4x_dpfc_ctl(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); u32 dpfc_ctl; dpfc_ctl = g4x_dpfc_ctl_limit(fbc) | @@ -410,7 +450,7 @@ static u32 g4x_dpfc_ctl(struct intel_fbc *fbc) if (fbc_state->fence_id >= 0) { dpfc_ctl |= DPFC_CTL_FENCE_EN_G4X; - if (DISPLAY_VER(i915) < 6) + if (DISPLAY_VER(display) < 6) dpfc_ctl |= DPFC_CTL_FENCENO(fbc_state->fence_id); } @@ -420,43 +460,43 @@ static u32 g4x_dpfc_ctl(struct intel_fbc *fbc) static void g4x_fbc_activate(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; - intel_de_write(i915, DPFC_FENCE_YOFF, + intel_de_write(display, DPFC_FENCE_YOFF, fbc_state->fence_y_offset); - intel_de_write(i915, DPFC_CONTROL, + intel_de_write(display, DPFC_CONTROL, DPFC_CTL_EN | g4x_dpfc_ctl(fbc)); } static void g4x_fbc_deactivate(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; u32 dpfc_ctl; /* Disable compression */ - dpfc_ctl = intel_de_read(i915, DPFC_CONTROL); + dpfc_ctl = intel_de_read(display, DPFC_CONTROL); if (dpfc_ctl & DPFC_CTL_EN) { dpfc_ctl &= ~DPFC_CTL_EN; - intel_de_write(i915, DPFC_CONTROL, dpfc_ctl); + intel_de_write(display, DPFC_CONTROL, dpfc_ctl); } } static bool g4x_fbc_is_active(struct intel_fbc *fbc) { - return intel_de_read(fbc->i915, DPFC_CONTROL) & DPFC_CTL_EN; + return intel_de_read(fbc->display, DPFC_CONTROL) & DPFC_CTL_EN; } static bool g4x_fbc_is_compressing(struct intel_fbc *fbc) { - return intel_de_read(fbc->i915, DPFC_STATUS) & DPFC_COMP_SEG_MASK; + return intel_de_read(fbc->display, DPFC_STATUS) & DPFC_COMP_SEG_MASK; } static void g4x_fbc_program_cfb(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; - intel_de_write(i915, DPFC_CB_BASE, + intel_de_write(display, DPFC_CB_BASE, i915_gem_stolen_node_offset(&fbc->compressed_fb)); } @@ -472,43 +512,43 @@ static const struct intel_fbc_funcs g4x_fbc_funcs = { static void ilk_fbc_activate(struct intel_fbc *fbc) { struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; - intel_de_write(i915, ILK_DPFC_FENCE_YOFF(fbc->id), + intel_de_write(display, ILK_DPFC_FENCE_YOFF(fbc->id), fbc_state->fence_y_offset); - intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id), + intel_de_write(display, ILK_DPFC_CONTROL(fbc->id), DPFC_CTL_EN | g4x_dpfc_ctl(fbc)); } static void ilk_fbc_deactivate(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; u32 dpfc_ctl; /* Disable compression */ - dpfc_ctl = intel_de_read(i915, ILK_DPFC_CONTROL(fbc->id)); + dpfc_ctl = intel_de_read(display, ILK_DPFC_CONTROL(fbc->id)); if (dpfc_ctl & DPFC_CTL_EN) { dpfc_ctl &= ~DPFC_CTL_EN; - intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id), dpfc_ctl); + intel_de_write(display, ILK_DPFC_CONTROL(fbc->id), dpfc_ctl); } } static bool ilk_fbc_is_active(struct intel_fbc *fbc) { - return intel_de_read(fbc->i915, ILK_DPFC_CONTROL(fbc->id)) & DPFC_CTL_EN; + return intel_de_read(fbc->display, ILK_DPFC_CONTROL(fbc->id)) & DPFC_CTL_EN; } static bool ilk_fbc_is_compressing(struct intel_fbc *fbc) { - return intel_de_read(fbc->i915, ILK_DPFC_STATUS(fbc->id)) & DPFC_COMP_SEG_MASK; + return intel_de_read(fbc->display, ILK_DPFC_STATUS(fbc->id)) & DPFC_COMP_SEG_MASK; } static void ilk_fbc_program_cfb(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; - intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), + intel_de_write(display, ILK_DPFC_CB_BASE(fbc->id), i915_gem_stolen_node_offset(&fbc->compressed_fb)); } @@ -524,14 +564,14 @@ static const struct intel_fbc_funcs ilk_fbc_funcs = { static void snb_fbc_program_fence(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; u32 ctl = 0; if (fbc_state->fence_id >= 0) ctl = SNB_DPFC_FENCE_EN | SNB_DPFC_FENCENO(fbc_state->fence_id); - intel_de_write(i915, SNB_DPFC_CTL_SA, ctl); - intel_de_write(i915, SNB_DPFC_CPU_FENCE_OFFSET, fbc_state->fence_y_offset); + intel_de_write(display, SNB_DPFC_CTL_SA, ctl); + intel_de_write(display, SNB_DPFC_CPU_FENCE_OFFSET, fbc_state->fence_y_offset); } static void snb_fbc_activate(struct intel_fbc *fbc) @@ -543,10 +583,10 @@ static void snb_fbc_activate(struct intel_fbc *fbc) static void snb_fbc_nuke(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; - intel_de_write(i915, MSG_FBC_REND_STATE(fbc->id), FBC_REND_NUKE); - intel_de_posting_read(i915, MSG_FBC_REND_STATE(fbc->id)); + intel_de_write(display, MSG_FBC_REND_STATE(fbc->id), FBC_REND_NUKE); + intel_de_posting_read(display, MSG_FBC_REND_STATE(fbc->id)); } static const struct intel_fbc_funcs snb_fbc_funcs = { @@ -561,20 +601,20 @@ static const struct intel_fbc_funcs snb_fbc_funcs = { static void glk_fbc_program_cfb_stride(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; u32 val = 0; if (fbc_state->override_cfb_stride) val |= FBC_STRIDE_OVERRIDE | FBC_STRIDE(fbc_state->override_cfb_stride / fbc->limit); - intel_de_write(i915, GLK_FBC_STRIDE(fbc->id), val); + intel_de_write(display, GLK_FBC_STRIDE(fbc->id), val); } static void skl_fbc_program_cfb_stride(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; u32 val = 0; /* Display WA #0529: skl, kbl, bxt. */ @@ -582,7 +622,7 @@ static void skl_fbc_program_cfb_stride(struct intel_fbc *fbc) val |= CHICKEN_FBC_STRIDE_OVERRIDE | CHICKEN_FBC_STRIDE(fbc_state->override_cfb_stride / fbc->limit); - intel_de_rmw(i915, CHICKEN_MISC_4, + intel_de_rmw(display, CHICKEN_MISC_4, CHICKEN_FBC_STRIDE_OVERRIDE | CHICKEN_FBC_STRIDE_MASK, val); } @@ -590,7 +630,8 @@ static void skl_fbc_program_cfb_stride(struct intel_fbc *fbc) static u32 ivb_dpfc_ctl(struct intel_fbc *fbc) { const struct intel_fbc_state *fbc_state = &fbc->state; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); u32 dpfc_ctl; dpfc_ctl = g4x_dpfc_ctl_limit(fbc); @@ -598,7 +639,7 @@ static u32 ivb_dpfc_ctl(struct intel_fbc *fbc) if (IS_IVYBRIDGE(i915)) dpfc_ctl |= DPFC_CTL_PLANE_IVB(fbc_state->plane->i9xx_plane); - if (DISPLAY_VER(i915) >= 20) + if (DISPLAY_VER(display) >= 20) dpfc_ctl |= DPFC_CTL_PLANE_BINDING(fbc_state->plane->id); if (fbc_state->fence_id >= 0) @@ -612,35 +653,35 @@ static u32 ivb_dpfc_ctl(struct intel_fbc *fbc) static void ivb_fbc_activate(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; u32 dpfc_ctl; - if (DISPLAY_VER(i915) >= 10) + if (DISPLAY_VER(display) >= 10) glk_fbc_program_cfb_stride(fbc); - else if (DISPLAY_VER(i915) == 9) + else if (DISPLAY_VER(display) == 9) skl_fbc_program_cfb_stride(fbc); - if (intel_gt_support_legacy_fencing(to_gt(i915))) + if (intel_fbc_has_fences(display)) snb_fbc_program_fence(fbc); /* wa_14019417088 Alternative WA*/ dpfc_ctl = ivb_dpfc_ctl(fbc); - if (DISPLAY_VER(i915) >= 20) - intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id), dpfc_ctl); + if (DISPLAY_VER(display) >= 20) + intel_de_write(display, ILK_DPFC_CONTROL(fbc->id), dpfc_ctl); - intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id), + intel_de_write(display, ILK_DPFC_CONTROL(fbc->id), DPFC_CTL_EN | dpfc_ctl); } static bool ivb_fbc_is_compressing(struct intel_fbc *fbc) { - return intel_de_read(fbc->i915, ILK_DPFC_STATUS2(fbc->id)) & DPFC_COMP_SEG_MASK_IVB; + return intel_de_read(fbc->display, ILK_DPFC_STATUS2(fbc->id)) & DPFC_COMP_SEG_MASK_IVB; } static void ivb_fbc_set_false_color(struct intel_fbc *fbc, bool enable) { - intel_de_rmw(fbc->i915, ILK_DPFC_CONTROL(fbc->id), + intel_de_rmw(fbc->display, ILK_DPFC_CONTROL(fbc->id), DPFC_CTL_FALSE_COLOR, enable ? DPFC_CTL_FALSE_COLOR : 0); } @@ -685,10 +726,10 @@ static bool intel_fbc_is_compressing(struct intel_fbc *fbc) static void intel_fbc_nuke(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; lockdep_assert_held(&fbc->lock); - drm_WARN_ON(&i915->drm, fbc->flip_pending); + drm_WARN_ON(display->drm, fbc->flip_pending); trace_intel_fbc_nuke(fbc->state.plane); @@ -715,16 +756,19 @@ static void intel_fbc_deactivate(struct intel_fbc *fbc, const char *reason) fbc->no_fbc_reason = reason; } -static u64 intel_fbc_cfb_base_max(struct drm_i915_private *i915) +static u64 intel_fbc_cfb_base_max(struct intel_display *display) { - if (DISPLAY_VER(i915) >= 5 || IS_G4X(i915)) + struct drm_i915_private *i915 = to_i915(display->drm); + + if (DISPLAY_VER(display) >= 5 || IS_G4X(i915)) return BIT_ULL(28); else return BIT_ULL(32); } -static u64 intel_fbc_stolen_end(struct drm_i915_private *i915) +static u64 intel_fbc_stolen_end(struct intel_display *display) { + struct drm_i915_private __maybe_unused *i915 = to_i915(display->drm); u64 end; /* The FBC hardware for BDW/SKL doesn't have access to the stolen @@ -732,12 +776,12 @@ static u64 intel_fbc_stolen_end(struct drm_i915_private *i915) * If we enable FBC using a CFB on that memory range we'll get FIFO * underruns, even if that range is not reserved by the BIOS. */ if (IS_BROADWELL(i915) || - (DISPLAY_VER(i915) == 9 && !IS_BROXTON(i915))) + (DISPLAY_VER(display) == 9 && !IS_BROXTON(i915))) end = i915_gem_stolen_area_size(i915) - 8 * 1024 * 1024; else end = U64_MAX; - return min(end, intel_fbc_cfb_base_max(i915)); + return min(end, intel_fbc_cfb_base_max(display)); } static int intel_fbc_min_limit(const struct intel_plane_state *plane_state) @@ -745,8 +789,10 @@ static int intel_fbc_min_limit(const struct intel_plane_state *plane_state) return plane_state->hw.fb->format->cpp[0] == 2 ? 2 : 1; } -static int intel_fbc_max_limit(struct drm_i915_private *i915) +static int intel_fbc_max_limit(struct intel_display *display) { + struct drm_i915_private *i915 = to_i915(display->drm); + /* WaFbcOnly1to1Ratio:ctg */ if (IS_G4X(i915)) return 1; @@ -761,8 +807,9 @@ static int intel_fbc_max_limit(struct drm_i915_private *i915) static int find_compression_limit(struct intel_fbc *fbc, unsigned int size, int min_limit) { - struct drm_i915_private *i915 = fbc->i915; - u64 end = intel_fbc_stolen_end(i915); + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); + u64 end = intel_fbc_stolen_end(display); int ret, limit = min_limit; size /= limit; @@ -773,7 +820,7 @@ static int find_compression_limit(struct intel_fbc *fbc, if (ret == 0) return limit; - for (; limit <= intel_fbc_max_limit(i915); limit <<= 1) { + for (; limit <= intel_fbc_max_limit(display); limit <<= 1) { ret = i915_gem_stolen_insert_node_in_range(i915, &fbc->compressed_fb, size >>= 1, 4096, 0, end); if (ret == 0) @@ -786,15 +833,16 @@ static int find_compression_limit(struct intel_fbc *fbc, static int intel_fbc_alloc_cfb(struct intel_fbc *fbc, unsigned int size, int min_limit) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); int ret; - drm_WARN_ON(&i915->drm, + drm_WARN_ON(display->drm, i915_gem_stolen_node_allocated(&fbc->compressed_fb)); - drm_WARN_ON(&i915->drm, + drm_WARN_ON(display->drm, i915_gem_stolen_node_allocated(&fbc->compressed_llb)); - if (DISPLAY_VER(i915) < 5 && !IS_G4X(i915)) { + if (DISPLAY_VER(display) < 5 && !IS_G4X(i915)) { ret = i915_gem_stolen_insert_node(i915, &fbc->compressed_llb, 4096, 4096); if (ret) @@ -805,12 +853,12 @@ static int intel_fbc_alloc_cfb(struct intel_fbc *fbc, if (!ret) goto err_llb; else if (ret > min_limit) - drm_info_once(&i915->drm, + drm_info_once(display->drm, "Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n"); fbc->limit = ret; - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "reserved %llu bytes of contiguous stolen space for FBC, limit: %d\n", i915_gem_stolen_node_size(&fbc->compressed_fb), fbc->limit); return 0; @@ -820,7 +868,8 @@ err_llb: i915_gem_stolen_remove_node(i915, &fbc->compressed_llb); err: if (i915_gem_stolen_initialized(i915)) - drm_info_once(&i915->drm, "not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); + drm_info_once(display->drm, + "not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); return -ENOSPC; } @@ -831,14 +880,15 @@ static void intel_fbc_program_cfb(struct intel_fbc *fbc) static void intel_fbc_program_workarounds(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); if (IS_SKYLAKE(i915) || IS_BROXTON(i915)) { /* * WaFbcHighMemBwCorruptionAvoidance:skl,bxt * Display WA #0883: skl,bxt */ - intel_de_rmw(i915, ILK_DPFC_CHICKEN(fbc->id), + intel_de_rmw(display, ILK_DPFC_CHICKEN(fbc->id), 0, DPFC_DISABLE_DUMMY0); } @@ -848,24 +898,25 @@ static void intel_fbc_program_workarounds(struct intel_fbc *fbc) * WaFbcNukeOnHostModify:skl,kbl,cfl * Display WA #0873: skl,kbl,cfl */ - intel_de_rmw(i915, ILK_DPFC_CHICKEN(fbc->id), + intel_de_rmw(display, ILK_DPFC_CHICKEN(fbc->id), 0, DPFC_NUKE_ON_ANY_MODIFICATION); } /* Wa_1409120013:icl,jsl,tgl,dg1 */ - if (IS_DISPLAY_VER(i915, 11, 12)) - intel_de_rmw(i915, ILK_DPFC_CHICKEN(fbc->id), + if (IS_DISPLAY_VER(display, 11, 12)) + intel_de_rmw(display, ILK_DPFC_CHICKEN(fbc->id), 0, DPFC_CHICKEN_COMP_DUMMY_PIXEL); /* Wa_22014263786:icl,jsl,tgl,dg1,rkl,adls,adlp,mtl */ - if (DISPLAY_VER(i915) >= 11 && !IS_DG2(i915)) - intel_de_rmw(i915, ILK_DPFC_CHICKEN(fbc->id), + if (DISPLAY_VER(display) >= 11 && !IS_DG2(i915)) + intel_de_rmw(display, ILK_DPFC_CHICKEN(fbc->id), 0, DPFC_CHICKEN_FORCE_SLB_INVALIDATION); } static void __intel_fbc_cleanup_cfb(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); if (WARN_ON(intel_fbc_hw_is_active(fbc))) return; @@ -876,12 +927,12 @@ static void __intel_fbc_cleanup_cfb(struct intel_fbc *fbc) i915_gem_stolen_remove_node(i915, &fbc->compressed_fb); } -void intel_fbc_cleanup(struct drm_i915_private *i915) +void intel_fbc_cleanup(struct intel_display *display) { struct intel_fbc *fbc; enum intel_fbc_id fbc_id; - for_each_intel_fbc(i915, fbc, fbc_id) { + for_each_intel_fbc(display, fbc, fbc_id) { mutex_lock(&fbc->lock); __intel_fbc_cleanup_cfb(fbc); mutex_unlock(&fbc->lock); @@ -933,15 +984,16 @@ static bool icl_fbc_stride_is_valid(const struct intel_plane_state *plane_state) static bool stride_is_valid(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); + struct drm_i915_private *i915 = to_i915(display->drm); - if (DISPLAY_VER(i915) >= 11) + if (DISPLAY_VER(display) >= 11) return icl_fbc_stride_is_valid(plane_state); - else if (DISPLAY_VER(i915) >= 9) + else if (DISPLAY_VER(display) >= 9) return skl_fbc_stride_is_valid(plane_state); - else if (DISPLAY_VER(i915) >= 5 || IS_G4X(i915)) + else if (DISPLAY_VER(display) >= 5 || IS_G4X(i915)) return g4x_fbc_stride_is_valid(plane_state); - else if (DISPLAY_VER(i915) == 4) + else if (DISPLAY_VER(display) == 4) return i965_fbc_stride_is_valid(plane_state); else return i8xx_fbc_stride_is_valid(plane_state); @@ -949,7 +1001,7 @@ static bool stride_is_valid(const struct intel_plane_state *plane_state) static bool i8xx_fbc_pixel_format_is_valid(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); const struct drm_framebuffer *fb = plane_state->hw.fb; switch (fb->format->format) { @@ -959,7 +1011,7 @@ static bool i8xx_fbc_pixel_format_is_valid(const struct intel_plane_state *plane case DRM_FORMAT_XRGB1555: case DRM_FORMAT_RGB565: /* 16bpp not supported on gen2 */ - if (DISPLAY_VER(i915) == 2) + if (DISPLAY_VER(display) == 2) return false; return true; default: @@ -969,7 +1021,8 @@ static bool i8xx_fbc_pixel_format_is_valid(const struct intel_plane_state *plane static bool g4x_fbc_pixel_format_is_valid(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); + struct drm_i915_private *i915 = to_i915(display->drm); const struct drm_framebuffer *fb = plane_state->hw.fb; switch (fb->format->format) { @@ -1004,11 +1057,12 @@ static bool lnl_fbc_pixel_format_is_valid(const struct intel_plane_state *plane_ static bool pixel_format_is_valid(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); + struct drm_i915_private *i915 = to_i915(display->drm); - if (DISPLAY_VER(i915) >= 20) + if (DISPLAY_VER(display) >= 20) return lnl_fbc_pixel_format_is_valid(plane_state); - else if (DISPLAY_VER(i915) >= 5 || IS_G4X(i915)) + else if (DISPLAY_VER(display) >= 5 || IS_G4X(i915)) return g4x_fbc_pixel_format_is_valid(plane_state); else return i8xx_fbc_pixel_format_is_valid(plane_state); @@ -1038,43 +1092,52 @@ static bool skl_fbc_rotation_is_valid(const struct intel_plane_state *plane_stat static bool rotation_is_valid(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); + struct drm_i915_private *i915 = to_i915(display->drm); - if (DISPLAY_VER(i915) >= 9) + if (DISPLAY_VER(display) >= 9) return skl_fbc_rotation_is_valid(plane_state); - else if (DISPLAY_VER(i915) >= 5 || IS_G4X(i915)) + else if (DISPLAY_VER(display) >= 5 || IS_G4X(i915)) return g4x_fbc_rotation_is_valid(plane_state); else return i8xx_fbc_rotation_is_valid(plane_state); } +static void intel_fbc_max_surface_size(struct intel_display *display, + unsigned int *w, unsigned int *h) +{ + struct drm_i915_private *i915 = to_i915(display->drm); + + if (DISPLAY_VER(display) >= 11) { + *w = 8192; + *h = 4096; + } else if (DISPLAY_VER(display) >= 10) { + *w = 5120; + *h = 4096; + } else if (DISPLAY_VER(display) >= 7) { + *w = 4096; + *h = 4096; + } else if (DISPLAY_VER(display) >= 5 || IS_G4X(i915)) { + *w = 4096; + *h = 2048; + } else { + *w = 2048; + *h = 1536; + } +} + /* * For some reason, the hardware tracking starts looking at whatever we * programmed as the display plane base address register. It does not look at * the X and Y offset registers. That's why we include the src x/y offsets * instead of just looking at the plane size. */ -static bool intel_fbc_hw_tracking_covers_screen(const struct intel_plane_state *plane_state) +static bool intel_fbc_surface_size_ok(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); unsigned int effective_w, effective_h, max_w, max_h; - if (DISPLAY_VER(i915) >= 11) { - max_w = 8192; - max_h = 4096; - } else if (DISPLAY_VER(i915) >= 10) { - max_w = 5120; - max_h = 4096; - } else if (DISPLAY_VER(i915) >= 7) { - max_w = 4096; - max_h = 4096; - } else if (IS_G4X(i915) || DISPLAY_VER(i915) >= 5) { - max_w = 4096; - max_h = 2048; - } else { - max_w = 2048; - max_h = 1536; - } + intel_fbc_max_surface_size(display, &max_w, &max_h); effective_w = plane_state->view.color_plane[0].x + (drm_rect_width(&plane_state->uapi.src) >> 16); @@ -1084,24 +1147,32 @@ static bool intel_fbc_hw_tracking_covers_screen(const struct intel_plane_state * return effective_w <= max_w && effective_h <= max_h; } -static bool intel_fbc_plane_size_valid(const struct intel_plane_state *plane_state) +static void intel_fbc_max_plane_size(struct intel_display *display, + unsigned int *w, unsigned int *h) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); - unsigned int w, h, max_w, max_h; + struct drm_i915_private *i915 = to_i915(display->drm); - if (DISPLAY_VER(i915) >= 10) { - max_w = 5120; - max_h = 4096; - } else if (DISPLAY_VER(i915) >= 8 || IS_HASWELL(i915)) { - max_w = 4096; - max_h = 4096; - } else if (IS_G4X(i915) || DISPLAY_VER(i915) >= 5) { - max_w = 4096; - max_h = 2048; + if (DISPLAY_VER(display) >= 10) { + *w = 5120; + *h = 4096; + } else if (DISPLAY_VER(display) >= 8 || IS_HASWELL(i915)) { + *w = 4096; + *h = 4096; + } else if (DISPLAY_VER(display) >= 5 || IS_G4X(i915)) { + *w = 4096; + *h = 2048; } else { - max_w = 2048; - max_h = 1536; + *w = 2048; + *h = 1536; } +} + +static bool intel_fbc_plane_size_valid(const struct intel_plane_state *plane_state) +{ + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); + unsigned int w, h, max_w, max_h; + + intel_fbc_max_plane_size(display, &max_w, &max_h); w = drm_rect_width(&plane_state->uapi.src) >> 16; h = drm_rect_height(&plane_state->uapi.src) >> 16; @@ -1123,9 +1194,9 @@ static bool skl_fbc_tiling_valid(const struct intel_plane_state *plane_state) static bool tiling_is_valid(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); - if (DISPLAY_VER(i915) >= 9) + if (DISPLAY_VER(display) >= 9) return skl_fbc_tiling_valid(plane_state); else return i8xx_fbc_tiling_valid(plane_state); @@ -1135,7 +1206,7 @@ static void intel_fbc_update_state(struct intel_atomic_state *state, struct intel_crtc *crtc, struct intel_plane *plane) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state->base.dev); const struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); const struct intel_plane_state *plane_state = @@ -1153,8 +1224,8 @@ static void intel_fbc_update_state(struct intel_atomic_state *state, fbc_state->fence_y_offset = intel_plane_fence_y_offset(plane_state); - drm_WARN_ON(&i915->drm, plane_state->flags & PLANE_HAS_FENCE && - !intel_gt_support_legacy_fencing(to_gt(i915))); + drm_WARN_ON(display->drm, plane_state->flags & PLANE_HAS_FENCE && + !intel_fbc_has_fences(display)); if (plane_state->flags & PLANE_HAS_FENCE) fbc_state->fence_id = i915_vma_fence_id(plane_state->ggtt_vma); @@ -1168,7 +1239,7 @@ static void intel_fbc_update_state(struct intel_atomic_state *state, static bool intel_fbc_is_fence_ok(const struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(plane_state->uapi.plane->dev); /* * The use of a CPU fence is one of two ways to detect writes by the @@ -1182,7 +1253,7 @@ static bool intel_fbc_is_fence_ok(const struct intel_plane_state *plane_state) * so have no fence associated with it) due to aperture constraints * at the time of pinning. */ - return DISPLAY_VER(i915) >= 9 || + return DISPLAY_VER(display) >= 9 || (plane_state->flags & PLANE_HAS_FENCE && i915_vma_fence_id(plane_state->ggtt_vma) != -1); } @@ -1207,7 +1278,8 @@ static bool intel_fbc_is_ok(const struct intel_plane_state *plane_state) static int intel_fbc_check_plane(struct intel_atomic_state *state, struct intel_plane *plane) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state->base.dev); + struct drm_i915_private *i915 = to_i915(display->drm); struct intel_plane_state *plane_state = intel_atomic_get_new_plane_state(state, plane); const struct drm_framebuffer *fb = plane_state->hw.fb; @@ -1228,7 +1300,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, return 0; } - if (!i915->display.params.enable_fbc) { + if (!display->params.enable_fbc) { plane_state->no_fbc_reason = "disabled per module param or by default"; return 0; } @@ -1266,15 +1338,15 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, * Recommendation is to keep this combination disabled * Bspec: 50422 HSD: 14010260002 */ - if (IS_DISPLAY_VER(i915, 12, 14) && crtc_state->has_sel_update && + if (IS_DISPLAY_VER(display, 12, 14) && crtc_state->has_sel_update && !crtc_state->has_panel_replay) { plane_state->no_fbc_reason = "PSR2 enabled"; return 0; } /* Wa_14016291713 */ - if ((IS_DISPLAY_VER(i915, 12, 13) || - IS_DISPLAY_IP_STEP(i915, IP_VER(14, 0), STEP_A0, STEP_C0)) && + if ((IS_DISPLAY_VER(display, 12, 13) || + IS_DISPLAY_VER_STEP(i915, IP_VER(14, 0), STEP_A0, STEP_C0)) && crtc_state->has_psr && !crtc_state->has_panel_replay) { plane_state->no_fbc_reason = "PSR1 enabled (Wa_14016291713)"; return 0; @@ -1300,7 +1372,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, return 0; } - if (DISPLAY_VER(i915) < 20 && + if (DISPLAY_VER(display) < 20 && plane_state->hw.pixel_blend_mode != DRM_MODE_BLEND_PIXEL_NONE && fb->format->has_alpha) { plane_state->no_fbc_reason = "per-pixel alpha not supported"; @@ -1312,7 +1384,7 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, return 0; } - if (!intel_fbc_hw_tracking_covers_screen(plane_state)) { + if (!intel_fbc_surface_size_ok(plane_state)) { plane_state->no_fbc_reason = "surface size too big"; return 0; } @@ -1322,14 +1394,14 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state, * having a Y offset that isn't divisible by 4 causes FIFO underrun * and screen flicker. */ - if (DISPLAY_VER(i915) >= 9 && + if (DISPLAY_VER(display) >= 9 && plane_state->view.color_plane[0].y & 3) { plane_state->no_fbc_reason = "plane start Y offset misaligned"; return 0; } /* Wa_22010751166: icl, ehl, tgl, dg1, rkl */ - if (DISPLAY_VER(i915) >= 11 && + if (DISPLAY_VER(display) >= 11 && (plane_state->view.color_plane[0].y + (drm_rect_height(&plane_state->uapi.src) >> 16)) & 3) { plane_state->no_fbc_reason = "plane end Y offset misaligned"; @@ -1405,7 +1477,7 @@ static bool __intel_fbc_pre_update(struct intel_atomic_state *state, struct intel_crtc *crtc, struct intel_plane *plane) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state->base.dev); struct intel_fbc *fbc = plane->fbc; bool need_vblank_wait = false; @@ -1431,7 +1503,7 @@ static bool __intel_fbc_pre_update(struct intel_atomic_state *state, * and skipping the extra vblank wait before the plane update * if at least one frame has already passed. */ - if (fbc->activated && DISPLAY_VER(i915) >= 10) + if (fbc->activated && DISPLAY_VER(display) >= 10) need_vblank_wait = true; fbc->activated = false; @@ -1465,13 +1537,13 @@ bool intel_fbc_pre_update(struct intel_atomic_state *state, static void __intel_fbc_disable(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; struct intel_plane *plane = fbc->state.plane; lockdep_assert_held(&fbc->lock); - drm_WARN_ON(&i915->drm, fbc->active); + drm_WARN_ON(display->drm, fbc->active); - drm_dbg_kms(&i915->drm, "Disabling FBC on [PLANE:%d:%s]\n", + drm_dbg_kms(display->drm, "Disabling FBC on [PLANE:%d:%s]\n", plane->base.base.id, plane->base.name); __intel_fbc_cleanup_cfb(fbc); @@ -1548,7 +1620,7 @@ void intel_fbc_invalidate(struct drm_i915_private *i915, struct intel_fbc *fbc; enum intel_fbc_id fbc_id; - for_each_intel_fbc(i915, fbc, fbc_id) + for_each_intel_fbc(&i915->display, fbc, fbc_id) __intel_fbc_invalidate(fbc, frontbuffer_bits, origin); } @@ -1587,7 +1659,7 @@ void intel_fbc_flush(struct drm_i915_private *i915, struct intel_fbc *fbc; enum intel_fbc_id fbc_id; - for_each_intel_fbc(i915, fbc, fbc_id) + for_each_intel_fbc(&i915->display, fbc, fbc_id) __intel_fbc_flush(fbc, frontbuffer_bits, origin); } @@ -1612,7 +1684,7 @@ static void __intel_fbc_enable(struct intel_atomic_state *state, struct intel_crtc *crtc, struct intel_plane *plane) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state->base.dev); const struct intel_plane_state *plane_state = intel_atomic_get_new_plane_state(state, plane); struct intel_fbc *fbc = plane->fbc; @@ -1631,7 +1703,7 @@ static void __intel_fbc_enable(struct intel_atomic_state *state, __intel_fbc_disable(fbc); } - drm_WARN_ON(&i915->drm, fbc->active); + drm_WARN_ON(display->drm, fbc->active); fbc->no_fbc_reason = plane_state->no_fbc_reason; if (fbc->no_fbc_reason) @@ -1653,7 +1725,7 @@ static void __intel_fbc_enable(struct intel_atomic_state *state, return; } - drm_dbg_kms(&i915->drm, "Enabling FBC on [PLANE:%d:%s]\n", + drm_dbg_kms(display->drm, "Enabling FBC on [PLANE:%d:%s]\n", plane->base.base.id, plane->base.name); fbc->no_fbc_reason = "FBC enabled but not active yet\n"; @@ -1671,10 +1743,10 @@ static void __intel_fbc_enable(struct intel_atomic_state *state, */ void intel_fbc_disable(struct intel_crtc *crtc) { - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc->base.dev); struct intel_plane *plane; - for_each_intel_plane(&i915->drm, plane) { + for_each_intel_plane(display->drm, plane) { struct intel_fbc *fbc = plane->fbc; if (!fbc || plane->pipe != crtc->pipe) @@ -1719,7 +1791,8 @@ void intel_fbc_update(struct intel_atomic_state *state, static void intel_fbc_underrun_work_fn(struct work_struct *work) { struct intel_fbc *fbc = container_of(work, typeof(*fbc), underrun_work); - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); mutex_lock(&fbc->lock); @@ -1727,7 +1800,7 @@ static void intel_fbc_underrun_work_fn(struct work_struct *work) if (fbc->underrun_detected || !fbc->state.plane) goto out; - drm_dbg_kms(&i915->drm, "Disabling FBC due to FIFO underrun.\n"); + drm_dbg_kms(display->drm, "Disabling FBC due to FIFO underrun.\n"); fbc->underrun_detected = true; intel_fbc_deactivate(fbc, "FIFO underrun"); @@ -1740,14 +1813,14 @@ out: static void __intel_fbc_reset_underrun(struct intel_fbc *fbc) { - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; cancel_work_sync(&fbc->underrun_work); mutex_lock(&fbc->lock); if (fbc->underrun_detected) { - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Re-allowing FBC after fifo underrun\n"); fbc->no_fbc_reason = "FIFO underrun cleared"; } @@ -1758,22 +1831,24 @@ static void __intel_fbc_reset_underrun(struct intel_fbc *fbc) /* * intel_fbc_reset_underrun - reset FBC fifo underrun status. - * @i915: the i915 device + * @display: display * * See intel_fbc_handle_fifo_underrun_irq(). For automated testing we * want to re-enable FBC after an underrun to increase test coverage. */ -void intel_fbc_reset_underrun(struct drm_i915_private *i915) +void intel_fbc_reset_underrun(struct intel_display *display) { struct intel_fbc *fbc; enum intel_fbc_id fbc_id; - for_each_intel_fbc(i915, fbc, fbc_id) + for_each_intel_fbc(display, fbc, fbc_id) __intel_fbc_reset_underrun(fbc); } static void __intel_fbc_handle_fifo_underrun_irq(struct intel_fbc *fbc) { + struct drm_i915_private *i915 = to_i915(fbc->display->drm); + /* * There's no guarantee that underrun_detected won't be set to true * right after this check and before the work is scheduled, but that's @@ -1785,12 +1860,12 @@ static void __intel_fbc_handle_fifo_underrun_irq(struct intel_fbc *fbc) if (READ_ONCE(fbc->underrun_detected)) return; - queue_work(fbc->i915->unordered_wq, &fbc->underrun_work); + queue_work(i915->unordered_wq, &fbc->underrun_work); } /** * intel_fbc_handle_fifo_underrun_irq - disable FBC when we get a FIFO underrun - * @i915: i915 device + * @display: display * * Without FBC, most underruns are harmless and don't really cause too many * problems, except for an annoying message on dmesg. With FBC, underruns can @@ -1802,12 +1877,12 @@ static void __intel_fbc_handle_fifo_underrun_irq(struct intel_fbc *fbc) * * This function is called from the IRQ handler. */ -void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *i915) +void intel_fbc_handle_fifo_underrun_irq(struct intel_display *display) { struct intel_fbc *fbc; enum intel_fbc_id fbc_id; - for_each_intel_fbc(i915, fbc, fbc_id) + for_each_intel_fbc(display, fbc, fbc_id) __intel_fbc_handle_fifo_underrun_irq(fbc); } @@ -1820,15 +1895,17 @@ void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *i915) * space to change the value during runtime without sanitizing it again. IGT * relies on being able to change i915.enable_fbc at runtime. */ -static int intel_sanitize_fbc_option(struct drm_i915_private *i915) +static int intel_sanitize_fbc_option(struct intel_display *display) { - if (i915->display.params.enable_fbc >= 0) - return !!i915->display.params.enable_fbc; + struct drm_i915_private *i915 = to_i915(display->drm); + + if (display->params.enable_fbc >= 0) + return !!display->params.enable_fbc; - if (!HAS_FBC(i915)) + if (!HAS_FBC(display)) return 0; - if (IS_BROADWELL(i915) || DISPLAY_VER(i915) >= 9) + if (IS_BROADWELL(i915) || DISPLAY_VER(display) >= 9) return 1; return 0; @@ -1839,9 +1916,10 @@ void intel_fbc_add_plane(struct intel_fbc *fbc, struct intel_plane *plane) plane->fbc = fbc; } -static struct intel_fbc *intel_fbc_create(struct drm_i915_private *i915, +static struct intel_fbc *intel_fbc_create(struct intel_display *display, enum intel_fbc_id fbc_id) { + struct drm_i915_private *i915 = to_i915(display->drm); struct intel_fbc *fbc; fbc = kzalloc(sizeof(*fbc), GFP_KERNEL); @@ -1849,19 +1927,19 @@ static struct intel_fbc *intel_fbc_create(struct drm_i915_private *i915, return NULL; fbc->id = fbc_id; - fbc->i915 = i915; + fbc->display = display; INIT_WORK(&fbc->underrun_work, intel_fbc_underrun_work_fn); mutex_init(&fbc->lock); - if (DISPLAY_VER(i915) >= 7) + if (DISPLAY_VER(display) >= 7) fbc->funcs = &ivb_fbc_funcs; - else if (DISPLAY_VER(i915) == 6) + else if (DISPLAY_VER(display) == 6) fbc->funcs = &snb_fbc_funcs; - else if (DISPLAY_VER(i915) == 5) + else if (DISPLAY_VER(display) == 5) fbc->funcs = &ilk_fbc_funcs; else if (IS_G4X(i915)) fbc->funcs = &g4x_fbc_funcs; - else if (DISPLAY_VER(i915) == 4) + else if (DISPLAY_VER(display) == 4) fbc->funcs = &i965_fbc_funcs; else fbc->funcs = &i8xx_fbc_funcs; @@ -1871,36 +1949,36 @@ static struct intel_fbc *intel_fbc_create(struct drm_i915_private *i915, /** * intel_fbc_init - Initialize FBC - * @i915: the i915 device + * @display: display * * This function might be called during PM init process. */ -void intel_fbc_init(struct drm_i915_private *i915) +void intel_fbc_init(struct intel_display *display) { enum intel_fbc_id fbc_id; - i915->display.params.enable_fbc = intel_sanitize_fbc_option(i915); - drm_dbg_kms(&i915->drm, "Sanitized enable_fbc value: %d\n", - i915->display.params.enable_fbc); + display->params.enable_fbc = intel_sanitize_fbc_option(display); + drm_dbg_kms(display->drm, "Sanitized enable_fbc value: %d\n", + display->params.enable_fbc); - for_each_fbc_id(i915, fbc_id) - i915->display.fbc[fbc_id] = intel_fbc_create(i915, fbc_id); + for_each_fbc_id(display, fbc_id) + display->fbc[fbc_id] = intel_fbc_create(display, fbc_id); } /** * intel_fbc_sanitize - Sanitize FBC - * @i915: the i915 device + * @display: display * * Make sure FBC is initially disabled since we have no * idea eg. into which parts of stolen it might be scribbling * into. */ -void intel_fbc_sanitize(struct drm_i915_private *i915) +void intel_fbc_sanitize(struct intel_display *display) { struct intel_fbc *fbc; enum intel_fbc_id fbc_id; - for_each_intel_fbc(i915, fbc, fbc_id) { + for_each_intel_fbc(display, fbc, fbc_id) { if (intel_fbc_hw_is_active(fbc)) intel_fbc_hw_deactivate(fbc); } @@ -1909,11 +1987,12 @@ void intel_fbc_sanitize(struct drm_i915_private *i915) static int intel_fbc_debugfs_status_show(struct seq_file *m, void *unused) { struct intel_fbc *fbc = m->private; - struct drm_i915_private *i915 = fbc->i915; + struct intel_display *display = fbc->display; + struct drm_i915_private *i915 = to_i915(display->drm); struct intel_plane *plane; intel_wakeref_t wakeref; - drm_modeset_lock_all(&i915->drm); + drm_modeset_lock_all(display->drm); wakeref = intel_runtime_pm_get(&i915->runtime_pm); mutex_lock(&fbc->lock); @@ -1926,7 +2005,7 @@ static int intel_fbc_debugfs_status_show(struct seq_file *m, void *unused) seq_printf(m, "FBC disabled: %s\n", fbc->no_fbc_reason); } - for_each_intel_plane(&i915->drm, plane) { + for_each_intel_plane(display->drm, plane) { const struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); @@ -1942,7 +2021,7 @@ static int intel_fbc_debugfs_status_show(struct seq_file *m, void *unused) mutex_unlock(&fbc->lock); intel_runtime_pm_put(&i915->runtime_pm, wakeref); - drm_modeset_unlock_all(&i915->drm); + drm_modeset_unlock_all(display->drm); return 0; } @@ -1999,12 +2078,12 @@ void intel_fbc_crtc_debugfs_add(struct intel_crtc *crtc) } /* FIXME: remove this once igt is on board with per-crtc stuff */ -void intel_fbc_debugfs_register(struct drm_i915_private *i915) +void intel_fbc_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = i915->drm.primary; + struct drm_minor *minor = display->drm->primary; struct intel_fbc *fbc; - fbc = i915->display.fbc[INTEL_FBC_A]; + fbc = display->fbc[INTEL_FBC_A]; if (fbc) intel_fbc_debugfs_add(fbc, minor->debugfs_root); } diff --git a/drivers/gpu/drm/i915/display/intel_fbc.h b/drivers/gpu/drm/i915/display/intel_fbc.h index 6720ec8ee8a2..ceae55458e14 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.h +++ b/drivers/gpu/drm/i915/display/intel_fbc.h @@ -13,6 +13,7 @@ struct drm_i915_private; struct intel_atomic_state; struct intel_crtc; struct intel_crtc_state; +struct intel_display; struct intel_fbc; struct intel_plane; struct intel_plane_state; @@ -31,9 +32,9 @@ bool intel_fbc_pre_update(struct intel_atomic_state *state, struct intel_crtc *crtc); void intel_fbc_post_update(struct intel_atomic_state *state, struct intel_crtc *crtc); -void intel_fbc_init(struct drm_i915_private *dev_priv); -void intel_fbc_cleanup(struct drm_i915_private *dev_priv); -void intel_fbc_sanitize(struct drm_i915_private *dev_priv); +void intel_fbc_init(struct intel_display *display); +void intel_fbc_cleanup(struct intel_display *display); +void intel_fbc_sanitize(struct intel_display *display); void intel_fbc_update(struct intel_atomic_state *state, struct intel_crtc *crtc); void intel_fbc_disable(struct intel_crtc *crtc); @@ -43,9 +44,9 @@ void intel_fbc_invalidate(struct drm_i915_private *dev_priv, void intel_fbc_flush(struct drm_i915_private *dev_priv, unsigned int frontbuffer_bits, enum fb_op_origin origin); void intel_fbc_add_plane(struct intel_fbc *fbc, struct intel_plane *plane); -void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *i915); -void intel_fbc_reset_underrun(struct drm_i915_private *i915); +void intel_fbc_handle_fifo_underrun_irq(struct intel_display *display); +void intel_fbc_reset_underrun(struct intel_display *display); void intel_fbc_crtc_debugfs_add(struct intel_crtc *crtc); -void intel_fbc_debugfs_register(struct drm_i915_private *i915); +void intel_fbc_debugfs_register(struct intel_display *display); #endif /* __INTEL_FBC_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c index d33befd7994d..222cd0e1a2bc 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.c +++ b/drivers/gpu/drm/i915/display/intel_fdi.c @@ -5,6 +5,8 @@ #include <linux/string_helpers.h> +#include <drm/drm_fixed.h> + #include "i915_reg.h" #include "intel_atomic.h" #include "intel_crtc.h" @@ -304,7 +306,7 @@ int intel_fdi_link_freq(struct drm_i915_private *i915, bool intel_fdi_compute_pipe_bpp(struct intel_crtc_state *crtc_state) { int pipe_bpp = min(crtc_state->pipe_bpp, - to_bpp_int(crtc_state->max_link_bpp_x16)); + fxp_q4_to_int(crtc_state->max_link_bpp_x16)); pipe_bpp = rounddown(pipe_bpp, 2 * 3); @@ -340,7 +342,7 @@ int ilk_fdi_compute_config(struct intel_crtc *crtc, pipe_config->fdi_lanes = lane; - intel_link_compute_m_n(to_bpp_x16(pipe_config->pipe_bpp), + intel_link_compute_m_n(fxp_q4_from_int(pipe_config->pipe_bpp), lane, fdi_dotclock, link_bw, intel_dp_bw_fec_overhead(false), diff --git a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c index e5e4ca7cc499..8949fbb1cc60 100644 --- a/drivers/gpu/drm/i915/display/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c @@ -440,7 +440,7 @@ void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, drm_err(&dev_priv->drm, "CPU pipe %c FIFO underrun\n", pipe_name(pipe)); } - intel_fbc_handle_fifo_underrun_irq(dev_priv); + intel_fbc_handle_fifo_underrun_irq(&dev_priv->display); } /** diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index 9c8e1e91ff1c..6470f75106bd 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -478,7 +478,7 @@ gmbus_xfer_read_chunk(struct drm_i915_private *i915, /* * HW spec says that 512Bytes in Burst read need special treatment. * But it doesn't talk about other multiple of 256Bytes. And couldn't locate - * an I2C slave, which supports such a lengthy burst read too for experiments. + * an I2C target, which supports such a lengthy burst read too for experiments. * * So until things get clarified on HW support, to avoid the burst read length * in fold of 256Bytes except 512, max burst read length is fixed at 767Bytes. @@ -701,7 +701,7 @@ clear_err: /* Toggle the Software Clear Interrupt bit. This has the effect * of resetting the GMBUS controller and so clearing the - * BUS_ERROR raised by the slave's NAK. + * BUS_ERROR raised by the target's NAK. */ intel_de_write_fw(i915, GMBUS1(i915), GMBUS_SW_CLR_INT); intel_de_write_fw(i915, GMBUS1(i915), 0); diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 3ebe035f382e..94418f218448 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -42,11 +42,11 @@ intel_hdcp_disable_hdcp_line_rekeying(struct intel_encoder *encoder, return; if (DISPLAY_VER(dev_priv) >= 14) { - if (IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_D0, STEP_FOREVER)) + if (IS_DISPLAY_VER_STEP(dev_priv, IP_VER(14, 0), STEP_D0, STEP_FOREVER)) intel_de_rmw(dev_priv, MTL_CHICKEN_TRANS(hdcp->cpu_transcoder), 0, HDCP_LINE_REKEY_DISABLE); - else if (IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 1), STEP_B0, STEP_FOREVER) || - IS_DISPLAY_IP_STEP(dev_priv, IP_VER(20, 0), STEP_B0, STEP_FOREVER)) + else if (IS_DISPLAY_VER_STEP(dev_priv, IP_VER(14, 1), STEP_B0, STEP_FOREVER) || + IS_DISPLAY_VER_STEP(dev_priv, IP_VER(20, 0), STEP_B0, STEP_FOREVER)) intel_de_rmw(dev_priv, TRANS_DDI_FUNC_CTL(dev_priv, hdcp->cpu_transcoder), 0, TRANS_DDI_HDCP_LINE_REKEY_DISABLE); @@ -203,11 +203,16 @@ int intel_hdcp_read_valid_bksv(struct intel_digital_port *dig_port, /* Is HDCP1.4 capable on Platform and Sink */ bool intel_hdcp_get_capability(struct intel_connector *connector) { - struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct intel_digital_port *dig_port; const struct intel_hdcp_shim *shim = connector->hdcp.shim; bool capable = false; u8 bksv[5]; + if (!intel_attached_encoder(connector)) + return capable; + + dig_port = intel_attached_dig_port(connector); + if (!shim) return capable; diff --git a/drivers/gpu/drm/i915/display/intel_hti.c b/drivers/gpu/drm/i915/display/intel_hti.c index a92d008d4e6e..19d1f196d9fb 100644 --- a/drivers/gpu/drm/i915/display/intel_hti.c +++ b/drivers/gpu/drm/i915/display/intel_hti.c @@ -9,33 +9,33 @@ #include "intel_hti.h" #include "intel_hti_regs.h" -void intel_hti_init(struct drm_i915_private *i915) +void intel_hti_init(struct intel_display *display) { /* * If the platform has HTI, we need to find out whether it has reserved * any display resources before we create our display outputs. */ - if (DISPLAY_INFO(i915)->has_hti) - i915->display.hti.state = intel_de_read(i915, HDPORT_STATE); + if (DISPLAY_INFO(display)->has_hti) + display->hti.state = intel_de_read(display, HDPORT_STATE); } -bool intel_hti_uses_phy(struct drm_i915_private *i915, enum phy phy) +bool intel_hti_uses_phy(struct intel_display *display, enum phy phy) { - if (drm_WARN_ON(&i915->drm, phy == PHY_NONE)) + if (drm_WARN_ON(display->drm, phy == PHY_NONE)) return false; - return i915->display.hti.state & HDPORT_ENABLED && - i915->display.hti.state & HDPORT_DDI_USED(phy); + return display->hti.state & HDPORT_ENABLED && + display->hti.state & HDPORT_DDI_USED(phy); } -u32 intel_hti_dpll_mask(struct drm_i915_private *i915) +u32 intel_hti_dpll_mask(struct intel_display *display) { - if (!(i915->display.hti.state & HDPORT_ENABLED)) + if (!(display->hti.state & HDPORT_ENABLED)) return 0; /* * Note: This is subtle. The values must coincide with what's defined * for the platform. */ - return REG_FIELD_GET(HDPORT_DPLL_USED_MASK, i915->display.hti.state); + return REG_FIELD_GET(HDPORT_DPLL_USED_MASK, display->hti.state); } diff --git a/drivers/gpu/drm/i915/display/intel_hti.h b/drivers/gpu/drm/i915/display/intel_hti.h index 2893d6668657..b692571c5558 100644 --- a/drivers/gpu/drm/i915/display/intel_hti.h +++ b/drivers/gpu/drm/i915/display/intel_hti.h @@ -8,11 +8,11 @@ #include <linux/types.h> -struct drm_i915_private; +struct intel_display; enum phy; -void intel_hti_init(struct drm_i915_private *i915); -bool intel_hti_uses_phy(struct drm_i915_private *i915, enum phy phy); -u32 intel_hti_dpll_mask(struct drm_i915_private *i915); +void intel_hti_init(struct intel_display *display); +bool intel_hti_uses_phy(struct intel_display *display, enum phy phy); +u32 intel_hti_dpll_mask(struct intel_display *display); #endif /* __INTEL_HTI_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.c b/drivers/gpu/drm/i915/display/intel_link_bw.c index dfd7d5e23f3f..e7a9b860fac6 100644 --- a/drivers/gpu/drm/i915/display/intel_link_bw.c +++ b/drivers/gpu/drm/i915/display/intel_link_bw.c @@ -3,6 +3,8 @@ * Copyright © 2023 Intel Corporation */ +#include <drm/drm_fixed.h> + #include "i915_drv.h" #include "intel_atomic.h" @@ -23,12 +25,13 @@ void intel_link_bw_init_limits(struct intel_atomic_state *state, struct intel_link_bw_limits *limits) { + struct intel_display *display = to_intel_display(state); struct drm_i915_private *i915 = to_i915(state->base.dev); enum pipe pipe; limits->force_fec_pipes = 0; limits->bpp_limit_reached_pipes = 0; - for_each_pipe(i915, pipe) { + for_each_pipe(display, pipe) { const struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, intel_crtc_for_pipe(i915, pipe)); @@ -67,12 +70,12 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state, u8 pipe_mask, const char *reason) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); enum pipe max_bpp_pipe = INVALID_PIPE; struct intel_crtc *crtc; int max_bpp_x16 = 0; - for_each_intel_crtc_in_pipe_mask(&i915->drm, crtc, pipe_mask) { + for_each_intel_crtc_in_pipe_mask(display->drm, crtc, pipe_mask) { struct intel_crtc_state *crtc_state; int link_bpp_x16; @@ -93,7 +96,7 @@ int intel_link_bw_reduce_bpp(struct intel_atomic_state *state, * is based on the pipe bpp value, set the actual link bpp * limit here once the MST BW allocation is fixed. */ - link_bpp_x16 = to_bpp_x16(crtc_state->pipe_bpp); + link_bpp_x16 = fxp_q4_from_int(crtc_state->pipe_bpp); if (link_bpp_x16 > max_bpp_x16) { max_bpp_x16 = link_bpp_x16; @@ -134,7 +137,7 @@ intel_link_bw_set_bpp_limit_for_pipe(struct intel_atomic_state *state, struct intel_link_bw_limits *new_limits, enum pipe pipe) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); if (pipe == INVALID_PIPE) return false; @@ -143,7 +146,7 @@ intel_link_bw_set_bpp_limit_for_pipe(struct intel_atomic_state *state, old_limits->max_bpp_x16[pipe]) return false; - if (drm_WARN_ON(&i915->drm, + if (drm_WARN_ON(display->drm, new_limits->bpp_limit_reached_pipes & BIT(pipe))) return false; @@ -176,7 +179,7 @@ static int check_all_link_config(struct intel_atomic_state *state, } static bool -assert_link_limit_change_valid(struct drm_i915_private *i915, +assert_link_limit_change_valid(struct intel_display *display, const struct intel_link_bw_limits *old_limits, const struct intel_link_bw_limits *new_limits) { @@ -184,14 +187,14 @@ assert_link_limit_change_valid(struct drm_i915_private *i915, enum pipe pipe; /* FEC can't be forced off after it was forced on. */ - if (drm_WARN_ON(&i915->drm, + if (drm_WARN_ON(display->drm, (old_limits->force_fec_pipes & new_limits->force_fec_pipes) != old_limits->force_fec_pipes)) return false; - for_each_pipe(i915, pipe) { + for_each_pipe(display, pipe) { /* The bpp limit can only decrease. */ - if (drm_WARN_ON(&i915->drm, + if (drm_WARN_ON(display->drm, new_limits->max_bpp_x16[pipe] > old_limits->max_bpp_x16[pipe])) return false; @@ -202,7 +205,7 @@ assert_link_limit_change_valid(struct drm_i915_private *i915, } /* At least one limit must change. */ - if (drm_WARN_ON(&i915->drm, + if (drm_WARN_ON(display->drm, !bpps_changed && new_limits->force_fec_pipes == old_limits->force_fec_pipes)) @@ -230,7 +233,7 @@ assert_link_limit_change_valid(struct drm_i915_private *i915, int intel_link_bw_atomic_check(struct intel_atomic_state *state, struct intel_link_bw_limits *new_limits) { - struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_display *display = to_intel_display(state); struct intel_link_bw_limits old_limits = *new_limits; int ret; @@ -238,7 +241,7 @@ int intel_link_bw_atomic_check(struct intel_atomic_state *state, if (ret != -EAGAIN) return ret; - if (!assert_link_limit_change_valid(i915, &old_limits, new_limits)) + if (!assert_link_limit_change_valid(display, &old_limits, new_limits)) return -EINVAL; return -EAGAIN; diff --git a/drivers/gpu/drm/i915/display/intel_link_bw.h b/drivers/gpu/drm/i915/display/intel_link_bw.h index 6b0ccfff59da..e69049cf178f 100644 --- a/drivers/gpu/drm/i915/display/intel_link_bw.h +++ b/drivers/gpu/drm/i915/display/intel_link_bw.h @@ -10,8 +10,6 @@ #include "intel_display_limits.h" -struct drm_i915_private; - struct intel_atomic_state; struct intel_crtc_state; diff --git a/drivers/gpu/drm/i915/display/intel_load_detect.c b/drivers/gpu/drm/i915/display/intel_load_detect.c index d5a0aecf3e8f..b457c69dc0be 100644 --- a/drivers/gpu/drm/i915/display/intel_load_detect.c +++ b/drivers/gpu/drm/i915/display/intel_load_detect.c @@ -48,23 +48,22 @@ struct drm_atomic_state * intel_load_detect_get_pipe(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx) { + struct intel_display *display = to_intel_display(connector->dev); struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); struct intel_crtc *possible_crtc; struct intel_crtc *crtc = NULL; - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_mode_config *config = &dev->mode_config; + struct drm_mode_config *config = &display->drm->mode_config; struct drm_atomic_state *state = NULL, *restore_state = NULL; struct drm_connector_state *connector_state; struct intel_crtc_state *crtc_state; int ret; - drm_dbg_kms(&dev_priv->drm, "[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", connector->base.id, connector->name, encoder->base.base.id, encoder->base.name); - drm_WARN_ON(dev, !drm_modeset_is_locked(&config->connection_mutex)); + drm_WARN_ON(display->drm, !drm_modeset_is_locked(&config->connection_mutex)); /* * Algorithm gets a little messy: @@ -89,7 +88,7 @@ intel_load_detect_get_pipe(struct drm_connector *connector, } /* Find an unused one (if possible) */ - for_each_intel_crtc(dev, possible_crtc) { + for_each_intel_crtc(display->drm, possible_crtc) { if (!(encoder->base.possible_crtcs & drm_crtc_mask(&possible_crtc->base))) continue; @@ -111,15 +110,15 @@ intel_load_detect_get_pipe(struct drm_connector *connector, * If we didn't find an unused CRTC, don't use any. */ if (!crtc) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "no pipe available for load-detect\n"); ret = -ENODEV; goto fail; } found: - state = drm_atomic_state_alloc(dev); - restore_state = drm_atomic_state_alloc(dev); + state = drm_atomic_state_alloc(display->drm); + restore_state = drm_atomic_state_alloc(display->drm); if (!state || !restore_state) { ret = -ENOMEM; goto fail; @@ -164,7 +163,7 @@ found: if (!ret) ret = drm_atomic_add_affected_planes(restore_state, &crtc->base); if (ret) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Failed to create a copy of old state to restore: %i\n", ret); goto fail; @@ -172,7 +171,7 @@ found: ret = drm_atomic_commit(state); if (ret) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "failed to set mode on load-detect pipe\n"); goto fail; } @@ -204,13 +203,13 @@ void intel_load_detect_release_pipe(struct drm_connector *connector, struct drm_atomic_state *state, struct drm_modeset_acquire_ctx *ctx) { + struct intel_display *display = to_intel_display(connector->dev); struct intel_encoder *intel_encoder = intel_attached_encoder(to_intel_connector(connector)); - struct drm_i915_private *i915 = to_i915(intel_encoder->base.dev); struct drm_encoder *encoder = &intel_encoder->base; int ret; - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", connector->base.id, connector->name, encoder->base.id, encoder->name); @@ -219,7 +218,7 @@ void intel_load_detect_release_pipe(struct drm_connector *connector, ret = drm_atomic_helper_commit_duplicated_state(state, ctx); if (ret) - drm_dbg_kms(&i915->drm, + drm_dbg_kms(display->drm, "Couldn't release load detect pipe: %i\n", ret); drm_atomic_state_put(state); } diff --git a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c index 8b26354d6e53..f9db867fae89 100644 --- a/drivers/gpu/drm/i915/display/intel_lspcon.c +++ b/drivers/gpu/drm/i915/display/intel_lspcon.c @@ -79,33 +79,33 @@ static const char *lspcon_mode_name(enum drm_lspcon_mode mode) static bool lspcon_detect_vendor(struct intel_lspcon *lspcon) { - struct intel_dp *dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(dp); + struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); + struct intel_display *display = to_intel_display(intel_dp); struct drm_dp_dpcd_ident *ident; u32 vendor_oui; - if (drm_dp_read_desc(&dp->aux, &dp->desc, drm_dp_is_branch(dp->dpcd))) { - drm_err(&i915->drm, "Can't read description\n"); + if (drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc, drm_dp_is_branch(intel_dp->dpcd))) { + drm_err(display->drm, "Can't read description\n"); return false; } - ident = &dp->desc.ident; + ident = &intel_dp->desc.ident; vendor_oui = (ident->oui[0] << 16) | (ident->oui[1] << 8) | ident->oui[2]; switch (vendor_oui) { case LSPCON_VENDOR_MCA_OUI: lspcon->vendor = LSPCON_VENDOR_MCA; - drm_dbg_kms(&i915->drm, "Vendor: Mega Chips\n"); + drm_dbg_kms(display->drm, "Vendor: Mega Chips\n"); break; case LSPCON_VENDOR_PARADE_OUI: lspcon->vendor = LSPCON_VENDOR_PARADE; - drm_dbg_kms(&i915->drm, "Vendor: Parade Tech\n"); + drm_dbg_kms(display->drm, "Vendor: Parade Tech\n"); break; default: - drm_err(&i915->drm, "Invalid/Unknown vendor OUI\n"); + drm_err(display->drm, "Invalid/Unknown vendor OUI\n"); return false; } @@ -123,7 +123,7 @@ static u32 get_hdr_status_reg(struct intel_lspcon *lspcon) void lspcon_detect_hdr_capability(struct intel_lspcon *lspcon) { struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); u8 hdr_caps; int ret; @@ -131,10 +131,10 @@ void lspcon_detect_hdr_capability(struct intel_lspcon *lspcon) &hdr_caps, 1); if (ret < 0) { - drm_dbg_kms(&i915->drm, "HDR capability detection failed\n"); + drm_dbg_kms(display->drm, "HDR capability detection failed\n"); lspcon->hdr_supported = false; } else if (hdr_caps & 0x1) { - drm_dbg_kms(&i915->drm, "LSPCON capable of HDR\n"); + drm_dbg_kms(display->drm, "LSPCON capable of HDR\n"); lspcon->hdr_supported = true; } } @@ -142,12 +142,12 @@ void lspcon_detect_hdr_capability(struct intel_lspcon *lspcon) static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon *lspcon) { struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); enum drm_lspcon_mode current_mode; struct i2c_adapter *ddc = &intel_dp->aux.ddc; if (drm_lspcon_get_mode(intel_dp->aux.drm_dev, ddc, ¤t_mode)) { - drm_dbg_kms(&i915->drm, "Error reading LSPCON mode\n"); + drm_dbg_kms(display->drm, "Error reading LSPCON mode\n"); return DRM_LSPCON_MODE_INVALID; } return current_mode; @@ -169,23 +169,23 @@ static enum drm_lspcon_mode lspcon_wait_mode(struct intel_lspcon *lspcon, enum drm_lspcon_mode mode) { struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); enum drm_lspcon_mode current_mode; current_mode = lspcon_get_current_mode(lspcon); if (current_mode == mode) goto out; - drm_dbg_kms(&i915->drm, "Waiting for LSPCON mode %s to settle\n", + drm_dbg_kms(display->drm, "Waiting for LSPCON mode %s to settle\n", lspcon_mode_name(mode)); wait_for((current_mode = lspcon_get_current_mode(lspcon)) == mode, lspcon_get_mode_settle_timeout(lspcon)); if (current_mode != mode) - drm_err(&i915->drm, "LSPCON mode hasn't settled\n"); + drm_err(display->drm, "LSPCON mode hasn't settled\n"); out: - drm_dbg_kms(&i915->drm, "Current LSPCON mode %s\n", + drm_dbg_kms(display->drm, "Current LSPCON mode %s\n", lspcon_mode_name(current_mode)); return current_mode; @@ -195,46 +195,46 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon, enum drm_lspcon_mode mode) { struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); int err; enum drm_lspcon_mode current_mode; struct i2c_adapter *ddc = &intel_dp->aux.ddc; err = drm_lspcon_get_mode(intel_dp->aux.drm_dev, ddc, ¤t_mode); if (err) { - drm_err(&i915->drm, "Error reading LSPCON mode\n"); + drm_err(display->drm, "Error reading LSPCON mode\n"); return err; } if (current_mode == mode) { - drm_dbg_kms(&i915->drm, "Current mode = desired LSPCON mode\n"); + drm_dbg_kms(display->drm, "Current mode = desired LSPCON mode\n"); return 0; } err = drm_lspcon_set_mode(intel_dp->aux.drm_dev, ddc, mode); if (err < 0) { - drm_err(&i915->drm, "LSPCON mode change failed\n"); + drm_err(display->drm, "LSPCON mode change failed\n"); return err; } lspcon->mode = mode; - drm_dbg_kms(&i915->drm, "LSPCON mode changed done\n"); + drm_dbg_kms(display->drm, "LSPCON mode changed done\n"); return 0; } static bool lspcon_wake_native_aux_ch(struct intel_lspcon *lspcon) { struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); u8 rev; if (drm_dp_dpcd_readb(&lspcon_to_intel_dp(lspcon)->aux, DP_DPCD_REV, &rev) != 1) { - drm_dbg_kms(&i915->drm, "Native AUX CH down\n"); + drm_dbg_kms(display->drm, "Native AUX CH down\n"); return false; } - drm_dbg_kms(&i915->drm, "Native AUX CH up, DPCD version: %d.%d\n", + drm_dbg_kms(display->drm, "Native AUX CH up, DPCD version: %d.%d\n", rev >> 4, rev & 0xf); return true; @@ -242,12 +242,12 @@ static bool lspcon_wake_native_aux_ch(struct intel_lspcon *lspcon) static bool lspcon_probe(struct intel_lspcon *lspcon) { - int retry; - enum drm_dp_dual_mode_type adaptor_type; struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct i2c_adapter *ddc = &intel_dp->aux.ddc; + enum drm_dp_dual_mode_type adaptor_type; enum drm_lspcon_mode expected_mode; + int retry; expected_mode = lspcon_wake_native_aux_ch(lspcon) ? DRM_LSPCON_MODE_PCON : DRM_LSPCON_MODE_LS; @@ -263,13 +263,13 @@ static bool lspcon_probe(struct intel_lspcon *lspcon) } if (adaptor_type != DRM_DP_DUAL_MODE_LSPCON) { - drm_dbg_kms(&i915->drm, "No LSPCON detected, found %s\n", + drm_dbg_kms(display->drm, "No LSPCON detected, found %s\n", drm_dp_get_dual_mode_type_name(adaptor_type)); return false; } /* Yay ... got a LSPCON device */ - drm_dbg_kms(&i915->drm, "LSPCON detected\n"); + drm_dbg_kms(display->drm, "LSPCON detected\n"); lspcon->mode = lspcon_wait_mode(lspcon, expected_mode); /* @@ -279,7 +279,7 @@ static bool lspcon_probe(struct intel_lspcon *lspcon) */ if (lspcon->mode != DRM_LSPCON_MODE_PCON) { if (lspcon_change_mode(lspcon, DRM_LSPCON_MODE_PCON) < 0) { - drm_err(&i915->drm, "LSPCON mode change to PCON failed\n"); + drm_err(display->drm, "LSPCON mode change to PCON failed\n"); return false; } } @@ -289,13 +289,13 @@ static bool lspcon_probe(struct intel_lspcon *lspcon) static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon) { struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_display *display = to_intel_display(intel_dp); struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); unsigned long start = jiffies; while (1) { if (intel_digital_port_connected(&dig_port->base)) { - drm_dbg_kms(&i915->drm, "LSPCON recovering in PCON mode after %u ms\n", + drm_dbg_kms(display->drm, "LSPCON recovering in PCON mode after %u ms\n", jiffies_to_msecs(jiffies - start)); return; } @@ -306,7 +306,7 @@ static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon) usleep_range(10000, 15000); } - drm_dbg_kms(&i915->drm, "LSPCON DP descriptor mismatch after resume\n"); + drm_dbg_kms(display->drm, "LSPCON DP descriptor mismatch after resume\n"); } static bool lspcon_parade_fw_ready(struct drm_dp_aux *aux) @@ -477,10 +477,10 @@ void lspcon_write_infoframe(struct intel_encoder *encoder, unsigned int type, const void *frame, ssize_t len) { - bool ret = true; + struct intel_display *display = to_intel_display(encoder); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); struct intel_lspcon *lspcon = enc_to_intel_lspcon(encoder); + bool ret = true; switch (type) { case HDMI_INFOFRAME_TYPE_AVI: @@ -492,7 +492,7 @@ void lspcon_write_infoframe(struct intel_encoder *encoder, frame, len); break; case HDMI_PACKET_TYPE_GAMUT_METADATA: - drm_dbg_kms(&i915->drm, "Update HDR metadata for lspcon\n"); + drm_dbg_kms(display->drm, "Update HDR metadata for lspcon\n"); /* It uses the legacy hsw implementation for the same */ hsw_write_infoframe(encoder, crtc_state, type, frame, len); break; @@ -501,7 +501,7 @@ void lspcon_write_infoframe(struct intel_encoder *encoder, } if (!ret) { - drm_err(&i915->drm, "Failed to write infoframes\n"); + drm_err(display->drm, "Failed to write infoframes\n"); return; } } @@ -522,17 +522,17 @@ void lspcon_set_infoframes(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { - ssize_t ret; - union hdmi_infoframe frame; - u8 buf[VIDEO_DIP_DATA_SIZE]; + struct intel_display *display = to_intel_display(encoder); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct intel_lspcon *lspcon = &dig_port->lspcon; - struct drm_i915_private *i915 = to_i915(encoder->base.dev); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; + union hdmi_infoframe frame; + u8 buf[VIDEO_DIP_DATA_SIZE]; + ssize_t ret; if (!lspcon->active) { - drm_err(&i915->drm, "Writing infoframes while LSPCON disabled ?\n"); + drm_err(display->drm, "Writing infoframes while LSPCON disabled ?\n"); return; } @@ -542,7 +542,7 @@ void lspcon_set_infoframes(struct intel_encoder *encoder, conn_state->connector, adjusted_mode); if (ret < 0) { - drm_err(&i915->drm, "couldn't fill AVI infoframe\n"); + drm_err(display->drm, "couldn't fill AVI infoframe\n"); return; } @@ -583,7 +583,7 @@ void lspcon_set_infoframes(struct intel_encoder *encoder, ret = hdmi_infoframe_pack(&frame, buf, sizeof(buf)); if (ret < 0) { - drm_err(&i915->drm, "Failed to pack AVI IF\n"); + drm_err(display->drm, "Failed to pack AVI IF\n"); return; } @@ -624,9 +624,9 @@ static bool _lspcon_read_avi_infoframe_enabled_parade(struct drm_dp_aux *aux) u32 lspcon_infoframes_enabled(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { + struct intel_display *display = to_intel_display(encoder); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); struct intel_lspcon *lspcon = enc_to_intel_lspcon(encoder); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); bool infoframes_enabled; u32 val = 0; u32 mask, tmp; @@ -640,8 +640,8 @@ u32 lspcon_infoframes_enabled(struct intel_encoder *encoder, val |= intel_hdmi_infoframe_enable(HDMI_INFOFRAME_TYPE_AVI); if (lspcon->hdr_supported) { - tmp = intel_de_read(dev_priv, - HSW_TVIDEO_DIP_CTL(dev_priv, pipe_config->cpu_transcoder)); + tmp = intel_de_read(display, + HSW_TVIDEO_DIP_CTL(display, pipe_config->cpu_transcoder)); mask = VIDEO_DIP_ENABLE_GMP_HSW; if (tmp & mask) @@ -658,32 +658,32 @@ void lspcon_wait_pcon_mode(struct intel_lspcon *lspcon) bool lspcon_init(struct intel_digital_port *dig_port) { + struct intel_display *display = to_intel_display(dig_port); struct intel_dp *intel_dp = &dig_port->dp; struct intel_lspcon *lspcon = &dig_port->lspcon; - struct drm_i915_private *i915 = dp_to_i915(intel_dp); struct drm_connector *connector = &intel_dp->attached_connector->base; lspcon->active = false; lspcon->mode = DRM_LSPCON_MODE_INVALID; if (!lspcon_probe(lspcon)) { - drm_err(&i915->drm, "Failed to probe lspcon\n"); + drm_err(display->drm, "Failed to probe lspcon\n"); return false; } if (drm_dp_read_dpcd_caps(&intel_dp->aux, intel_dp->dpcd) != 0) { - drm_err(&i915->drm, "LSPCON DPCD read failed\n"); + drm_err(display->drm, "LSPCON DPCD read failed\n"); return false; } if (!lspcon_detect_vendor(lspcon)) { - drm_err(&i915->drm, "LSPCON vendor detection failed\n"); + drm_err(display->drm, "LSPCON vendor detection failed\n"); return false; } connector->ycbcr_420_allowed = true; lspcon->active = true; - drm_dbg_kms(&i915->drm, "Success: LSPCON init\n"); + drm_dbg_kms(display->drm, "Success: LSPCON init\n"); return true; } @@ -697,9 +697,8 @@ u32 intel_lspcon_infoframes_enabled(struct intel_encoder *encoder, void lspcon_resume(struct intel_digital_port *dig_port) { + struct intel_display *display = to_intel_display(dig_port); struct intel_lspcon *lspcon = &dig_port->lspcon; - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *i915 = to_i915(dev); enum drm_lspcon_mode expected_mode; if (!intel_bios_encoder_is_lspcon(dig_port->base.devdata)) @@ -707,7 +706,7 @@ void lspcon_resume(struct intel_digital_port *dig_port) if (!lspcon->active) { if (!lspcon_init(dig_port)) { - drm_err(&i915->drm, "LSPCON init failed on port %c\n", + drm_err(display->drm, "LSPCON init failed on port %c\n", port_name(dig_port->base.port)); return; } @@ -724,7 +723,7 @@ void lspcon_resume(struct intel_digital_port *dig_port) return; if (lspcon_change_mode(lspcon, DRM_LSPCON_MODE_PCON)) - drm_err(&i915->drm, "LSPCON resume failed\n"); + drm_err(display->drm, "LSPCON resume failed\n"); else - drm_dbg_kms(&i915->drm, "LSPCON resume success\n"); + drm_dbg_kms(display->drm, "LSPCON resume success\n"); } diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 9f018503d4fd..fb4ed9f7855b 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -838,6 +838,7 @@ static void intel_lvds_add_properties(struct drm_connector *connector) */ void intel_lvds_init(struct drm_i915_private *i915) { + struct intel_display *display = &i915->display; struct intel_lvds_encoder *lvds_encoder; struct intel_connector *connector; const struct drm_edid *drm_edid; @@ -872,7 +873,7 @@ void intel_lvds_init(struct drm_i915_private *i915) } ddc_pin = GMBUS_PIN_PANEL; - if (!intel_bios_is_lvds_present(i915, &ddc_pin)) { + if (!intel_bios_is_lvds_present(display, &ddc_pin)) { if ((lvds & LVDS_PORT_EN) == 0) { drm_dbg_kms(&i915->drm, "LVDS is not present in VBT\n"); @@ -966,7 +967,7 @@ void intel_lvds_init(struct drm_i915_private *i915) } else { drm_edid = ERR_PTR(-ENOENT); } - intel_bios_init_panel_late(i915, &connector->panel, NULL, + intel_bios_init_panel_late(display, &connector->panel, NULL, IS_ERR(drm_edid) ? NULL : drm_edid); /* Try EDID first */ diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c index 7602cb30ebf1..72694dde3c22 100644 --- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c +++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c @@ -326,6 +326,8 @@ static void intel_modeset_update_connector_atomic_state(struct drm_i915_private static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state) { + struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + if (intel_crtc_is_joiner_secondary(crtc_state)) return; @@ -337,11 +339,30 @@ static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state crtc_state->uapi.adjusted_mode = crtc_state->hw.adjusted_mode; crtc_state->uapi.scaling_filter = crtc_state->hw.scaling_filter; - /* assume 1:1 mapping */ - drm_property_replace_blob(&crtc_state->hw.degamma_lut, - crtc_state->pre_csc_lut); - drm_property_replace_blob(&crtc_state->hw.gamma_lut, - crtc_state->post_csc_lut); + if (DISPLAY_INFO(i915)->color.degamma_lut_size) { + /* assume 1:1 mapping */ + drm_property_replace_blob(&crtc_state->hw.degamma_lut, + crtc_state->pre_csc_lut); + drm_property_replace_blob(&crtc_state->hw.gamma_lut, + crtc_state->post_csc_lut); + } else { + /* + * ilk/snb hw may be configured for either pre_csc_lut + * or post_csc_lut, but we don't advertise degamma_lut as + * being available in the uapi since there is only one + * hardware LUT. Always assign the result of the readout + * to gamma_lut as that is the only valid source of LUTs + * in the uapi. + */ + drm_WARN_ON(&i915->drm, crtc_state->post_csc_lut && + crtc_state->pre_csc_lut); + + drm_property_replace_blob(&crtc_state->hw.degamma_lut, + NULL); + drm_property_replace_blob(&crtc_state->hw.gamma_lut, + crtc_state->post_csc_lut ?: + crtc_state->pre_csc_lut); + } drm_property_replace_blob(&crtc_state->uapi.degamma_lut, crtc_state->hw.degamma_lut); @@ -966,7 +987,7 @@ void intel_modeset_setup_hw_state(struct drm_i915_private *i915, } } - intel_fbc_sanitize(i915); + intel_fbc_sanitize(&i915->display); intel_sanitize_plane_mapping(i915); diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index 293c4d920cf9..ff11836459de 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -252,7 +252,7 @@ struct opregion_asle_ext { #define OPREGION_SIZE (8 * 1024) struct intel_opregion { - struct drm_i915_private *i915; + struct intel_display *display; struct opregion_header *header; struct opregion_acpi *acpi; @@ -268,9 +268,9 @@ struct intel_opregion { struct notifier_block acpi_notifier; }; -static int check_swsci_function(struct drm_i915_private *i915, u32 function) +static int check_swsci_function(struct intel_display *display, u32 function) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; struct opregion_swsci *swsci; u32 main_function, sub_function; @@ -300,20 +300,20 @@ static int check_swsci_function(struct drm_i915_private *i915, u32 function) return 0; } -static int swsci(struct drm_i915_private *dev_priv, +static int swsci(struct intel_display *display, u32 function, u32 parm, u32 *parm_out) { struct opregion_swsci *swsci; - struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); + struct pci_dev *pdev = to_pci_dev(display->drm->dev); u32 scic, dslp; u16 swsci_val; int ret; - ret = check_swsci_function(dev_priv, function); + ret = check_swsci_function(display, function); if (ret) return ret; - swsci = dev_priv->display.opregion->swsci; + swsci = display->opregion->swsci; /* Driver sleep timeout in ms. */ dslp = swsci->dslp; @@ -331,7 +331,7 @@ static int swsci(struct drm_i915_private *dev_priv, /* The spec tells us to do this, but we are the only user... */ scic = swsci->scic; if (scic & SWSCI_SCIC_INDICATOR) { - drm_dbg(&dev_priv->drm, "SWSCI request already in progress\n"); + drm_dbg(display->drm, "SWSCI request already in progress\n"); return -EBUSY; } @@ -355,7 +355,7 @@ static int swsci(struct drm_i915_private *dev_priv, /* Poll for the result. */ #define C (((scic = swsci->scic) & SWSCI_SCIC_INDICATOR) == 0) if (wait_for(C, dslp)) { - drm_dbg(&dev_priv->drm, "SWSCI request timed out\n"); + drm_dbg(display->drm, "SWSCI request timed out\n"); return -ETIMEDOUT; } @@ -364,7 +364,7 @@ static int swsci(struct drm_i915_private *dev_priv, /* Note: scic == 0 is an error! */ if (scic != SWSCI_SCIC_EXIT_STATUS_SUCCESS) { - drm_dbg(&dev_priv->drm, "SWSCI request error %u\n", scic); + drm_dbg(display->drm, "SWSCI request error %u\n", scic); return -EIO; } @@ -381,28 +381,28 @@ static int swsci(struct drm_i915_private *dev_priv, #define DISPLAY_TYPE_EXTERNAL_FLAT_PANEL 2 #define DISPLAY_TYPE_INTERNAL_FLAT_PANEL 3 -int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, +int intel_opregion_notify_encoder(struct intel_encoder *encoder, bool enable) { - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); + struct intel_display *display = to_intel_display(encoder); u32 parm = 0; u32 type = 0; u32 port; int ret; /* don't care about old stuff for now */ - if (!HAS_DDI(dev_priv)) + if (!HAS_DDI(display)) return 0; /* Avoid port out of bounds checks if SWSCI isn't there. */ - ret = check_swsci_function(dev_priv, SWSCI_SBCB_DISPLAY_POWER_STATE); + ret = check_swsci_function(display, SWSCI_SBCB_DISPLAY_POWER_STATE); if (ret) return ret; - if (intel_encoder->type == INTEL_OUTPUT_DSI) + if (encoder->type == INTEL_OUTPUT_DSI) port = 0; else - port = intel_encoder->port; + port = encoder->port; if (port == PORT_E) { port = 0; @@ -419,17 +419,17 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, * number is out of bounds after mapping. */ if (port > 4) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n", - intel_encoder->base.base.id, intel_encoder->base.name, - port_name(intel_encoder->port), port); + encoder->base.base.id, encoder->base.name, + port_name(encoder->port), port); return -EINVAL; } if (!enable) parm |= 4 << 8; - switch (intel_encoder->type) { + switch (encoder->type) { case INTEL_OUTPUT_ANALOG: type = DISPLAY_TYPE_CRT; break; @@ -444,15 +444,15 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, type = DISPLAY_TYPE_INTERNAL_FLAT_PANEL; break; default: - drm_WARN_ONCE(&dev_priv->drm, 1, + drm_WARN_ONCE(display->drm, 1, "unsupported intel_encoder type %d\n", - intel_encoder->type); + encoder->type); return -EINVAL; } parm |= type << (16 + port * 3); - return swsci(dev_priv, SWSCI_SBCB_DISPLAY_POWER_STATE, parm, NULL); + return swsci(display, SWSCI_SBCB_DISPLAY_POWER_STATE, parm, NULL); } static const struct { @@ -466,33 +466,33 @@ static const struct { { PCI_D3cold, 0x04 }, }; -int intel_opregion_notify_adapter(struct drm_i915_private *dev_priv, +int intel_opregion_notify_adapter(struct intel_display *display, pci_power_t state) { int i; - if (!HAS_DDI(dev_priv)) + if (!HAS_DDI(display)) return 0; for (i = 0; i < ARRAY_SIZE(power_state_map); i++) { if (state == power_state_map[i].pci_power_state) - return swsci(dev_priv, SWSCI_SBCB_ADAPTER_POWER_STATE, + return swsci(display, SWSCI_SBCB_ADAPTER_POWER_STATE, power_state_map[i].parm, NULL); } return -EINVAL; } -static u32 asle_set_backlight(struct drm_i915_private *dev_priv, u32 bclp) +static u32 asle_set_backlight(struct intel_display *display, u32 bclp) { struct intel_connector *connector; struct drm_connector_list_iter conn_iter; - struct opregion_asle *asle = dev_priv->display.opregion->asle; + struct opregion_asle *asle = display->opregion->asle; - drm_dbg(&dev_priv->drm, "bclp = 0x%08x\n", bclp); + drm_dbg(display->drm, "bclp = 0x%08x\n", bclp); if (acpi_video_get_backlight_type() == acpi_backlight_native) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "opregion backlight request ignored\n"); return 0; } @@ -504,104 +504,104 @@ static u32 asle_set_backlight(struct drm_i915_private *dev_priv, u32 bclp) if (bclp > 255) return ASLC_BACKLIGHT_FAILED; - drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, NULL); + drm_modeset_lock(&display->drm->mode_config.connection_mutex, NULL); /* * Update backlight on all connectors that support backlight (usually * only one). */ - drm_dbg_kms(&dev_priv->drm, "updating opregion backlight %d/255\n", + drm_dbg_kms(display->drm, "updating opregion backlight %d/255\n", bclp); - drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) intel_backlight_set_acpi(connector->base.state, bclp, 255); drm_connector_list_iter_end(&conn_iter); asle->cblv = DIV_ROUND_UP(bclp * 100, 255) | ASLE_CBLV_VALID; - drm_modeset_unlock(&dev_priv->drm.mode_config.connection_mutex); + drm_modeset_unlock(&display->drm->mode_config.connection_mutex); return 0; } -static u32 asle_set_als_illum(struct drm_i915_private *dev_priv, u32 alsi) +static u32 asle_set_als_illum(struct intel_display *display, u32 alsi) { /* alsi is the current ALS reading in lux. 0 indicates below sensor range, 0xffff indicates above sensor range. 1-0xfffe are valid */ - drm_dbg(&dev_priv->drm, "Illum is not supported\n"); + drm_dbg(display->drm, "Illum is not supported\n"); return ASLC_ALS_ILLUM_FAILED; } -static u32 asle_set_pwm_freq(struct drm_i915_private *dev_priv, u32 pfmb) +static u32 asle_set_pwm_freq(struct intel_display *display, u32 pfmb) { - drm_dbg(&dev_priv->drm, "PWM freq is not supported\n"); + drm_dbg(display->drm, "PWM freq is not supported\n"); return ASLC_PWM_FREQ_FAILED; } -static u32 asle_set_pfit(struct drm_i915_private *dev_priv, u32 pfit) +static u32 asle_set_pfit(struct intel_display *display, u32 pfit) { /* Panel fitting is currently controlled by the X code, so this is a noop until modesetting support works fully */ - drm_dbg(&dev_priv->drm, "Pfit is not supported\n"); + drm_dbg(display->drm, "Pfit is not supported\n"); return ASLC_PFIT_FAILED; } -static u32 asle_set_supported_rotation_angles(struct drm_i915_private *dev_priv, u32 srot) +static u32 asle_set_supported_rotation_angles(struct intel_display *display, u32 srot) { - drm_dbg(&dev_priv->drm, "SROT is not supported\n"); + drm_dbg(display->drm, "SROT is not supported\n"); return ASLC_ROTATION_ANGLES_FAILED; } -static u32 asle_set_button_array(struct drm_i915_private *dev_priv, u32 iuer) +static u32 asle_set_button_array(struct intel_display *display, u32 iuer) { if (!iuer) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Button array event is not supported (nothing)\n"); if (iuer & ASLE_IUER_ROTATION_LOCK_BTN) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Button array event is not supported (rotation lock)\n"); if (iuer & ASLE_IUER_VOLUME_DOWN_BTN) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Button array event is not supported (volume down)\n"); if (iuer & ASLE_IUER_VOLUME_UP_BTN) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Button array event is not supported (volume up)\n"); if (iuer & ASLE_IUER_WINDOWS_BTN) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Button array event is not supported (windows)\n"); if (iuer & ASLE_IUER_POWER_BTN) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Button array event is not supported (power)\n"); return ASLC_BUTTON_ARRAY_FAILED; } -static u32 asle_set_convertible(struct drm_i915_private *dev_priv, u32 iuer) +static u32 asle_set_convertible(struct intel_display *display, u32 iuer) { if (iuer & ASLE_IUER_CONVERTIBLE) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Convertible is not supported (clamshell)\n"); else - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Convertible is not supported (slate)\n"); return ASLC_CONVERTIBLE_FAILED; } -static u32 asle_set_docking(struct drm_i915_private *dev_priv, u32 iuer) +static u32 asle_set_docking(struct intel_display *display, u32 iuer) { if (iuer & ASLE_IUER_DOCKING) - drm_dbg(&dev_priv->drm, "Docking is not supported (docked)\n"); + drm_dbg(display->drm, "Docking is not supported (docked)\n"); else - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "Docking is not supported (undocked)\n"); return ASLC_DOCKING_FAILED; } -static u32 asle_isct_state(struct drm_i915_private *dev_priv) +static u32 asle_isct_state(struct intel_display *display) { - drm_dbg(&dev_priv->drm, "ISCT is not supported\n"); + drm_dbg(display->drm, "ISCT is not supported\n"); return ASLC_ISCT_STATE_FAILED; } @@ -609,7 +609,7 @@ static void asle_work(struct work_struct *work) { struct intel_opregion *opregion = container_of(work, struct intel_opregion, asle_work); - struct drm_i915_private *dev_priv = opregion->i915; + struct intel_display *display = opregion->display; struct opregion_asle *asle = opregion->asle; u32 aslc_stat = 0; u32 aslc_req; @@ -620,50 +620,51 @@ static void asle_work(struct work_struct *work) aslc_req = asle->aslc; if (!(aslc_req & ASLC_REQ_MSK)) { - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "No request on ASLC interrupt 0x%08x\n", aslc_req); return; } if (aslc_req & ASLC_SET_ALS_ILLUM) - aslc_stat |= asle_set_als_illum(dev_priv, asle->alsi); + aslc_stat |= asle_set_als_illum(display, asle->alsi); if (aslc_req & ASLC_SET_BACKLIGHT) - aslc_stat |= asle_set_backlight(dev_priv, asle->bclp); + aslc_stat |= asle_set_backlight(display, asle->bclp); if (aslc_req & ASLC_SET_PFIT) - aslc_stat |= asle_set_pfit(dev_priv, asle->pfit); + aslc_stat |= asle_set_pfit(display, asle->pfit); if (aslc_req & ASLC_SET_PWM_FREQ) - aslc_stat |= asle_set_pwm_freq(dev_priv, asle->pfmb); + aslc_stat |= asle_set_pwm_freq(display, asle->pfmb); if (aslc_req & ASLC_SUPPORTED_ROTATION_ANGLES) - aslc_stat |= asle_set_supported_rotation_angles(dev_priv, + aslc_stat |= asle_set_supported_rotation_angles(display, asle->srot); if (aslc_req & ASLC_BUTTON_ARRAY) - aslc_stat |= asle_set_button_array(dev_priv, asle->iuer); + aslc_stat |= asle_set_button_array(display, asle->iuer); if (aslc_req & ASLC_CONVERTIBLE_INDICATOR) - aslc_stat |= asle_set_convertible(dev_priv, asle->iuer); + aslc_stat |= asle_set_convertible(display, asle->iuer); if (aslc_req & ASLC_DOCKING_INDICATOR) - aslc_stat |= asle_set_docking(dev_priv, asle->iuer); + aslc_stat |= asle_set_docking(display, asle->iuer); if (aslc_req & ASLC_ISCT_STATE_CHANGE) - aslc_stat |= asle_isct_state(dev_priv); + aslc_stat |= asle_isct_state(display); asle->aslc = aslc_stat; } -bool intel_opregion_asle_present(struct drm_i915_private *i915) +bool intel_opregion_asle_present(struct intel_display *display) { - return i915->display.opregion && i915->display.opregion->asle; + return display->opregion && display->opregion->asle; } -void intel_opregion_asle_intr(struct drm_i915_private *i915) +void intel_opregion_asle_intr(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct drm_i915_private *i915 = to_i915(display->drm); + struct intel_opregion *opregion = display->opregion; if (opregion && opregion->asle) queue_work(i915->unordered_wq, &opregion->asle_work); @@ -720,9 +721,9 @@ static void set_did(struct intel_opregion *opregion, int i, u32 val) } } -static void intel_didl_outputs(struct drm_i915_private *dev_priv) +static void intel_didl_outputs(struct intel_display *display) { - struct intel_opregion *opregion = dev_priv->display.opregion; + struct intel_opregion *opregion = display->opregion; struct intel_connector *connector; struct drm_connector_list_iter conn_iter; int i = 0, max_outputs; @@ -737,9 +738,9 @@ static void intel_didl_outputs(struct drm_i915_private *dev_priv) max_outputs = ARRAY_SIZE(opregion->acpi->didl) + ARRAY_SIZE(opregion->acpi->did2); - intel_acpi_device_id_update(dev_priv); + intel_acpi_device_id_update(display); - drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (i < max_outputs) set_did(opregion, i, connector->acpi_device_id); @@ -747,10 +748,10 @@ static void intel_didl_outputs(struct drm_i915_private *dev_priv) } drm_connector_list_iter_end(&conn_iter); - drm_dbg_kms(&dev_priv->drm, "%d outputs detected\n", i); + drm_dbg_kms(display->drm, "%d outputs detected\n", i); if (i > max_outputs) - drm_err(&dev_priv->drm, + drm_err(display->drm, "More than %d outputs in connector list\n", max_outputs); @@ -759,9 +760,9 @@ static void intel_didl_outputs(struct drm_i915_private *dev_priv) set_did(opregion, i, 0); } -static void intel_setup_cadls(struct drm_i915_private *dev_priv) +static void intel_setup_cadls(struct intel_display *display) { - struct intel_opregion *opregion = dev_priv->display.opregion; + struct intel_opregion *opregion = display->opregion; struct intel_connector *connector; struct drm_connector_list_iter conn_iter; int i = 0; @@ -776,7 +777,7 @@ static void intel_setup_cadls(struct drm_i915_private *dev_priv) * Note that internal panels should be at the front of the connector * list already, ensuring they're not left out. */ - drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); + drm_connector_list_iter_begin(display->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (i >= ARRAY_SIZE(opregion->acpi->cadl)) break; @@ -789,9 +790,9 @@ static void intel_setup_cadls(struct drm_i915_private *dev_priv) opregion->acpi->cadl[i] = 0; } -static void swsci_setup(struct drm_i915_private *dev_priv) +static void swsci_setup(struct intel_display *display) { - struct intel_opregion *opregion = dev_priv->display.opregion; + struct intel_opregion *opregion = display->opregion; bool requested_callbacks = false; u32 tmp; @@ -800,7 +801,7 @@ static void swsci_setup(struct drm_i915_private *dev_priv) opregion->swsci_sbcb_sub_functions = 1; /* We use GBDA to ask for supported GBDA calls. */ - if (swsci(dev_priv, SWSCI_GBDA_SUPPORTED_CALLS, 0, &tmp) == 0) { + if (swsci(display, SWSCI_GBDA_SUPPORTED_CALLS, 0, &tmp) == 0) { /* make the bits match the sub-function codes */ tmp <<= 1; opregion->swsci_gbda_sub_functions |= tmp; @@ -811,7 +812,7 @@ static void swsci_setup(struct drm_i915_private *dev_priv) * must not call interfaces that are not specifically requested by the * bios. */ - if (swsci(dev_priv, SWSCI_GBDA_REQUESTED_CALLBACKS, 0, &tmp) == 0) { + if (swsci(display, SWSCI_GBDA_REQUESTED_CALLBACKS, 0, &tmp) == 0) { /* here, the bits already match sub-function codes */ opregion->swsci_sbcb_sub_functions |= tmp; requested_callbacks = true; @@ -822,7 +823,7 @@ static void swsci_setup(struct drm_i915_private *dev_priv) * the callback is _requested_. But we still can't call interfaces that * are not requested. */ - if (swsci(dev_priv, SWSCI_SBCB_SUPPORTED_CALLBACKS, 0, &tmp) == 0) { + if (swsci(display, SWSCI_SBCB_SUPPORTED_CALLBACKS, 0, &tmp) == 0) { /* make the bits match the sub-function codes */ u32 low = tmp & 0x7ff; u32 high = tmp & ~0xfff; /* bit 11 is reserved */ @@ -832,7 +833,7 @@ static void swsci_setup(struct drm_i915_private *dev_priv) if (requested_callbacks) { u32 req = opregion->swsci_sbcb_sub_functions; if ((req & tmp) != req) - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "SWSCI BIOS requested (%08x) SBCB callbacks that are not supported (%08x)\n", req, tmp); /* XXX: for now, trust the requested callbacks */ @@ -842,7 +843,7 @@ static void swsci_setup(struct drm_i915_private *dev_priv) } } - drm_dbg(&dev_priv->drm, + drm_dbg(display->drm, "SWSCI GBDA callbacks %08x, SBCB callbacks %08x\n", opregion->swsci_gbda_sub_functions, opregion->swsci_sbcb_sub_functions); @@ -867,10 +868,10 @@ static const struct dmi_system_id intel_no_opregion_vbt[] = { { } }; -int intel_opregion_setup(struct drm_i915_private *dev_priv) +int intel_opregion_setup(struct intel_display *display) { struct intel_opregion *opregion; - struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); + struct pci_dev *pdev = to_pci_dev(display->drm->dev); u32 asls, mboxes; char buf[sizeof(OPREGION_SIGNATURE)]; int err = 0; @@ -885,10 +886,10 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) BUILD_BUG_ON(sizeof(struct opregion_asle_ext) != 0x400); pci_read_config_dword(pdev, ASLS, &asls); - drm_dbg(&dev_priv->drm, "graphic opregion physical addr: 0x%x\n", + drm_dbg(display->drm, "graphic opregion physical addr: 0x%x\n", asls); if (asls == 0) { - drm_dbg(&dev_priv->drm, "ACPI OpRegion not supported!\n"); + drm_dbg(display->drm, "ACPI OpRegion not supported!\n"); return -ENOTSUPP; } @@ -896,8 +897,8 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) if (!opregion) return -ENOMEM; - opregion->i915 = dev_priv; - dev_priv->display.opregion = opregion; + opregion->display = display; + display->opregion = opregion; INIT_WORK(&opregion->asle_work, asle_work); @@ -910,20 +911,20 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) memcpy(buf, base, sizeof(buf)); if (memcmp(buf, OPREGION_SIGNATURE, 16)) { - drm_dbg(&dev_priv->drm, "opregion signature mismatch\n"); + drm_dbg(display->drm, "opregion signature mismatch\n"); err = -EINVAL; goto err_out; } opregion->header = base; - drm_dbg(&dev_priv->drm, "ACPI OpRegion version %u.%u.%u\n", + drm_dbg(display->drm, "ACPI OpRegion version %u.%u.%u\n", opregion->header->over.major, opregion->header->over.minor, opregion->header->over.revision); mboxes = opregion->header->mboxes; if (mboxes & MBOX_ACPI) { - drm_dbg(&dev_priv->drm, "Public ACPI methods supported\n"); + drm_dbg(display->drm, "Public ACPI methods supported\n"); opregion->acpi = base + OPREGION_ACPI_OFFSET; /* * Indicate we handle monitor hotplug events ourselves so we do @@ -938,30 +939,30 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) u8 major = opregion->header->over.major; if (major >= 3) { - drm_err(&dev_priv->drm, "SWSCI Mailbox #2 present for opregion v3.x, ignoring\n"); + drm_err(display->drm, "SWSCI Mailbox #2 present for opregion v3.x, ignoring\n"); } else { if (major >= 2) - drm_dbg(&dev_priv->drm, "SWSCI Mailbox #2 present for opregion v2.x\n"); - drm_dbg(&dev_priv->drm, "SWSCI supported\n"); + drm_dbg(display->drm, "SWSCI Mailbox #2 present for opregion v2.x\n"); + drm_dbg(display->drm, "SWSCI supported\n"); opregion->swsci = base + OPREGION_SWSCI_OFFSET; - swsci_setup(dev_priv); + swsci_setup(display); } } if (mboxes & MBOX_ASLE) { - drm_dbg(&dev_priv->drm, "ASLE supported\n"); + drm_dbg(display->drm, "ASLE supported\n"); opregion->asle = base + OPREGION_ASLE_OFFSET; opregion->asle->ardy = ASLE_ARDY_NOT_READY; } if (mboxes & MBOX_ASLE_EXT) { - drm_dbg(&dev_priv->drm, "ASLE extension supported\n"); + drm_dbg(display->drm, "ASLE extension supported\n"); opregion->asle_ext = base + OPREGION_ASLE_EXT_OFFSET; } if (mboxes & MBOX_BACKLIGHT) { - drm_dbg(&dev_priv->drm, "Mailbox #2 for backlight present\n"); + drm_dbg(display->drm, "Mailbox #2 for backlight present\n"); } if (dmi_check_system(intel_no_opregion_vbt)) @@ -979,7 +980,7 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) */ if (opregion->header->over.major > 2 || opregion->header->over.minor >= 1) { - drm_WARN_ON(&dev_priv->drm, rvda < OPREGION_SIZE); + drm_WARN_ON(display->drm, rvda < OPREGION_SIZE); rvda += asls; } @@ -989,14 +990,14 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) vbt = opregion->rvda; vbt_size = opregion->asle->rvds; - if (intel_bios_is_valid_vbt(dev_priv, vbt, vbt_size)) { - drm_dbg_kms(&dev_priv->drm, + if (intel_bios_is_valid_vbt(display, vbt, vbt_size)) { + drm_dbg_kms(display->drm, "Found valid VBT in ACPI OpRegion (RVDA)\n"); opregion->vbt = vbt; opregion->vbt_size = vbt_size; goto out; } else { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Invalid VBT in ACPI OpRegion (RVDA)\n"); memunmap(opregion->rvda); opregion->rvda = NULL; @@ -1014,13 +1015,13 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) vbt_size = (mboxes & MBOX_ASLE_EXT) ? OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE; vbt_size -= OPREGION_VBT_OFFSET; - if (intel_bios_is_valid_vbt(dev_priv, vbt, vbt_size)) { - drm_dbg_kms(&dev_priv->drm, + if (intel_bios_is_valid_vbt(display, vbt, vbt_size)) { + drm_dbg_kms(display->drm, "Found valid VBT in ACPI OpRegion (Mailbox #4)\n"); opregion->vbt = vbt; opregion->vbt_size = vbt_size; } else { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Invalid VBT in ACPI OpRegion (Mailbox #4)\n"); } @@ -1031,7 +1032,7 @@ err_out: memunmap(base); err_memremap: kfree(opregion); - dev_priv->display.opregion = NULL; + display->opregion = NULL; return err; } @@ -1054,25 +1055,25 @@ static const struct dmi_system_id intel_use_opregion_panel_type[] = { }; int -intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) +intel_opregion_get_panel_type(struct intel_display *display) { u32 panel_details; int ret; - ret = swsci(dev_priv, SWSCI_GBDA_PANEL_DETAILS, 0x0, &panel_details); + ret = swsci(display, SWSCI_GBDA_PANEL_DETAILS, 0x0, &panel_details); if (ret) return ret; ret = (panel_details >> 8) & 0xff; if (ret > 0x10) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Invalid OpRegion panel type 0x%x\n", ret); return -EINVAL; } /* fall back to VBT panel type? */ if (ret == 0x0) { - drm_dbg_kms(&dev_priv->drm, "No panel type in OpRegion\n"); + drm_dbg_kms(display->drm, "No panel type in OpRegion\n"); return -ENODEV; } @@ -1082,7 +1083,7 @@ intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) * via a quirk list :( */ if (!dmi_check_system(intel_use_opregion_panel_type)) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Ignoring OpRegion panel type (%d)\n", ret - 1); return -ENODEV; } @@ -1092,7 +1093,7 @@ intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) /** * intel_opregion_get_edid - Fetch EDID from ACPI OpRegion mailbox #5 - * @intel_connector: eDP connector + * @connector: eDP connector * * This reads the ACPI Opregion mailbox #5 to extract the EDID that is passed * to it. @@ -1101,11 +1102,10 @@ intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) * The EDID in the OpRegion, or NULL if there is none or it's invalid. * */ -const struct drm_edid *intel_opregion_get_edid(struct intel_connector *intel_connector) +const struct drm_edid *intel_opregion_get_edid(struct intel_connector *connector) { - struct drm_connector *connector = &intel_connector->base; - struct drm_i915_private *i915 = to_i915(connector->dev); - struct intel_opregion *opregion = i915->display.opregion; + struct intel_display *display = to_intel_display(connector); + struct intel_opregion *opregion = display->opregion; const struct drm_edid *drm_edid; const void *edid; int len; @@ -1123,7 +1123,7 @@ const struct drm_edid *intel_opregion_get_edid(struct intel_connector *intel_con drm_edid = drm_edid_alloc(edid, len); if (!drm_edid_valid(drm_edid)) { - drm_dbg_kms(&i915->drm, "Invalid EDID in ACPI OpRegion (Mailbox #5)\n"); + drm_dbg_kms(display->drm, "Invalid EDID in ACPI OpRegion (Mailbox #5)\n"); drm_edid_free(drm_edid); drm_edid = NULL; } @@ -1131,9 +1131,9 @@ const struct drm_edid *intel_opregion_get_edid(struct intel_connector *intel_con return drm_edid; } -bool intel_opregion_vbt_present(struct drm_i915_private *i915) +bool intel_opregion_vbt_present(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; if (!opregion || !opregion->vbt) return false; @@ -1141,9 +1141,9 @@ bool intel_opregion_vbt_present(struct drm_i915_private *i915) return true; } -const void *intel_opregion_get_vbt(struct drm_i915_private *i915, size_t *size) +const void *intel_opregion_get_vbt(struct intel_display *display, size_t *size) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; if (!opregion || !opregion->vbt) return NULL; @@ -1154,9 +1154,9 @@ const void *intel_opregion_get_vbt(struct drm_i915_private *i915, size_t *size) return kmemdup(opregion->vbt, opregion->vbt_size, GFP_KERNEL); } -bool intel_opregion_headless_sku(struct drm_i915_private *i915) +bool intel_opregion_headless_sku(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; struct opregion_header *header; if (!opregion) @@ -1171,9 +1171,9 @@ bool intel_opregion_headless_sku(struct drm_i915_private *i915) return opregion->header->pcon & PCON_HEADLESS_SKU; } -void intel_opregion_register(struct drm_i915_private *i915) +void intel_opregion_register(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; if (!opregion) return; @@ -1184,16 +1184,16 @@ void intel_opregion_register(struct drm_i915_private *i915) register_acpi_notifier(&opregion->acpi_notifier); } - intel_opregion_resume(i915); + intel_opregion_resume(display); } -static void intel_opregion_resume_display(struct drm_i915_private *i915) +static void intel_opregion_resume_display(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; if (opregion->acpi) { - intel_didl_outputs(i915); - intel_setup_cadls(i915); + intel_didl_outputs(display); + intel_setup_cadls(display); /* * Notify BIOS we are ready to handle ACPI video ext notifs. @@ -1210,25 +1210,25 @@ static void intel_opregion_resume_display(struct drm_i915_private *i915) } /* Some platforms abuse the _DSM to enable MUX */ - intel_dsm_get_bios_data_funcs_supported(i915); + intel_dsm_get_bios_data_funcs_supported(display); } -void intel_opregion_resume(struct drm_i915_private *i915) +void intel_opregion_resume(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; if (!opregion) return; - if (HAS_DISPLAY(i915)) - intel_opregion_resume_display(i915); + if (HAS_DISPLAY(display)) + intel_opregion_resume_display(display); - intel_opregion_notify_adapter(i915, PCI_D0); + intel_opregion_notify_adapter(display, PCI_D0); } -static void intel_opregion_suspend_display(struct drm_i915_private *i915) +static void intel_opregion_suspend_display(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; if (opregion->asle) opregion->asle->ardy = ASLE_ARDY_NOT_READY; @@ -1239,24 +1239,24 @@ static void intel_opregion_suspend_display(struct drm_i915_private *i915) opregion->acpi->drdy = 0; } -void intel_opregion_suspend(struct drm_i915_private *i915, pci_power_t state) +void intel_opregion_suspend(struct intel_display *display, pci_power_t state) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; if (!opregion) return; - intel_opregion_notify_adapter(i915, state); + intel_opregion_notify_adapter(display, state); - if (HAS_DISPLAY(i915)) - intel_opregion_suspend_display(i915); + if (HAS_DISPLAY(display)) + intel_opregion_suspend_display(display); } -void intel_opregion_unregister(struct drm_i915_private *i915) +void intel_opregion_unregister(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; - intel_opregion_suspend(i915, PCI_D1); + intel_opregion_suspend(display, PCI_D1); if (!opregion) return; @@ -1267,9 +1267,9 @@ void intel_opregion_unregister(struct drm_i915_private *i915) } } -void intel_opregion_cleanup(struct drm_i915_private *i915) +void intel_opregion_cleanup(struct intel_display *display) { - struct intel_opregion *opregion = i915->display.opregion; + struct intel_opregion *opregion = display->opregion; if (!opregion) return; @@ -1278,13 +1278,13 @@ void intel_opregion_cleanup(struct drm_i915_private *i915) if (opregion->rvda) memunmap(opregion->rvda); kfree(opregion); - i915->display.opregion = NULL; + display->opregion = NULL; } static int intel_opregion_show(struct seq_file *m, void *unused) { - struct drm_i915_private *i915 = m->private; - struct intel_opregion *opregion = i915->display.opregion; + struct intel_display *display = m->private; + struct intel_opregion *opregion = display->opregion; if (opregion) seq_write(m, opregion->header, OPREGION_SIZE); @@ -1294,10 +1294,10 @@ static int intel_opregion_show(struct seq_file *m, void *unused) DEFINE_SHOW_ATTRIBUTE(intel_opregion); -void intel_opregion_debugfs_register(struct drm_i915_private *i915) +void intel_opregion_debugfs_register(struct intel_display *display) { - struct drm_minor *minor = i915->drm.primary; + struct drm_minor *minor = display->drm->primary; debugfs_create_file("i915_opregion", 0444, minor->debugfs_root, - i915, &intel_opregion_fops); + display, &intel_opregion_fops); } diff --git a/drivers/gpu/drm/i915/display/intel_opregion.h b/drivers/gpu/drm/i915/display/intel_opregion.h index 4b2b8e752632..8101eeebfd8b 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.h +++ b/drivers/gpu/drm/i915/display/intel_opregion.h @@ -28,88 +28,88 @@ #include <linux/pci.h> #include <linux/types.h> -struct drm_i915_private; struct intel_connector; +struct intel_display; struct intel_encoder; #ifdef CONFIG_ACPI -int intel_opregion_setup(struct drm_i915_private *dev_priv); -void intel_opregion_cleanup(struct drm_i915_private *i915); +int intel_opregion_setup(struct intel_display *display); +void intel_opregion_cleanup(struct intel_display *display); -void intel_opregion_register(struct drm_i915_private *dev_priv); -void intel_opregion_unregister(struct drm_i915_private *dev_priv); +void intel_opregion_register(struct intel_display *display); +void intel_opregion_unregister(struct intel_display *display); -void intel_opregion_resume(struct drm_i915_private *dev_priv); -void intel_opregion_suspend(struct drm_i915_private *dev_priv, +void intel_opregion_resume(struct intel_display *display); +void intel_opregion_suspend(struct intel_display *display, pci_power_t state); -bool intel_opregion_asle_present(struct drm_i915_private *i915); -void intel_opregion_asle_intr(struct drm_i915_private *dev_priv); -int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, +bool intel_opregion_asle_present(struct intel_display *display); +void intel_opregion_asle_intr(struct intel_display *display); +int intel_opregion_notify_encoder(struct intel_encoder *encoder, bool enable); -int intel_opregion_notify_adapter(struct drm_i915_private *dev_priv, +int intel_opregion_notify_adapter(struct intel_display *display, pci_power_t state); -int intel_opregion_get_panel_type(struct drm_i915_private *dev_priv); +int intel_opregion_get_panel_type(struct intel_display *display); const struct drm_edid *intel_opregion_get_edid(struct intel_connector *connector); -bool intel_opregion_vbt_present(struct drm_i915_private *i915); -const void *intel_opregion_get_vbt(struct drm_i915_private *i915, size_t *size); +bool intel_opregion_vbt_present(struct intel_display *display); +const void *intel_opregion_get_vbt(struct intel_display *display, size_t *size); -bool intel_opregion_headless_sku(struct drm_i915_private *i915); +bool intel_opregion_headless_sku(struct intel_display *display); -void intel_opregion_debugfs_register(struct drm_i915_private *i915); +void intel_opregion_debugfs_register(struct intel_display *display); #else /* CONFIG_ACPI*/ -static inline int intel_opregion_setup(struct drm_i915_private *dev_priv) +static inline int intel_opregion_setup(struct intel_display *display) { return 0; } -static inline void intel_opregion_cleanup(struct drm_i915_private *i915) +static inline void intel_opregion_cleanup(struct intel_display *display) { } -static inline void intel_opregion_register(struct drm_i915_private *dev_priv) +static inline void intel_opregion_register(struct intel_display *display) { } -static inline void intel_opregion_unregister(struct drm_i915_private *dev_priv) +static inline void intel_opregion_unregister(struct intel_display *display) { } -static inline void intel_opregion_resume(struct drm_i915_private *dev_priv) +static inline void intel_opregion_resume(struct intel_display *display) { } -static inline void intel_opregion_suspend(struct drm_i915_private *dev_priv, +static inline void intel_opregion_suspend(struct intel_display *display, pci_power_t state) { } -static inline bool intel_opregion_asle_present(struct drm_i915_private *i915) +static inline bool intel_opregion_asle_present(struct intel_display *display) { return false; } -static inline void intel_opregion_asle_intr(struct drm_i915_private *dev_priv) +static inline void intel_opregion_asle_intr(struct intel_display *display) { } static inline int -intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, bool enable) +intel_opregion_notify_encoder(struct intel_encoder *encoder, bool enable) { return 0; } static inline int -intel_opregion_notify_adapter(struct drm_i915_private *dev, pci_power_t state) +intel_opregion_notify_adapter(struct intel_display *display, pci_power_t state) { return 0; } -static inline int intel_opregion_get_panel_type(struct drm_i915_private *dev) +static inline int intel_opregion_get_panel_type(struct intel_display *display) { return -ENODEV; } @@ -120,23 +120,23 @@ intel_opregion_get_edid(struct intel_connector *connector) return NULL; } -static inline bool intel_opregion_vbt_present(struct drm_i915_private *i915) +static inline bool intel_opregion_vbt_present(struct intel_display *display) { return false; } static inline const void * -intel_opregion_get_vbt(struct drm_i915_private *i915, size_t *size) +intel_opregion_get_vbt(struct intel_display *display, size_t *size) { return NULL; } -static inline bool intel_opregion_headless_sku(struct drm_i915_private *i915) +static inline bool intel_opregion_headless_sku(struct intel_display *display) { return false; } -static inline void intel_opregion_debugfs_register(struct drm_i915_private *i915) +static inline void intel_opregion_debugfs_register(struct intel_display *display) { } diff --git a/drivers/gpu/drm/i915/display/intel_pmdemand.c b/drivers/gpu/drm/i915/display/intel_pmdemand.c index 9ca981b7a12c..ceaf9e3147da 100644 --- a/drivers/gpu/drm/i915/display/intel_pmdemand.c +++ b/drivers/gpu/drm/i915/display/intel_pmdemand.c @@ -92,7 +92,7 @@ int intel_pmdemand_init(struct drm_i915_private *i915) &pmdemand_state->base, &intel_pmdemand_funcs); - if (IS_DISPLAY_IP_STEP(i915, IP_VER(14, 0), STEP_A0, STEP_C0)) + if (IS_DISPLAY_VER_STEP(i915, IP_VER(14, 0), STEP_A0, STEP_C0)) /* Wa_14016740474 */ intel_de_rmw(i915, XELPD_CHICKEN_DCPR_3, 0, DMD_RSP_TIMEOUT_DISABLE); diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 42306bc4ba86..68141af4da54 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -351,6 +351,9 @@ static int intel_num_pps(struct drm_i915_private *i915) if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_DG1) return 1; @@ -948,6 +951,14 @@ void intel_pps_on_unlocked(struct intel_dp *intel_dp) intel_de_posting_read(dev_priv, pp_ctrl_reg); } + /* + * WA: 22019252566 + * Disable DPLS gating around power sequence. + */ + if (IS_DISPLAY_VER(dev_priv, 13, 14)) + intel_de_rmw(dev_priv, SOUTH_DSPCLK_GATE_D, + 0, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); + pp |= PANEL_POWER_ON; if (!IS_IRONLAKE(dev_priv)) pp |= PANEL_POWER_RESET; @@ -958,6 +969,10 @@ void intel_pps_on_unlocked(struct intel_dp *intel_dp) wait_panel_on(intel_dp); intel_dp->pps.last_power_on = jiffies; + if (IS_DISPLAY_VER(dev_priv, 13, 14)) + intel_de_rmw(dev_priv, SOUTH_DSPCLK_GATE_D, + PCH_DPLSUNIT_CLOCK_GATE_DISABLE, 0); + if (IS_IRONLAKE(dev_priv)) { pp |= PANEL_POWER_RESET; /* restore panel reset bit */ intel_de_write(dev_priv, pp_ctrl_reg, pp); @@ -1468,7 +1483,7 @@ static void pps_init_registers(struct intel_dp *intel_dp, bool force_disable_vdd { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); u32 pp_on, pp_off, port_sel = 0; - int div = RUNTIME_INFO(dev_priv)->rawclk_freq / 1000; + int div = DISPLAY_RUNTIME_INFO(dev_priv)->rawclk_freq / 1000; struct pps_registers regs; enum port port = dp_to_dig_port(intel_dp)->base.port; const struct edp_power_seq *seq = &intel_dp->pps.pps_delays; diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 9cb1cdaaeefa..257526362b39 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -1586,6 +1586,12 @@ _panel_replay_compute_config(struct intel_dp *intel_dp, if (!alpm_config_valid(intel_dp, crtc_state, true)) return false; + if (crtc_state->crc_enabled) { + drm_dbg_kms(&i915->drm, + "Panel Replay not enabled because it would inhibit pipe CRC calculation\n"); + return false; + } + return true; } @@ -1862,14 +1868,14 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, * cause issues if non-supported panels are used. */ if (!intel_dp->psr.panel_replay_enabled && - (IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0) || + (IS_DISPLAY_VER_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0) || IS_ALDERLAKE_P(dev_priv))) intel_de_rmw(dev_priv, hsw_chicken_trans_reg(dev_priv, cpu_transcoder), 0, ADLP_1_BASED_X_GRANULARITY); /* Wa_16012604467:adlp,mtl[a0,b0] */ if (!intel_dp->psr.panel_replay_enabled && - IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0)) + IS_DISPLAY_VER_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0)) intel_de_rmw(dev_priv, MTL_CLKGATE_DIS_TRANS(dev_priv, cpu_transcoder), 0, @@ -2051,7 +2057,7 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) if (intel_dp->psr.sel_update_enabled) { /* Wa_16012604467:adlp,mtl[a0,b0] */ if (!intel_dp->psr.panel_replay_enabled && - IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0)) + IS_DISPLAY_VER_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0)) intel_de_rmw(dev_priv, MTL_CLKGATE_DIS_TRANS(dev_priv, cpu_transcoder), MTL_CLKGATE_DIS_TRANS_DMASC_GATING_DIS, 0); @@ -2536,7 +2542,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, /* Wa_14014971492 */ if (!crtc_state->has_panel_replay && - ((IS_DISPLAY_IP_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0) || + ((IS_DISPLAY_VER_STEP(dev_priv, IP_VER(14, 0), STEP_A0, STEP_B0) || IS_ALDERLAKE_P(dev_priv) || IS_TIGERLAKE(dev_priv))) && crtc_state->splitter.enable) crtc_state->psr2_su_area.y1 = 0; diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index d0d712405129..7cc519b402e9 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -95,7 +95,7 @@ struct intel_sdvo { struct intel_encoder base; struct i2c_adapter *i2c; - u8 slave_addr; + u8 target_addr; struct intel_sdvo_ddc ddc[3]; @@ -255,13 +255,13 @@ static bool intel_sdvo_read_byte(struct intel_sdvo *intel_sdvo, u8 addr, u8 *ch) struct drm_i915_private *i915 = to_i915(intel_sdvo->base.base.dev); struct i2c_msg msgs[] = { { - .addr = intel_sdvo->slave_addr, + .addr = intel_sdvo->target_addr, .flags = 0, .len = 1, .buf = &addr, }, { - .addr = intel_sdvo->slave_addr, + .addr = intel_sdvo->target_addr, .flags = I2C_M_RD, .len = 1, .buf = ch, @@ -483,14 +483,14 @@ static bool __intel_sdvo_write_cmd(struct intel_sdvo *intel_sdvo, u8 cmd, intel_sdvo_debug_write(intel_sdvo, cmd, args, args_len); for (i = 0; i < args_len; i++) { - msgs[i].addr = intel_sdvo->slave_addr; + msgs[i].addr = intel_sdvo->target_addr; msgs[i].flags = 0; msgs[i].len = 2; msgs[i].buf = buf + 2 *i; buf[2*i + 0] = SDVO_I2C_ARG_0 - i; buf[2*i + 1] = ((u8*)args)[i]; } - msgs[i].addr = intel_sdvo->slave_addr; + msgs[i].addr = intel_sdvo->target_addr; msgs[i].flags = 0; msgs[i].len = 2; msgs[i].buf = buf + 2*i; @@ -499,12 +499,12 @@ static bool __intel_sdvo_write_cmd(struct intel_sdvo *intel_sdvo, u8 cmd, /* the following two are to read the response */ status = SDVO_I2C_CMD_STATUS; - msgs[i+1].addr = intel_sdvo->slave_addr; + msgs[i+1].addr = intel_sdvo->target_addr; msgs[i+1].flags = 0; msgs[i+1].len = 1; msgs[i+1].buf = &status; - msgs[i+2].addr = intel_sdvo->slave_addr; + msgs[i+2].addr = intel_sdvo->target_addr; msgs[i+2].flags = I2C_M_RD; msgs[i+2].len = 1; msgs[i+2].buf = &status; @@ -2652,9 +2652,9 @@ intel_sdvo_select_i2c_bus(struct intel_sdvo *sdvo) else pin = GMBUS_PIN_DPB; - drm_dbg_kms(&dev_priv->drm, "[ENCODER:%d:%s] I2C pin %d, slave addr 0x%x\n", + drm_dbg_kms(&dev_priv->drm, "[ENCODER:%d:%s] I2C pin %d, target addr 0x%x\n", sdvo->base.base.base.id, sdvo->base.base.name, - pin, sdvo->slave_addr); + pin, sdvo->target_addr); sdvo->i2c = intel_gmbus_get_adapter(dev_priv, pin); @@ -2680,7 +2680,7 @@ intel_sdvo_is_hdmi_connector(struct intel_sdvo *intel_sdvo) } static u8 -intel_sdvo_get_slave_addr(struct intel_sdvo *sdvo) +intel_sdvo_get_target_addr(struct intel_sdvo *sdvo) { struct drm_i915_private *dev_priv = to_i915(sdvo->base.base.dev); const struct sdvo_device_mapping *my_mapping, *other_mapping; @@ -2694,15 +2694,15 @@ intel_sdvo_get_slave_addr(struct intel_sdvo *sdvo) } /* If the BIOS described our SDVO device, take advantage of it. */ - if (my_mapping->slave_addr) - return my_mapping->slave_addr; + if (my_mapping->target_addr) + return my_mapping->target_addr; /* * If the BIOS only described a different SDVO device, use the * address that it isn't using. */ - if (other_mapping->slave_addr) { - if (other_mapping->slave_addr == 0x70) + if (other_mapping->target_addr) { + if (other_mapping->target_addr == 0x70) return 0x72; else return 0x70; @@ -2919,6 +2919,7 @@ intel_sdvo_analog_init(struct intel_sdvo *intel_sdvo, u16 type) static bool intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, u16 type) { + struct intel_display *display = to_intel_display(&intel_sdvo->base); struct drm_encoder *encoder = &intel_sdvo->base.base; struct drm_i915_private *i915 = to_i915(encoder->dev); struct drm_connector *connector; @@ -2946,7 +2947,7 @@ intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, u16 type) if (!intel_sdvo_create_enhance_property(intel_sdvo, intel_sdvo_connector)) goto err; - intel_bios_init_panel_late(i915, &intel_connector->panel, NULL, NULL); + intel_bios_init_panel_late(display, &intel_connector->panel, NULL, NULL); /* * Fetch modes from VBT. For SDVO prefer the VBT mode since some @@ -3405,7 +3406,7 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv, "SDVO %c", port_name(port)); intel_sdvo->sdvo_reg = sdvo_reg; - intel_sdvo->slave_addr = intel_sdvo_get_slave_addr(intel_sdvo) >> 1; + intel_sdvo->target_addr = intel_sdvo_get_target_addr(intel_sdvo) >> 1; intel_sdvo_select_i2c_bus(intel_sdvo); diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index f8cceb3e5d8e..e657b09ede99 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -48,9 +48,9 @@ #include "intel_sprite.h" #include "intel_sprite_regs.h" -static char sprite_name(struct drm_i915_private *i915, enum pipe pipe, int sprite) +static char sprite_name(struct intel_display *display, enum pipe pipe, int sprite) { - return pipe * DISPLAY_RUNTIME_INFO(i915)->num_sprites[pipe] + sprite + 'A'; + return pipe * DISPLAY_RUNTIME_INFO(display)->num_sprites[pipe] + sprite + 'A'; } static void i9xx_plane_linear_gamma(u16 gamma[8]) @@ -67,7 +67,7 @@ static void chv_sprite_update_csc(const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); const struct drm_framebuffer *fb = plane_state->hw.fb; enum plane_id plane_id = plane->id; /* @@ -100,35 +100,35 @@ chv_sprite_update_csc(const struct intel_plane_state *plane_state) if (!fb->format->is_yuv) return; - intel_de_write_fw(dev_priv, SPCSCYGOFF(plane_id), + intel_de_write_fw(display, SPCSCYGOFF(plane_id), SPCSC_OOFF(0) | SPCSC_IOFF(0)); - intel_de_write_fw(dev_priv, SPCSCCBOFF(plane_id), + intel_de_write_fw(display, SPCSCCBOFF(plane_id), SPCSC_OOFF(0) | SPCSC_IOFF(0)); - intel_de_write_fw(dev_priv, SPCSCCROFF(plane_id), + intel_de_write_fw(display, SPCSCCROFF(plane_id), SPCSC_OOFF(0) | SPCSC_IOFF(0)); - intel_de_write_fw(dev_priv, SPCSCC01(plane_id), + intel_de_write_fw(display, SPCSCC01(plane_id), SPCSC_C1(csc[1]) | SPCSC_C0(csc[0])); - intel_de_write_fw(dev_priv, SPCSCC23(plane_id), + intel_de_write_fw(display, SPCSCC23(plane_id), SPCSC_C1(csc[3]) | SPCSC_C0(csc[2])); - intel_de_write_fw(dev_priv, SPCSCC45(plane_id), + intel_de_write_fw(display, SPCSCC45(plane_id), SPCSC_C1(csc[5]) | SPCSC_C0(csc[4])); - intel_de_write_fw(dev_priv, SPCSCC67(plane_id), + intel_de_write_fw(display, SPCSCC67(plane_id), SPCSC_C1(csc[7]) | SPCSC_C0(csc[6])); - intel_de_write_fw(dev_priv, SPCSCC8(plane_id), SPCSC_C0(csc[8])); + intel_de_write_fw(display, SPCSCC8(plane_id), SPCSC_C0(csc[8])); - intel_de_write_fw(dev_priv, SPCSCYGICLAMP(plane_id), + intel_de_write_fw(display, SPCSCYGICLAMP(plane_id), SPCSC_IMAX(1023) | SPCSC_IMIN(0)); - intel_de_write_fw(dev_priv, SPCSCCBICLAMP(plane_id), + intel_de_write_fw(display, SPCSCCBICLAMP(plane_id), SPCSC_IMAX(512) | SPCSC_IMIN(-512)); - intel_de_write_fw(dev_priv, SPCSCCRICLAMP(plane_id), + intel_de_write_fw(display, SPCSCCRICLAMP(plane_id), SPCSC_IMAX(512) | SPCSC_IMIN(-512)); - intel_de_write_fw(dev_priv, SPCSCYGOCLAMP(plane_id), + intel_de_write_fw(display, SPCSCYGOCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0)); - intel_de_write_fw(dev_priv, SPCSCCBOCLAMP(plane_id), + intel_de_write_fw(display, SPCSCCBOCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0)); - intel_de_write_fw(dev_priv, SPCSCCROCLAMP(plane_id), + intel_de_write_fw(display, SPCSCCROCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0)); } @@ -139,7 +139,7 @@ static void vlv_sprite_update_clrc(const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); const struct drm_framebuffer *fb = plane_state->hw.fb; enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; @@ -168,9 +168,9 @@ vlv_sprite_update_clrc(const struct intel_plane_state *plane_state) } /* FIXME these register are single buffered :( */ - intel_de_write_fw(dev_priv, SPCLRC0(pipe, plane_id), + intel_de_write_fw(display, SPCLRC0(pipe, plane_id), SP_CONTRAST(contrast) | SP_BRIGHTNESS(brightness)); - intel_de_write_fw(dev_priv, SPCLRC1(pipe, plane_id), + intel_de_write_fw(display, SPCLRC1(pipe, plane_id), SP_SH_SIN(sh_sin) | SP_SH_COS(sh_cos)); } @@ -357,7 +357,7 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, static void vlv_sprite_update_gamma(const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); const struct drm_framebuffer *fb = plane_state->hw.fb; enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; @@ -373,7 +373,7 @@ static void vlv_sprite_update_gamma(const struct intel_plane_state *plane_state) /* FIXME these register are single buffered :( */ /* The two end points are implicit (0.0 and 1.0) */ for (i = 1; i < 8 - 1; i++) - intel_de_write_fw(dev_priv, SPGAMC(pipe, plane_id, i - 1), + intel_de_write_fw(display, SPGAMC(pipe, plane_id, i - 1), gamma[i] << 16 | gamma[i] << 8 | gamma[i]); } @@ -382,7 +382,7 @@ vlv_sprite_update_noarm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; int crtc_x = plane_state->uapi.dst.x1; @@ -390,11 +390,11 @@ vlv_sprite_update_noarm(struct intel_plane *plane, u32 crtc_w = drm_rect_width(&plane_state->uapi.dst); u32 crtc_h = drm_rect_height(&plane_state->uapi.dst); - intel_de_write_fw(dev_priv, SPSTRIDE(pipe, plane_id), + intel_de_write_fw(display, SPSTRIDE(pipe, plane_id), plane_state->view.color_plane[0].mapping_stride); - intel_de_write_fw(dev_priv, SPPOS(pipe, plane_id), + intel_de_write_fw(display, SPPOS(pipe, plane_id), SP_POS_Y(crtc_y) | SP_POS_X(crtc_x)); - intel_de_write_fw(dev_priv, SPSIZE(pipe, plane_id), + intel_de_write_fw(display, SPSIZE(pipe, plane_id), SP_HEIGHT(crtc_h - 1) | SP_WIDTH(crtc_w - 1)); } @@ -403,6 +403,7 @@ vlv_sprite_update_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; @@ -420,18 +421,18 @@ vlv_sprite_update_arm(struct intel_plane *plane, chv_sprite_update_csc(plane_state); if (key->flags) { - intel_de_write_fw(dev_priv, SPKEYMINVAL(pipe, plane_id), + intel_de_write_fw(display, SPKEYMINVAL(pipe, plane_id), key->min_value); - intel_de_write_fw(dev_priv, SPKEYMSK(pipe, plane_id), + intel_de_write_fw(display, SPKEYMSK(pipe, plane_id), key->channel_mask); - intel_de_write_fw(dev_priv, SPKEYMAXVAL(pipe, plane_id), + intel_de_write_fw(display, SPKEYMAXVAL(pipe, plane_id), key->max_value); } - intel_de_write_fw(dev_priv, SPCONSTALPHA(pipe, plane_id), 0); + intel_de_write_fw(display, SPCONSTALPHA(pipe, plane_id), 0); - intel_de_write_fw(dev_priv, SPLINOFF(pipe, plane_id), linear_offset); - intel_de_write_fw(dev_priv, SPTILEOFF(pipe, plane_id), + intel_de_write_fw(display, SPLINOFF(pipe, plane_id), linear_offset); + intel_de_write_fw(display, SPTILEOFF(pipe, plane_id), SP_OFFSET_Y(y) | SP_OFFSET_X(x)); /* @@ -439,8 +440,8 @@ vlv_sprite_update_arm(struct intel_plane *plane, * disabled. Try to make the plane enable atomic by writing * the control register just before the surface register. */ - intel_de_write_fw(dev_priv, SPCNTR(pipe, plane_id), sprctl); - intel_de_write_fw(dev_priv, SPSURF(pipe, plane_id), + intel_de_write_fw(display, SPCNTR(pipe, plane_id), sprctl); + intel_de_write_fw(display, SPSURF(pipe, plane_id), intel_plane_ggtt_offset(plane_state) + sprsurf_offset); vlv_sprite_update_clrc(plane_state); @@ -451,18 +452,19 @@ static void vlv_sprite_disable_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum pipe pipe = plane->pipe; enum plane_id plane_id = plane->id; - intel_de_write_fw(dev_priv, SPCNTR(pipe, plane_id), 0); - intel_de_write_fw(dev_priv, SPSURF(pipe, plane_id), 0); + intel_de_write_fw(display, SPCNTR(pipe, plane_id), 0); + intel_de_write_fw(display, SPSURF(pipe, plane_id), 0); } static bool vlv_sprite_get_hw_state(struct intel_plane *plane, enum pipe *pipe) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; enum plane_id plane_id = plane->id; @@ -474,7 +476,7 @@ vlv_sprite_get_hw_state(struct intel_plane *plane, if (!wakeref) return false; - ret = intel_de_read(dev_priv, SPCNTR(plane->pipe, plane_id)) & SP_ENABLE; + ret = intel_de_read(display, SPCNTR(plane->pipe, plane_id)) & SP_ENABLE; *pipe = plane->pipe; @@ -766,7 +768,7 @@ static void ivb_sprite_linear_gamma(const struct intel_plane_state *plane_state, static void ivb_sprite_update_gamma(const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum pipe pipe = plane->pipe; u16 gamma[18]; int i; @@ -778,17 +780,17 @@ static void ivb_sprite_update_gamma(const struct intel_plane_state *plane_state) /* FIXME these register are single buffered :( */ for (i = 0; i < 16; i++) - intel_de_write_fw(dev_priv, SPRGAMC(pipe, i), + intel_de_write_fw(display, SPRGAMC(pipe, i), gamma[i] << 20 | gamma[i] << 10 | gamma[i]); - intel_de_write_fw(dev_priv, SPRGAMC16(pipe, 0), gamma[i]); - intel_de_write_fw(dev_priv, SPRGAMC16(pipe, 1), gamma[i]); - intel_de_write_fw(dev_priv, SPRGAMC16(pipe, 2), gamma[i]); + intel_de_write_fw(display, SPRGAMC16(pipe, 0), gamma[i]); + intel_de_write_fw(display, SPRGAMC16(pipe, 1), gamma[i]); + intel_de_write_fw(display, SPRGAMC16(pipe, 2), gamma[i]); i++; - intel_de_write_fw(dev_priv, SPRGAMC17(pipe, 0), gamma[i]); - intel_de_write_fw(dev_priv, SPRGAMC17(pipe, 1), gamma[i]); - intel_de_write_fw(dev_priv, SPRGAMC17(pipe, 2), gamma[i]); + intel_de_write_fw(display, SPRGAMC17(pipe, 0), gamma[i]); + intel_de_write_fw(display, SPRGAMC17(pipe, 1), gamma[i]); + intel_de_write_fw(display, SPRGAMC17(pipe, 2), gamma[i]); i++; } @@ -797,6 +799,7 @@ ivb_sprite_update_noarm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; int crtc_x = plane_state->uapi.dst.x1; @@ -812,14 +815,14 @@ ivb_sprite_update_noarm(struct intel_plane *plane, SPRITE_SRC_WIDTH(src_w - 1) | SPRITE_SRC_HEIGHT(src_h - 1); - intel_de_write_fw(dev_priv, SPRSTRIDE(pipe), + intel_de_write_fw(display, SPRSTRIDE(pipe), plane_state->view.color_plane[0].mapping_stride); - intel_de_write_fw(dev_priv, SPRPOS(pipe), + intel_de_write_fw(display, SPRPOS(pipe), SPRITE_POS_Y(crtc_y) | SPRITE_POS_X(crtc_x)); - intel_de_write_fw(dev_priv, SPRSIZE(pipe), + intel_de_write_fw(display, SPRSIZE(pipe), SPRITE_HEIGHT(crtc_h - 1) | SPRITE_WIDTH(crtc_w - 1)); if (IS_IVYBRIDGE(dev_priv)) - intel_de_write_fw(dev_priv, SPRSCALE(pipe), sprscale); + intel_de_write_fw(display, SPRSCALE(pipe), sprscale); } static void @@ -827,6 +830,7 @@ ivb_sprite_update_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; @@ -840,20 +844,20 @@ ivb_sprite_update_arm(struct intel_plane *plane, linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); if (key->flags) { - intel_de_write_fw(dev_priv, SPRKEYVAL(pipe), key->min_value); - intel_de_write_fw(dev_priv, SPRKEYMSK(pipe), + intel_de_write_fw(display, SPRKEYVAL(pipe), key->min_value); + intel_de_write_fw(display, SPRKEYMSK(pipe), key->channel_mask); - intel_de_write_fw(dev_priv, SPRKEYMAX(pipe), key->max_value); + intel_de_write_fw(display, SPRKEYMAX(pipe), key->max_value); } /* HSW consolidates SPRTILEOFF and SPRLINOFF into a single SPROFFSET * register */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - intel_de_write_fw(dev_priv, SPROFFSET(pipe), + intel_de_write_fw(display, SPROFFSET(pipe), SPRITE_OFFSET_Y(y) | SPRITE_OFFSET_X(x)); } else { - intel_de_write_fw(dev_priv, SPRLINOFF(pipe), linear_offset); - intel_de_write_fw(dev_priv, SPRTILEOFF(pipe), + intel_de_write_fw(display, SPRLINOFF(pipe), linear_offset); + intel_de_write_fw(display, SPRTILEOFF(pipe), SPRITE_OFFSET_Y(y) | SPRITE_OFFSET_X(x)); } @@ -862,8 +866,8 @@ ivb_sprite_update_arm(struct intel_plane *plane, * disabled. Try to make the plane enable atomic by writing * the control register just before the surface register. */ - intel_de_write_fw(dev_priv, SPRCTL(pipe), sprctl); - intel_de_write_fw(dev_priv, SPRSURF(pipe), + intel_de_write_fw(display, SPRCTL(pipe), sprctl); + intel_de_write_fw(display, SPRSURF(pipe), intel_plane_ggtt_offset(plane_state) + sprsurf_offset); ivb_sprite_update_gamma(plane_state); @@ -873,20 +877,22 @@ static void ivb_sprite_disable_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; - intel_de_write_fw(dev_priv, SPRCTL(pipe), 0); + intel_de_write_fw(display, SPRCTL(pipe), 0); /* Disable the scaler */ if (IS_IVYBRIDGE(dev_priv)) - intel_de_write_fw(dev_priv, SPRSCALE(pipe), 0); - intel_de_write_fw(dev_priv, SPRSURF(pipe), 0); + intel_de_write_fw(display, SPRSCALE(pipe), 0); + intel_de_write_fw(display, SPRSURF(pipe), 0); } static bool ivb_sprite_get_hw_state(struct intel_plane *plane, enum pipe *pipe) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; intel_wakeref_t wakeref; @@ -897,7 +903,7 @@ ivb_sprite_get_hw_state(struct intel_plane *plane, if (!wakeref) return false; - ret = intel_de_read(dev_priv, SPRCTL(plane->pipe)) & SPRITE_ENABLE; + ret = intel_de_read(display, SPRCTL(plane->pipe)) & SPRITE_ENABLE; *pipe = plane->pipe; @@ -1073,7 +1079,7 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, static void g4x_sprite_update_gamma(const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); const struct drm_framebuffer *fb = plane_state->hw.fb; enum pipe pipe = plane->pipe; u16 gamma[8]; @@ -1088,7 +1094,7 @@ static void g4x_sprite_update_gamma(const struct intel_plane_state *plane_state) /* FIXME these register are single buffered :( */ /* The two end points are implicit (0.0 and 1.0) */ for (i = 1; i < 8 - 1; i++) - intel_de_write_fw(dev_priv, DVSGAMC_G4X(pipe, i - 1), + intel_de_write_fw(display, DVSGAMC_G4X(pipe, i - 1), gamma[i] << 16 | gamma[i] << 8 | gamma[i]); } @@ -1103,7 +1109,7 @@ static void ilk_sprite_linear_gamma(u16 gamma[17]) static void ilk_sprite_update_gamma(const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); const struct drm_framebuffer *fb = plane_state->hw.fb; enum pipe pipe = plane->pipe; u16 gamma[17]; @@ -1117,12 +1123,12 @@ static void ilk_sprite_update_gamma(const struct intel_plane_state *plane_state) /* FIXME these register are single buffered :( */ for (i = 0; i < 16; i++) - intel_de_write_fw(dev_priv, DVSGAMC_ILK(pipe, i), + intel_de_write_fw(display, DVSGAMC_ILK(pipe, i), gamma[i] << 20 | gamma[i] << 10 | gamma[i]); - intel_de_write_fw(dev_priv, DVSGAMCMAX_ILK(pipe, 0), gamma[i]); - intel_de_write_fw(dev_priv, DVSGAMCMAX_ILK(pipe, 1), gamma[i]); - intel_de_write_fw(dev_priv, DVSGAMCMAX_ILK(pipe, 2), gamma[i]); + intel_de_write_fw(display, DVSGAMCMAX_ILK(pipe, 0), gamma[i]); + intel_de_write_fw(display, DVSGAMCMAX_ILK(pipe, 1), gamma[i]); + intel_de_write_fw(display, DVSGAMCMAX_ILK(pipe, 2), gamma[i]); i++; } @@ -1131,7 +1137,7 @@ g4x_sprite_update_noarm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum pipe pipe = plane->pipe; int crtc_x = plane_state->uapi.dst.x1; int crtc_y = plane_state->uapi.dst.y1; @@ -1146,13 +1152,13 @@ g4x_sprite_update_noarm(struct intel_plane *plane, DVS_SRC_WIDTH(src_w - 1) | DVS_SRC_HEIGHT(src_h - 1); - intel_de_write_fw(dev_priv, DVSSTRIDE(pipe), + intel_de_write_fw(display, DVSSTRIDE(pipe), plane_state->view.color_plane[0].mapping_stride); - intel_de_write_fw(dev_priv, DVSPOS(pipe), + intel_de_write_fw(display, DVSPOS(pipe), DVS_POS_Y(crtc_y) | DVS_POS_X(crtc_x)); - intel_de_write_fw(dev_priv, DVSSIZE(pipe), + intel_de_write_fw(display, DVSSIZE(pipe), DVS_HEIGHT(crtc_h - 1) | DVS_WIDTH(crtc_w - 1)); - intel_de_write_fw(dev_priv, DVSSCALE(pipe), dvsscale); + intel_de_write_fw(display, DVSSCALE(pipe), dvsscale); } static void @@ -1160,6 +1166,7 @@ g4x_sprite_update_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; @@ -1173,14 +1180,14 @@ g4x_sprite_update_arm(struct intel_plane *plane, linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); if (key->flags) { - intel_de_write_fw(dev_priv, DVSKEYVAL(pipe), key->min_value); - intel_de_write_fw(dev_priv, DVSKEYMSK(pipe), + intel_de_write_fw(display, DVSKEYVAL(pipe), key->min_value); + intel_de_write_fw(display, DVSKEYMSK(pipe), key->channel_mask); - intel_de_write_fw(dev_priv, DVSKEYMAX(pipe), key->max_value); + intel_de_write_fw(display, DVSKEYMAX(pipe), key->max_value); } - intel_de_write_fw(dev_priv, DVSLINOFF(pipe), linear_offset); - intel_de_write_fw(dev_priv, DVSTILEOFF(pipe), + intel_de_write_fw(display, DVSLINOFF(pipe), linear_offset); + intel_de_write_fw(display, DVSTILEOFF(pipe), DVS_OFFSET_Y(y) | DVS_OFFSET_X(x)); /* @@ -1188,8 +1195,8 @@ g4x_sprite_update_arm(struct intel_plane *plane, * disabled. Try to make the plane enable atomic by writing * the control register just before the surface register. */ - intel_de_write_fw(dev_priv, DVSCNTR(pipe), dvscntr); - intel_de_write_fw(dev_priv, DVSSURF(pipe), + intel_de_write_fw(display, DVSCNTR(pipe), dvscntr); + intel_de_write_fw(display, DVSSURF(pipe), intel_plane_ggtt_offset(plane_state) + dvssurf_offset); if (IS_G4X(dev_priv)) @@ -1202,19 +1209,20 @@ static void g4x_sprite_disable_arm(struct intel_plane *plane, const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_display *display = to_intel_display(plane->base.dev); enum pipe pipe = plane->pipe; - intel_de_write_fw(dev_priv, DVSCNTR(pipe), 0); + intel_de_write_fw(display, DVSCNTR(pipe), 0); /* Disable the scaler */ - intel_de_write_fw(dev_priv, DVSSCALE(pipe), 0); - intel_de_write_fw(dev_priv, DVSSURF(pipe), 0); + intel_de_write_fw(display, DVSSCALE(pipe), 0); + intel_de_write_fw(display, DVSSURF(pipe), 0); } static bool g4x_sprite_get_hw_state(struct intel_plane *plane, enum pipe *pipe) { + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum intel_display_power_domain power_domain; intel_wakeref_t wakeref; @@ -1225,7 +1233,7 @@ g4x_sprite_get_hw_state(struct intel_plane *plane, if (!wakeref) return false; - ret = intel_de_read(dev_priv, DVSCNTR(plane->pipe)) & DVS_ENABLE; + ret = intel_de_read(display, DVSCNTR(plane->pipe)) & DVS_ENABLE; *pipe = plane->pipe; @@ -1255,7 +1263,7 @@ static int g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { - struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev); + struct intel_display *display = to_intel_display(crtc_state); const struct drm_framebuffer *fb = plane_state->hw.fb; const struct drm_rect *src = &plane_state->uapi.src; const struct drm_rect *dst = &plane_state->uapi.dst; @@ -1281,7 +1289,8 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) { if (src_h & 1) { - drm_dbg_kms(&i915->drm, "Source height must be even with interlaced modes\n"); + drm_dbg_kms(display->drm, + "Source height must be even with interlaced modes\n"); return -EINVAL; } min_height = 6; @@ -1293,19 +1302,22 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, if (src_w < min_width || src_h < min_height || src_w > 2048 || src_h > 2048) { - drm_dbg_kms(&i915->drm, "Source dimensions (%dx%d) exceed hardware limits (%dx%d - %dx%d)\n", + drm_dbg_kms(display->drm, + "Source dimensions (%dx%d) exceed hardware limits (%dx%d - %dx%d)\n", src_w, src_h, min_width, min_height, 2048, 2048); return -EINVAL; } if (width_bytes > 4096) { - drm_dbg_kms(&i915->drm, "Fetch width (%d) exceeds hardware max with scaling (%u)\n", + drm_dbg_kms(display->drm, + "Fetch width (%d) exceeds hardware max with scaling (%u)\n", width_bytes, 4096); return -EINVAL; } if (stride > 4096) { - drm_dbg_kms(&i915->drm, "Stride (%u) exceeds hardware max with scaling (%u)\n", + drm_dbg_kms(display->drm, + "Stride (%u) exceeds hardware max with scaling (%u)\n", stride, 4096); return -EINVAL; } @@ -1317,6 +1329,7 @@ static int g4x_sprite_check(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { + struct intel_display *display = to_intel_display(crtc_state); struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); int min_scale = DRM_PLANE_NO_SCALING; @@ -1324,7 +1337,7 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state, int ret; if (g4x_fb_scalable(plane_state->hw.fb)) { - if (DISPLAY_VER(dev_priv) < 7) { + if (DISPLAY_VER(display) < 7) { min_scale = 1; max_scale = 16 << 16; } else if (IS_IVYBRIDGE(dev_priv)) { @@ -1353,7 +1366,7 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state, if (ret) return ret; - if (DISPLAY_VER(dev_priv) >= 7) + if (DISPLAY_VER(display) >= 7) plane_state->ctl = ivb_sprite_ctl(crtc_state, plane_state); else plane_state->ctl = g4x_sprite_ctl(crtc_state, plane_state); @@ -1364,6 +1377,7 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state, int chv_plane_check_rotation(const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane); + struct intel_display *display = to_intel_display(plane->base.dev); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); unsigned int rotation = plane_state->hw.rotation; @@ -1371,7 +1385,7 @@ int chv_plane_check_rotation(const struct intel_plane_state *plane_state) if (IS_CHERRYVIEW(dev_priv) && rotation & DRM_MODE_ROTATE_180 && rotation & DRM_MODE_REFLECT_X) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Cannot rotate and reflect at the same time\n"); return -EINVAL; } @@ -1573,6 +1587,7 @@ struct intel_plane * intel_sprite_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe, int sprite) { + struct intel_display *display = &dev_priv->display; struct intel_plane *plane; const struct drm_plane_funcs *plane_funcs; unsigned int supported_rotations; @@ -1604,7 +1619,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, } plane_funcs = &vlv_sprite_funcs; - } else if (DISPLAY_VER(dev_priv) >= 7) { + } else if (DISPLAY_VER(display) >= 7) { plane->update_noarm = ivb_sprite_update_noarm; plane->update_arm = ivb_sprite_update_arm; plane->disable_arm = ivb_sprite_disable_arm; @@ -1663,11 +1678,11 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, modifiers = intel_fb_plane_get_modifiers(dev_priv, INTEL_PLANE_CAP_TILING_X); - ret = drm_universal_plane_init(&dev_priv->drm, &plane->base, + ret = drm_universal_plane_init(display->drm, &plane->base, 0, plane_funcs, formats, num_formats, modifiers, DRM_PLANE_TYPE_OVERLAY, - "sprite %c", sprite_name(dev_priv, pipe, sprite)); + "sprite %c", sprite_name(display, pipe, sprite)); kfree(modifiers); if (ret) diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 9887967b2ca5..6f2ee7dbc43b 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -393,6 +393,9 @@ void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, bool lane_reversal = dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; u32 val; + if (DISPLAY_VER(i915) >= 14) + return; + drm_WARN_ON(&i915->drm, lane_reversal && tc->mode != TC_PORT_LEGACY); diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index 9df0f1263913..581844d1db9a 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -914,8 +914,8 @@ static struct intel_tv *intel_attached_tv(struct intel_connector *connector) static bool intel_tv_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 tmp = intel_de_read(dev_priv, TV_CTL); + struct intel_display *display = to_intel_display(encoder); + u32 tmp = intel_de_read(display, TV_CTL); *pipe = (tmp & TV_ENC_PIPE_SEL_MASK) >> TV_ENC_PIPE_SEL_SHIFT; @@ -928,13 +928,12 @@ intel_enable_tv(struct intel_atomic_state *state, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = to_intel_display(state); /* Prevents vblank waits from timing out in intel_tv_detect_type() */ intel_crtc_wait_for_next_vblank(to_intel_crtc(pipe_config->uapi.crtc)); - intel_de_rmw(dev_priv, TV_CTL, 0, TV_ENC_ENABLE); + intel_de_rmw(display, TV_CTL, 0, TV_ENC_ENABLE); } static void @@ -943,10 +942,9 @@ intel_disable_tv(struct intel_atomic_state *state, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = to_intel_display(state); - intel_de_rmw(dev_priv, TV_CTL, TV_ENC_ENABLE, 0); + intel_de_rmw(display, TV_CTL, TV_ENC_ENABLE, 0); } static const struct tv_mode *intel_tv_mode_find(const struct drm_connector_state *conn_state) @@ -960,9 +958,10 @@ static enum drm_mode_status intel_tv_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct intel_display *display = to_intel_display(connector->dev); struct drm_i915_private *i915 = to_i915(connector->dev); const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); - int max_dotclk = i915->display.cdclk.max_dotclk_freq; + int max_dotclk = display->cdclk.max_dotclk_freq; enum drm_mode_status status; status = intel_cpu_transcoder_mode_valid(i915, mode); @@ -1092,6 +1091,7 @@ static void intel_tv_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; @@ -1104,11 +1104,11 @@ intel_tv_get_config(struct intel_encoder *encoder, pipe_config->output_types |= BIT(INTEL_OUTPUT_TVOUT); - tv_ctl = intel_de_read(dev_priv, TV_CTL); - hctl1 = intel_de_read(dev_priv, TV_H_CTL_1); - hctl3 = intel_de_read(dev_priv, TV_H_CTL_3); - vctl1 = intel_de_read(dev_priv, TV_V_CTL_1); - vctl2 = intel_de_read(dev_priv, TV_V_CTL_2); + tv_ctl = intel_de_read(display, TV_CTL); + hctl1 = intel_de_read(display, TV_H_CTL_1); + hctl3 = intel_de_read(display, TV_H_CTL_3); + vctl1 = intel_de_read(display, TV_V_CTL_1); + vctl2 = intel_de_read(display, TV_V_CTL_2); tv_mode.htotal = (hctl1 & TV_HTOTAL_MASK) >> TV_HTOTAL_SHIFT; tv_mode.hsync_end = (hctl1 & TV_HSYNC_END_MASK) >> TV_HSYNC_END_SHIFT; @@ -1143,17 +1143,17 @@ intel_tv_get_config(struct intel_encoder *encoder, break; } - tmp = intel_de_read(dev_priv, TV_WIN_POS); + tmp = intel_de_read(display, TV_WIN_POS); xpos = tmp >> 16; ypos = tmp & 0xffff; - tmp = intel_de_read(dev_priv, TV_WIN_SIZE); + tmp = intel_de_read(display, TV_WIN_SIZE); xsize = tmp >> 16; ysize = tmp & 0xffff; intel_tv_mode_to_mode(&mode, &tv_mode, pipe_config->port_clock); - drm_dbg_kms(&dev_priv->drm, "TV mode: " DRM_MODE_FMT "\n", + drm_dbg_kms(display->drm, "TV mode: " DRM_MODE_FMT "\n", DRM_MODE_ARG(&mode)); intel_tv_scale_mode_horiz(&mode, hdisplay, @@ -1171,10 +1171,10 @@ intel_tv_get_config(struct intel_encoder *encoder, I915_MODE_FLAG_USE_SCANLINE_COUNTER; } -static bool intel_tv_source_too_wide(struct drm_i915_private *dev_priv, +static bool intel_tv_source_too_wide(struct intel_display *display, int hdisplay) { - return DISPLAY_VER(dev_priv) == 3 && hdisplay > 1024; + return DISPLAY_VER(display) == 3 && hdisplay > 1024; } static bool intel_tv_vert_scaling(const struct drm_display_mode *tv_mode, @@ -1192,6 +1192,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + struct intel_display *display = to_intel_display(encoder); struct intel_atomic_state *state = to_intel_atomic_state(pipe_config->uapi.state); struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); @@ -1214,7 +1215,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, pipe_config->sink_format = INTEL_OUTPUT_FORMAT_RGB; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; - drm_dbg_kms(&dev_priv->drm, "forcing bpc to 8 for TV\n"); + drm_dbg_kms(display->drm, "forcing bpc to 8 for TV\n"); pipe_config->pipe_bpp = 8*3; pipe_config->port_clock = tv_mode->clock; @@ -1228,14 +1229,14 @@ intel_tv_compute_config(struct intel_encoder *encoder, intel_tv_mode_to_mode(adjusted_mode, tv_mode, pipe_config->port_clock); drm_mode_set_crtcinfo(adjusted_mode, 0); - if (intel_tv_source_too_wide(dev_priv, hdisplay) || + if (intel_tv_source_too_wide(display, hdisplay) || !intel_tv_vert_scaling(adjusted_mode, conn_state, vdisplay)) { int extra, top, bottom; extra = adjusted_mode->crtc_vdisplay - vdisplay; if (extra < 0) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "No vertical scaling for >1024 pixel wide modes\n"); return -EINVAL; } @@ -1269,7 +1270,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, tv_conn_state->bypass_vfilter = false; } - drm_dbg_kms(&dev_priv->drm, "TV mode: " DRM_MODE_FMT "\n", + drm_dbg_kms(display->drm, "TV mode: " DRM_MODE_FMT "\n", DRM_MODE_ARG(adjusted_mode)); /* @@ -1355,7 +1356,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, } static void -set_tv_mode_timings(struct drm_i915_private *dev_priv, +set_tv_mode_timings(struct intel_display *display, const struct tv_mode *tv_mode, bool burst_ena) { @@ -1401,32 +1402,32 @@ set_tv_mode_timings(struct drm_i915_private *dev_priv, vctl7 = (tv_mode->vburst_start_f4 << TV_VBURST_START_F4_SHIFT) | (tv_mode->vburst_end_f4 << TV_VBURST_END_F4_SHIFT); - intel_de_write(dev_priv, TV_H_CTL_1, hctl1); - intel_de_write(dev_priv, TV_H_CTL_2, hctl2); - intel_de_write(dev_priv, TV_H_CTL_3, hctl3); - intel_de_write(dev_priv, TV_V_CTL_1, vctl1); - intel_de_write(dev_priv, TV_V_CTL_2, vctl2); - intel_de_write(dev_priv, TV_V_CTL_3, vctl3); - intel_de_write(dev_priv, TV_V_CTL_4, vctl4); - intel_de_write(dev_priv, TV_V_CTL_5, vctl5); - intel_de_write(dev_priv, TV_V_CTL_6, vctl6); - intel_de_write(dev_priv, TV_V_CTL_7, vctl7); + intel_de_write(display, TV_H_CTL_1, hctl1); + intel_de_write(display, TV_H_CTL_2, hctl2); + intel_de_write(display, TV_H_CTL_3, hctl3); + intel_de_write(display, TV_V_CTL_1, vctl1); + intel_de_write(display, TV_V_CTL_2, vctl2); + intel_de_write(display, TV_V_CTL_3, vctl3); + intel_de_write(display, TV_V_CTL_4, vctl4); + intel_de_write(display, TV_V_CTL_5, vctl5); + intel_de_write(display, TV_V_CTL_6, vctl6); + intel_de_write(display, TV_V_CTL_7, vctl7); } -static void set_color_conversion(struct drm_i915_private *dev_priv, +static void set_color_conversion(struct intel_display *display, const struct color_conversion *color_conversion) { - intel_de_write(dev_priv, TV_CSC_Y, + intel_de_write(display, TV_CSC_Y, (color_conversion->ry << 16) | color_conversion->gy); - intel_de_write(dev_priv, TV_CSC_Y2, + intel_de_write(display, TV_CSC_Y2, (color_conversion->by << 16) | color_conversion->ay); - intel_de_write(dev_priv, TV_CSC_U, + intel_de_write(display, TV_CSC_U, (color_conversion->ru << 16) | color_conversion->gu); - intel_de_write(dev_priv, TV_CSC_U2, + intel_de_write(display, TV_CSC_U2, (color_conversion->bu << 16) | color_conversion->au); - intel_de_write(dev_priv, TV_CSC_V, + intel_de_write(display, TV_CSC_V, (color_conversion->rv << 16) | color_conversion->gv); - intel_de_write(dev_priv, TV_CSC_V2, + intel_de_write(display, TV_CSC_V2, (color_conversion->bv << 16) | color_conversion->av); } @@ -1435,6 +1436,7 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state, const struct intel_crtc_state *pipe_config, const struct drm_connector_state *conn_state) { + struct intel_display *display = to_intel_display(encoder); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); struct intel_tv *intel_tv = enc_to_tv(encoder); @@ -1450,7 +1452,7 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state, int xpos, ypos; unsigned int xsize, ysize; - tv_ctl = intel_de_read(dev_priv, TV_CTL); + tv_ctl = intel_de_read(display, TV_CTL); tv_ctl &= TV_CTL_SAVE; switch (intel_tv->type) { @@ -1525,21 +1527,21 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state, if (IS_I915GM(dev_priv)) tv_ctl |= TV_ENC_C0_FIX | TV_ENC_SDP_FIX; - set_tv_mode_timings(dev_priv, tv_mode, burst_ena); + set_tv_mode_timings(display, tv_mode, burst_ena); - intel_de_write(dev_priv, TV_SC_CTL_1, scctl1); - intel_de_write(dev_priv, TV_SC_CTL_2, scctl2); - intel_de_write(dev_priv, TV_SC_CTL_3, scctl3); + intel_de_write(display, TV_SC_CTL_1, scctl1); + intel_de_write(display, TV_SC_CTL_2, scctl2); + intel_de_write(display, TV_SC_CTL_3, scctl3); - set_color_conversion(dev_priv, color_conversion); + set_color_conversion(display, color_conversion); - if (DISPLAY_VER(dev_priv) >= 4) - intel_de_write(dev_priv, TV_CLR_KNOBS, 0x00404000); + if (DISPLAY_VER(display) >= 4) + intel_de_write(display, TV_CLR_KNOBS, 0x00404000); else - intel_de_write(dev_priv, TV_CLR_KNOBS, 0x00606000); + intel_de_write(display, TV_CLR_KNOBS, 0x00606000); if (video_levels) - intel_de_write(dev_priv, TV_CLR_LEVEL, + intel_de_write(display, TV_CLR_LEVEL, ((video_levels->black << TV_BLACK_LEVEL_SHIFT) | (video_levels->blank << TV_BLANK_LEVEL_SHIFT))); assert_transcoder_disabled(dev_priv, pipe_config->cpu_transcoder); @@ -1548,7 +1550,7 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state, tv_filter_ctl = TV_AUTO_SCALE; if (tv_conn_state->bypass_vfilter) tv_filter_ctl |= TV_V_FILTER_BYPASS; - intel_de_write(dev_priv, TV_FILTER_CTL_1, tv_filter_ctl); + intel_de_write(display, TV_FILTER_CTL_1, tv_filter_ctl); xsize = tv_mode->hblank_start - tv_mode->hblank_end; ysize = intel_tv_mode_vdisplay(tv_mode); @@ -1559,31 +1561,32 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state, conn_state->tv.margins.right); ysize -= (tv_conn_state->margins.top + tv_conn_state->margins.bottom); - intel_de_write(dev_priv, TV_WIN_POS, (xpos << 16) | ypos); - intel_de_write(dev_priv, TV_WIN_SIZE, (xsize << 16) | ysize); + intel_de_write(display, TV_WIN_POS, (xpos << 16) | ypos); + intel_de_write(display, TV_WIN_SIZE, (xsize << 16) | ysize); j = 0; for (i = 0; i < 60; i++) - intel_de_write(dev_priv, TV_H_LUMA(i), + intel_de_write(display, TV_H_LUMA(i), tv_mode->filter_table[j++]); for (i = 0; i < 60; i++) - intel_de_write(dev_priv, TV_H_CHROMA(i), + intel_de_write(display, TV_H_CHROMA(i), tv_mode->filter_table[j++]); for (i = 0; i < 43; i++) - intel_de_write(dev_priv, TV_V_LUMA(i), + intel_de_write(display, TV_V_LUMA(i), tv_mode->filter_table[j++]); for (i = 0; i < 43; i++) - intel_de_write(dev_priv, TV_V_CHROMA(i), + intel_de_write(display, TV_V_CHROMA(i), tv_mode->filter_table[j++]); - intel_de_write(dev_priv, TV_DAC, - intel_de_read(dev_priv, TV_DAC) & TV_DAC_SAVE); - intel_de_write(dev_priv, TV_CTL, tv_ctl); + intel_de_write(display, TV_DAC, + intel_de_read(display, TV_DAC) & TV_DAC_SAVE); + intel_de_write(display, TV_CTL, tv_ctl); } static int intel_tv_detect_type(struct intel_tv *intel_tv, struct drm_connector *connector) { + struct intel_display *display = to_intel_display(connector->dev); struct intel_crtc *crtc = to_intel_crtc(connector->state->crtc); struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -1600,8 +1603,8 @@ intel_tv_detect_type(struct intel_tv *intel_tv, spin_unlock_irq(&dev_priv->irq_lock); } - save_tv_dac = tv_dac = intel_de_read(dev_priv, TV_DAC); - save_tv_ctl = tv_ctl = intel_de_read(dev_priv, TV_CTL); + save_tv_dac = tv_dac = intel_de_read(display, TV_DAC); + save_tv_ctl = tv_ctl = intel_de_read(display, TV_CTL); /* Poll for TV detection */ tv_ctl &= ~(TV_ENC_ENABLE | TV_ENC_PIPE_SEL_MASK | TV_TEST_MODE_MASK); @@ -1627,15 +1630,15 @@ intel_tv_detect_type(struct intel_tv *intel_tv, tv_dac &= ~(TVDAC_STATE_CHG_EN | TVDAC_A_SENSE_CTL | TVDAC_B_SENSE_CTL | TVDAC_C_SENSE_CTL); - intel_de_write(dev_priv, TV_CTL, tv_ctl); - intel_de_write(dev_priv, TV_DAC, tv_dac); - intel_de_posting_read(dev_priv, TV_DAC); + intel_de_write(display, TV_CTL, tv_ctl); + intel_de_write(display, TV_DAC, tv_dac); + intel_de_posting_read(display, TV_DAC); intel_crtc_wait_for_next_vblank(crtc); type = -1; - tv_dac = intel_de_read(dev_priv, TV_DAC); - drm_dbg_kms(&dev_priv->drm, "TV detected: %x, %x\n", tv_ctl, tv_dac); + tv_dac = intel_de_read(display, TV_DAC); + drm_dbg_kms(display->drm, "TV detected: %x, %x\n", tv_ctl, tv_dac); /* * A B C * 0 1 1 Composite @@ -1643,25 +1646,25 @@ intel_tv_detect_type(struct intel_tv *intel_tv, * 0 0 0 Component */ if ((tv_dac & TVDAC_SENSE_MASK) == (TVDAC_B_SENSE | TVDAC_C_SENSE)) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Detected Composite TV connection\n"); type = DRM_MODE_CONNECTOR_Composite; } else if ((tv_dac & (TVDAC_A_SENSE|TVDAC_B_SENSE)) == TVDAC_A_SENSE) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Detected S-Video TV connection\n"); type = DRM_MODE_CONNECTOR_SVIDEO; } else if ((tv_dac & TVDAC_SENSE_MASK) == 0) { - drm_dbg_kms(&dev_priv->drm, + drm_dbg_kms(display->drm, "Detected Component TV connection\n"); type = DRM_MODE_CONNECTOR_Component; } else { - drm_dbg_kms(&dev_priv->drm, "Unrecognised TV connection\n"); + drm_dbg_kms(display->drm, "Unrecognised TV connection\n"); type = -1; } - intel_de_write(dev_priv, TV_DAC, save_tv_dac & ~TVDAC_STATE_CHG_EN); - intel_de_write(dev_priv, TV_CTL, save_tv_ctl); - intel_de_posting_read(dev_priv, TV_CTL); + intel_de_write(display, TV_DAC, save_tv_dac & ~TVDAC_STATE_CHG_EN); + intel_de_write(display, TV_CTL, save_tv_ctl); + intel_de_posting_read(display, TV_CTL); /* For unknown reasons the hw barfs if we don't do this vblank wait. */ intel_crtc_wait_for_next_vblank(crtc); @@ -1711,12 +1714,13 @@ intel_tv_detect(struct drm_connector *connector, struct drm_modeset_acquire_ctx *ctx, bool force) { + struct intel_display *display = to_intel_display(connector->dev); struct drm_i915_private *i915 = to_i915(connector->dev); struct intel_tv *intel_tv = intel_attached_tv(to_intel_connector(connector)); enum drm_connector_status status; int type; - drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] force=%d\n", + drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] force=%d\n", connector->base.id, connector->name, force); if (!intel_display_device_enabled(i915)) @@ -1791,7 +1795,7 @@ intel_tv_set_mode_type(struct drm_display_mode *mode, static int intel_tv_get_modes(struct drm_connector *connector) { - struct drm_i915_private *dev_priv = to_i915(connector->dev); + struct intel_display *display = to_intel_display(connector->dev); const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int i, count = 0; @@ -1805,7 +1809,7 @@ intel_tv_get_modes(struct drm_connector *connector) continue; /* no vertical scaling with wide sources on gen3 */ - if (DISPLAY_VER(dev_priv) == 3 && input->w > 1024 && + if (DISPLAY_VER(display) == 3 && input->w > 1024 && input->h > intel_tv_mode_vdisplay(tv_mode)) continue; @@ -1822,7 +1826,8 @@ intel_tv_get_modes(struct drm_connector *connector) */ intel_tv_mode_to_mode(mode, tv_mode, tv_mode->clock); if (count == 0) { - drm_dbg_kms(&dev_priv->drm, "TV mode: " DRM_MODE_FMT "\n", + drm_dbg_kms(display->drm, + "TV mode: " DRM_MODE_FMT "\n", DRM_MODE_ARG(mode)); } intel_tv_scale_mode_horiz(mode, input->w, 0, 0); @@ -1887,7 +1892,7 @@ static const struct drm_encoder_funcs intel_tv_enc_funcs = { static void intel_tv_add_properties(struct drm_connector *connector) { - struct drm_i915_private *i915 = to_i915(connector->dev); + struct intel_display *display = to_intel_display(connector->dev); struct drm_connector_state *conn_state = connector->state; const char *tv_format_names[ARRAY_SIZE(tv_modes)]; int i; @@ -1903,32 +1908,32 @@ static void intel_tv_add_properties(struct drm_connector *connector) /* Create TV properties then attach current values */ for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { /* 1080p50/1080p60 not supported on gen3 */ - if (DISPLAY_VER(i915) == 3 && tv_modes[i].oversample == 1) + if (DISPLAY_VER(display) == 3 && tv_modes[i].oversample == 1) break; tv_format_names[i] = tv_modes[i].name; } - drm_mode_create_tv_properties_legacy(&i915->drm, i, tv_format_names); + drm_mode_create_tv_properties_legacy(display->drm, i, tv_format_names); drm_object_attach_property(&connector->base, - i915->drm.mode_config.legacy_tv_mode_property, + display->drm->mode_config.legacy_tv_mode_property, conn_state->tv.legacy_mode); drm_object_attach_property(&connector->base, - i915->drm.mode_config.tv_left_margin_property, + display->drm->mode_config.tv_left_margin_property, conn_state->tv.margins.left); drm_object_attach_property(&connector->base, - i915->drm.mode_config.tv_top_margin_property, + display->drm->mode_config.tv_top_margin_property, conn_state->tv.margins.top); drm_object_attach_property(&connector->base, - i915->drm.mode_config.tv_right_margin_property, + display->drm->mode_config.tv_right_margin_property, conn_state->tv.margins.right); drm_object_attach_property(&connector->base, - i915->drm.mode_config.tv_bottom_margin_property, + display->drm->mode_config.tv_bottom_margin_property, conn_state->tv.margins.bottom); } void -intel_tv_init(struct drm_i915_private *dev_priv) +intel_tv_init(struct intel_display *display) { struct drm_connector *connector; struct intel_tv *intel_tv; @@ -1936,11 +1941,11 @@ intel_tv_init(struct drm_i915_private *dev_priv) struct intel_connector *intel_connector; u32 tv_dac_on, tv_dac_off, save_tv_dac; - if ((intel_de_read(dev_priv, TV_CTL) & TV_FUSE_STATE_MASK) == TV_FUSE_STATE_DISABLED) + if ((intel_de_read(display, TV_CTL) & TV_FUSE_STATE_MASK) == TV_FUSE_STATE_DISABLED) return; - if (!intel_bios_is_tv_present(dev_priv)) { - drm_dbg_kms(&dev_priv->drm, "Integrated TV is not present.\n"); + if (!intel_bios_is_tv_present(display)) { + drm_dbg_kms(display->drm, "Integrated TV is not present.\n"); return; } @@ -1948,15 +1953,15 @@ intel_tv_init(struct drm_i915_private *dev_priv) * Sanity check the TV output by checking to see if the * DAC register holds a value */ - save_tv_dac = intel_de_read(dev_priv, TV_DAC); + save_tv_dac = intel_de_read(display, TV_DAC); - intel_de_write(dev_priv, TV_DAC, save_tv_dac | TVDAC_STATE_CHG_EN); - tv_dac_on = intel_de_read(dev_priv, TV_DAC); + intel_de_write(display, TV_DAC, save_tv_dac | TVDAC_STATE_CHG_EN); + tv_dac_on = intel_de_read(display, TV_DAC); - intel_de_write(dev_priv, TV_DAC, save_tv_dac & ~TVDAC_STATE_CHG_EN); - tv_dac_off = intel_de_read(dev_priv, TV_DAC); + intel_de_write(display, TV_DAC, save_tv_dac & ~TVDAC_STATE_CHG_EN); + tv_dac_off = intel_de_read(display, TV_DAC); - intel_de_write(dev_priv, TV_DAC, save_tv_dac); + intel_de_write(display, TV_DAC, save_tv_dac); /* * If the register does not hold the state change enable @@ -1994,10 +1999,11 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_connector->polled = DRM_CONNECTOR_POLL_CONNECT; intel_connector->base.polled = intel_connector->polled; - drm_connector_init(&dev_priv->drm, connector, &intel_tv_connector_funcs, + drm_connector_init(display->drm, connector, &intel_tv_connector_funcs, DRM_MODE_CONNECTOR_SVIDEO); - drm_encoder_init(&dev_priv->drm, &intel_encoder->base, &intel_tv_enc_funcs, + drm_encoder_init(display->drm, &intel_encoder->base, + &intel_tv_enc_funcs, DRM_MODE_ENCODER_TVDAC, "TV"); intel_encoder->compute_config = intel_tv_compute_config; diff --git a/drivers/gpu/drm/i915/display/intel_tv.h b/drivers/gpu/drm/i915/display/intel_tv.h index f08827b8bf2b..0f280f69e73c 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.h +++ b/drivers/gpu/drm/i915/display/intel_tv.h @@ -6,12 +6,12 @@ #ifndef __INTEL_TV_H__ #define __INTEL_TV_H__ -struct drm_i915_private; +struct intel_display; #ifdef I915 -void intel_tv_init(struct drm_i915_private *dev_priv); +void intel_tv_init(struct intel_display *display); #else -static inline void intel_tv_init(struct drm_i915_private *dev_priv) +static inline void intel_tv_init(struct intel_display *display) { } #endif diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index 5b065e1cd4e4..0b7f2134e441 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -67,8 +67,8 @@ */ u32 i915_get_vblank_counter(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct drm_vblank_crtc *vblank = &dev_priv->drm.vblank[drm_crtc_index(crtc)]; + struct intel_display *display = to_intel_display(crtc->dev); + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); const struct drm_display_mode *mode = &vblank->hwmode; enum pipe pipe = to_intel_crtc(crtc)->pipe; u32 pixel, vbl_start, hsync_start, htotal; @@ -103,8 +103,8 @@ u32 i915_get_vblank_counter(struct drm_crtc *crtc) * we get a low value that's stable across two reads of the high * register. */ - frame = intel_de_read64_2x32(dev_priv, PIPEFRAMEPIXEL(dev_priv, pipe), - PIPEFRAME(dev_priv, pipe)); + frame = intel_de_read64_2x32(display, PIPEFRAMEPIXEL(display, pipe), + PIPEFRAME(display, pipe)); pixel = frame & PIPE_PIXEL_MASK; frame = (frame >> PIPE_FRAME_LOW_SHIFT) & 0xffffff; @@ -119,19 +119,19 @@ u32 i915_get_vblank_counter(struct drm_crtc *crtc) u32 g4x_get_vblank_counter(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct drm_vblank_crtc *vblank = &dev_priv->drm.vblank[drm_crtc_index(crtc)]; + struct intel_display *display = to_intel_display(crtc->dev); + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); enum pipe pipe = to_intel_crtc(crtc)->pipe; if (!vblank->max_vblank_count) return 0; - return intel_de_read(dev_priv, PIPE_FRMCOUNT_G4X(dev_priv, pipe)); + return intel_de_read(display, PIPE_FRMCOUNT_G4X(display, pipe)); } static u32 intel_crtc_scanlines_since_frame_timestamp(struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(&crtc->base); const struct drm_display_mode *mode = &vblank->hwmode; u32 htotal = mode->crtc_htotal; @@ -150,16 +150,16 @@ static u32 intel_crtc_scanlines_since_frame_timestamp(struct intel_crtc *crtc) * pipe frame time stamp. The time stamp value * is sampled at every start of vertical blank. */ - scan_prev_time = intel_de_read_fw(dev_priv, + scan_prev_time = intel_de_read_fw(display, PIPE_FRMTMSTMP(crtc->pipe)); /* * The TIMESTAMP_CTR register has the current * time stamp value. */ - scan_curr_time = intel_de_read_fw(dev_priv, IVB_TIMESTAMP_CTR); + scan_curr_time = intel_de_read_fw(display, IVB_TIMESTAMP_CTR); - scan_post_time = intel_de_read_fw(dev_priv, + scan_post_time = intel_de_read_fw(display, PIPE_FRMTMSTMP(crtc->pipe)); } while (scan_post_time != scan_prev_time); @@ -190,8 +190,9 @@ static u32 __intel_get_crtc_scanline_from_timestamp(struct intel_crtc *crtc) return scanline; } -static int intel_crtc_scanline_offset(const struct intel_crtc_state *crtc_state) +int intel_crtc_scanline_offset(const struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(crtc_state); struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); /* @@ -220,7 +221,7 @@ static int intel_crtc_scanline_offset(const struct intel_crtc_state *crtc_state) * However if queried just before the start of vblank we'll get an * answer that's slightly in the future. */ - if (DISPLAY_VER(i915) == 2) + if (DISPLAY_VER(display) == 2) return -1; else if (HAS_DDI(i915) && intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return 2; @@ -234,8 +235,7 @@ static int intel_crtc_scanline_offset(const struct intel_crtc_state *crtc_state) */ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = to_intel_display(crtc); struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(&crtc->base); const struct drm_display_mode *mode = &vblank->hwmode; enum pipe pipe = crtc->pipe; @@ -249,7 +249,7 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) vtotal = intel_mode_vtotal(mode); - position = intel_de_read_fw(dev_priv, PIPEDSL(dev_priv, pipe)) & PIPEDSL_LINE_MASK; + position = intel_de_read_fw(display, PIPEDSL(display, pipe)) & PIPEDSL_LINE_MASK; /* * On HSW, the DSL reg (0x70000) appears to return 0 if we @@ -263,13 +263,13 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) * problem. We may need to extend this to include other platforms, * but so far testing only shows the problem on HSW. */ - if (HAS_DDI(dev_priv) && !position) { + if (HAS_DDI(display) && !position) { int i, temp; for (i = 0; i < 100; i++) { udelay(1); - temp = intel_de_read_fw(dev_priv, - PIPEDSL(dev_priv, pipe)) & PIPEDSL_LINE_MASK; + temp = intel_de_read_fw(display, + PIPEDSL(display, pipe)) & PIPEDSL_LINE_MASK; if (temp != position) { position = temp; break; @@ -284,15 +284,6 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) return (position + vtotal + crtc->scanline_offset) % vtotal; } -int intel_crtc_scanline_to_hw(struct intel_crtc *crtc, int scanline) -{ - const struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(&crtc->base); - const struct drm_display_mode *mode = &vblank->hwmode; - int vtotal = intel_mode_vtotal(mode); - - return (scanline + vtotal - crtc->scanline_offset) % vtotal; -} - /* * The uncore version of the spin lock functions is used to decide * whether we need to lock the uncore lock or not. This is only @@ -303,41 +294,49 @@ int intel_crtc_scanline_to_hw(struct intel_crtc *crtc, int scanline) * all register accesses to the same cacheline to be serialized, * otherwise they may hang. */ -static void intel_vblank_section_enter(struct drm_i915_private *i915) +#ifdef I915 +static void intel_vblank_section_enter(struct intel_display *display) __acquires(i915->uncore.lock) { -#ifdef I915 + struct drm_i915_private *i915 = to_i915(display->drm); spin_lock(&i915->uncore.lock); -#endif } -static void intel_vblank_section_exit(struct drm_i915_private *i915) +static void intel_vblank_section_exit(struct intel_display *display) __releases(i915->uncore.lock) { -#ifdef I915 + struct drm_i915_private *i915 = to_i915(display->drm); spin_unlock(&i915->uncore.lock); -#endif +} +#else +static void intel_vblank_section_enter(struct intel_display *display) +{ } +static void intel_vblank_section_exit(struct intel_display *display) +{ +} +#endif + static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, bool in_vblank_irq, int *vpos, int *hpos, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode) { - struct drm_device *dev = _crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = to_intel_display(_crtc->dev); + struct drm_i915_private *dev_priv = to_i915(display->drm); struct intel_crtc *crtc = to_intel_crtc(_crtc); enum pipe pipe = crtc->pipe; int position; int vbl_start, vbl_end, hsync_start, htotal, vtotal; unsigned long irqflags; - bool use_scanline_counter = DISPLAY_VER(dev_priv) >= 5 || - IS_G4X(dev_priv) || DISPLAY_VER(dev_priv) == 2 || + bool use_scanline_counter = DISPLAY_VER(display) >= 5 || + IS_G4X(dev_priv) || DISPLAY_VER(display) == 2 || crtc->mode_flags & I915_MODE_FLAG_USE_SCANLINE_COUNTER; - if (drm_WARN_ON(&dev_priv->drm, !mode->crtc_clock)) { - drm_dbg(&dev_priv->drm, + if (drm_WARN_ON(display->drm, !mode->crtc_clock)) { + drm_dbg(display->drm, "trying to get scanoutpos for disabled pipe %c\n", pipe_name(pipe)); return false; @@ -355,7 +354,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, * preemption disabled, so the following code must not block. */ local_irq_save(irqflags); - intel_vblank_section_enter(dev_priv); + intel_vblank_section_enter(display); /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -387,7 +386,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, * We can split this into vertical and horizontal * scanout position. */ - position = (intel_de_read_fw(dev_priv, PIPEFRAMEPIXEL(dev_priv, pipe)) & PIPE_PIXEL_MASK) >> PIPE_PIXEL_SHIFT; + position = (intel_de_read_fw(display, PIPEFRAMEPIXEL(display, pipe)) & PIPE_PIXEL_MASK) >> PIPE_PIXEL_SHIFT; /* convert to pixel counts */ vbl_start *= htotal; @@ -423,7 +422,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ - intel_vblank_section_exit(dev_priv); + intel_vblank_section_exit(display); local_irq_restore(irqflags); /* @@ -458,42 +457,42 @@ bool intel_crtc_get_vblank_timestamp(struct drm_crtc *crtc, int *max_error, int intel_get_crtc_scanline(struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); unsigned long irqflags; int position; local_irq_save(irqflags); - intel_vblank_section_enter(dev_priv); + intel_vblank_section_enter(display); position = __intel_get_crtc_scanline(crtc); - intel_vblank_section_exit(dev_priv); + intel_vblank_section_exit(display); local_irq_restore(irqflags); return position; } -static bool pipe_scanline_is_moving(struct drm_i915_private *dev_priv, +static bool pipe_scanline_is_moving(struct intel_display *display, enum pipe pipe) { - i915_reg_t reg = PIPEDSL(dev_priv, pipe); + i915_reg_t reg = PIPEDSL(display, pipe); u32 line1, line2; - line1 = intel_de_read(dev_priv, reg) & PIPEDSL_LINE_MASK; + line1 = intel_de_read(display, reg) & PIPEDSL_LINE_MASK; msleep(5); - line2 = intel_de_read(dev_priv, reg) & PIPEDSL_LINE_MASK; + line2 = intel_de_read(display, reg) & PIPEDSL_LINE_MASK; return line1 != line2; } static void wait_for_pipe_scanline_moving(struct intel_crtc *crtc, bool state) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); enum pipe pipe = crtc->pipe; /* Wait for the display line to settle/start moving */ - if (wait_for(pipe_scanline_is_moving(dev_priv, pipe) == state, 100)) - drm_err(&dev_priv->drm, + if (wait_for(pipe_scanline_is_moving(display, pipe) == state, 100)) + drm_err(display->drm, "pipe %c scanline %s wait timed out\n", pipe_name(pipe), str_on_off(state)); } @@ -511,8 +510,8 @@ void intel_wait_for_pipe_scanline_moving(struct intel_crtc *crtc) void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state, bool vrr_enable) { + struct intel_display *display = to_intel_display(crtc_state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); u8 mode_flags = crtc_state->mode_flags; struct drm_display_mode adjusted_mode; int vmax_vblank_start = 0; @@ -521,7 +520,8 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state, drm_mode_init(&adjusted_mode, &crtc_state->hw.adjusted_mode); if (vrr_enable) { - drm_WARN_ON(&i915->drm, (mode_flags & I915_MODE_FLAG_VRR) == 0); + drm_WARN_ON(display->drm, + (mode_flags & I915_MODE_FLAG_VRR) == 0); adjusted_mode.crtc_vtotal = crtc_state->vrr.vmax; adjusted_mode.crtc_vblank_end = crtc_state->vrr.vmax; @@ -543,8 +543,8 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state, * __intel_get_crtc_scanline()) with vblank_time_lock? * Need to audit everything to make sure it's safe. */ - spin_lock_irqsave(&i915->drm.vblank_time_lock, irqflags); - intel_vblank_section_enter(i915); + spin_lock_irqsave(&display->drm->vblank_time_lock, irqflags); + intel_vblank_section_enter(display); drm_calc_timestamping_constants(&crtc->base, &adjusted_mode); @@ -553,8 +553,8 @@ void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state, crtc->mode_flags = mode_flags; crtc->scanline_offset = intel_crtc_scanline_offset(crtc_state); - intel_vblank_section_exit(i915); - spin_unlock_irqrestore(&i915->drm.vblank_time_lock, irqflags); + intel_vblank_section_exit(display); + spin_unlock_irqrestore(&display->drm->vblank_time_lock, irqflags); } int intel_mode_vdisplay(const struct drm_display_mode *mode) @@ -652,14 +652,15 @@ void intel_vblank_evade_init(const struct intel_crtc_state *old_crtc_state, */ if (intel_color_uses_dsb(new_crtc_state) || new_crtc_state->update_m_n || new_crtc_state->update_lrr) - evade->min -= adjusted_mode->crtc_vblank_start - adjusted_mode->crtc_vdisplay; + evade->min -= intel_mode_vblank_start(adjusted_mode) - + intel_mode_vdisplay(adjusted_mode); } /* must be called with vblank interrupt already enabled! */ int intel_vblank_evade(struct intel_vblank_evade_ctx *evade) { struct intel_crtc *crtc = evade->crtc; - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc); long timeout = msecs_to_jiffies_timeout(1); wait_queue_head_t *wq = drm_crtc_vblank_waitqueue(&crtc->base); DEFINE_WAIT(wait); @@ -681,7 +682,7 @@ int intel_vblank_evade(struct intel_vblank_evade_ctx *evade) break; if (!timeout) { - drm_err(&i915->drm, + drm_err(display->drm, "Potential atomic update failure on pipe %c\n", pipe_name(crtc->pipe)); break; diff --git a/drivers/gpu/drm/i915/display/intel_vblank.h b/drivers/gpu/drm/i915/display/intel_vblank.h index 7e526f6861e4..6d7336256982 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.h +++ b/drivers/gpu/drm/i915/display/intel_vblank.h @@ -40,6 +40,6 @@ void intel_wait_for_pipe_scanline_stopped(struct intel_crtc *crtc); void intel_wait_for_pipe_scanline_moving(struct intel_crtc *crtc); void intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state, bool vrr_enable); -int intel_crtc_scanline_to_hw(struct intel_crtc *crtc, int scanline); +int intel_crtc_scanline_offset(const struct intel_crtc_state *crtc_state); #endif /* __INTEL_VBLANK_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index 1af8407e2081..e613288937e4 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -493,7 +493,7 @@ struct child_device_config { u16 addin_offset; u8 dvo_port; /* See DEVICE_PORT_* and DVO_PORT_* above */ u8 i2c_pin; - u8 slave_addr; + u8 target_addr; u8 ddc_pin; u16 edid_ptr; u8 dvo_cfg; /* See DEVICE_CFG_* above */ @@ -502,7 +502,7 @@ struct child_device_config { struct { u8 dvo2_port; u8 i2c2_pin; - u8 slave2_addr; + u8 target2_addr; u8 ddc2_pin; } __packed; struct { diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index b9687b7692b8..2e849b015e74 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -8,6 +8,7 @@ #include <linux/limits.h> #include <drm/display/drm_dsc_helper.h> +#include <drm/drm_fixed.h> #include "i915_drv.h" #include "intel_crtc.h" @@ -76,7 +77,7 @@ intel_vdsc_set_min_max_qp(struct drm_dsc_config *vdsc_cfg, int buf, static void calculate_rc_params(struct drm_dsc_config *vdsc_cfg) { - int bpp = to_bpp_int(vdsc_cfg->bits_per_pixel); + int bpp = fxp_q4_to_int(vdsc_cfg->bits_per_pixel); int bpc = vdsc_cfg->bits_per_component; int qp_bpc_modifier = (bpc - 8) * 2; int uncompressed_bpg_rate; @@ -184,7 +185,7 @@ calculate_rc_params(struct drm_dsc_config *vdsc_cfg) } } else { /* fractional bpp part * 10000 (for precision up to 4 decimal places) */ - int fractional_bits = to_bpp_frac(vdsc_cfg->bits_per_pixel); + int fractional_bits = fxp_q4_to_frac(vdsc_cfg->bits_per_pixel); static const s8 ofs_und6[] = { 0, -2, -2, -4, -6, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12 @@ -263,7 +264,7 @@ int intel_dsc_compute_params(struct intel_crtc_state *pipe_config) struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct drm_dsc_config *vdsc_cfg = &pipe_config->dsc.config; - u16 compressed_bpp = to_bpp_int(pipe_config->dsc.compressed_bpp_x16); + u16 compressed_bpp = fxp_q4_to_int(pipe_config->dsc.compressed_bpp_x16); int err; int ret; @@ -456,36 +457,30 @@ static void intel_dsc_pps_configure(const struct intel_crtc_state *crtc_state) pps_val |= DSC_PPS0_422_ENABLE; if (vdsc_cfg->vbr_enable) pps_val |= DSC_PPS0_VBR_ENABLE; - drm_dbg_kms(&dev_priv->drm, "PPS0 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 0, pps_val); /* PPS 1 */ pps_val = DSC_PPS1_BPP(vdsc_cfg->bits_per_pixel); - drm_dbg_kms(&dev_priv->drm, "PPS1 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 1, pps_val); /* PPS 2 */ pps_val = DSC_PPS2_PIC_HEIGHT(vdsc_cfg->pic_height) | DSC_PPS2_PIC_WIDTH(vdsc_cfg->pic_width / num_vdsc_instances); - drm_dbg_kms(&dev_priv->drm, "PPS2 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 2, pps_val); /* PPS 3 */ pps_val = DSC_PPS3_SLICE_HEIGHT(vdsc_cfg->slice_height) | DSC_PPS3_SLICE_WIDTH(vdsc_cfg->slice_width); - drm_dbg_kms(&dev_priv->drm, "PPS3 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 3, pps_val); /* PPS 4 */ pps_val = DSC_PPS4_INITIAL_XMIT_DELAY(vdsc_cfg->initial_xmit_delay) | DSC_PPS4_INITIAL_DEC_DELAY(vdsc_cfg->initial_dec_delay); - drm_dbg_kms(&dev_priv->drm, "PPS4 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 4, pps_val); /* PPS 5 */ pps_val = DSC_PPS5_SCALE_INC_INT(vdsc_cfg->scale_increment_interval) | DSC_PPS5_SCALE_DEC_INT(vdsc_cfg->scale_decrement_interval); - drm_dbg_kms(&dev_priv->drm, "PPS5 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 5, pps_val); /* PPS 6 */ @@ -493,25 +488,21 @@ static void intel_dsc_pps_configure(const struct intel_crtc_state *crtc_state) DSC_PPS6_FIRST_LINE_BPG_OFFSET(vdsc_cfg->first_line_bpg_offset) | DSC_PPS6_FLATNESS_MIN_QP(vdsc_cfg->flatness_min_qp) | DSC_PPS6_FLATNESS_MAX_QP(vdsc_cfg->flatness_max_qp); - drm_dbg_kms(&dev_priv->drm, "PPS6 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 6, pps_val); /* PPS 7 */ pps_val = DSC_PPS7_SLICE_BPG_OFFSET(vdsc_cfg->slice_bpg_offset) | DSC_PPS7_NFL_BPG_OFFSET(vdsc_cfg->nfl_bpg_offset); - drm_dbg_kms(&dev_priv->drm, "PPS7 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 7, pps_val); /* PPS 8 */ pps_val = DSC_PPS8_FINAL_OFFSET(vdsc_cfg->final_offset) | DSC_PPS8_INITIAL_OFFSET(vdsc_cfg->initial_offset); - drm_dbg_kms(&dev_priv->drm, "PPS8 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 8, pps_val); /* PPS 9 */ pps_val = DSC_PPS9_RC_MODEL_SIZE(vdsc_cfg->rc_model_size) | DSC_PPS9_RC_EDGE_FACTOR(DSC_RC_EDGE_FACTOR_CONST); - drm_dbg_kms(&dev_priv->drm, "PPS9 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 9, pps_val); /* PPS 10 */ @@ -519,7 +510,6 @@ static void intel_dsc_pps_configure(const struct intel_crtc_state *crtc_state) DSC_PPS10_RC_QUANT_INC_LIMIT1(vdsc_cfg->rc_quant_incr_limit1) | DSC_PPS10_RC_TARGET_OFF_HIGH(DSC_RC_TGT_OFFSET_HI_CONST) | DSC_PPS10_RC_TARGET_OFF_LOW(DSC_RC_TGT_OFFSET_LO_CONST); - drm_dbg_kms(&dev_priv->drm, "PPS10 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 10, pps_val); /* PPS 16 */ @@ -528,31 +518,25 @@ static void intel_dsc_pps_configure(const struct intel_crtc_state *crtc_state) vdsc_cfg->slice_width) | DSC_PPS16_SLICE_ROW_PER_FRAME(vdsc_cfg->pic_height / vdsc_cfg->slice_height); - drm_dbg_kms(&dev_priv->drm, "PPS16 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 16, pps_val); if (DISPLAY_VER(dev_priv) >= 14) { /* PPS 17 */ pps_val = DSC_PPS17_SL_BPG_OFFSET(vdsc_cfg->second_line_bpg_offset); - drm_dbg_kms(&dev_priv->drm, "PPS17 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 17, pps_val); /* PPS 18 */ pps_val = DSC_PPS18_NSL_BPG_OFFSET(vdsc_cfg->nsl_bpg_offset) | DSC_PPS18_SL_OFFSET_ADJ(vdsc_cfg->second_line_offset_adj); - drm_dbg_kms(&dev_priv->drm, "PPS18 = 0x%08x\n", pps_val); intel_dsc_pps_write(crtc_state, 18, pps_val); } /* Populate the RC_BUF_THRESH registers */ memset(rc_buf_thresh_dword, 0, sizeof(rc_buf_thresh_dword)); - for (i = 0; i < DSC_NUM_BUF_RANGES - 1; i++) { + for (i = 0; i < DSC_NUM_BUF_RANGES - 1; i++) rc_buf_thresh_dword[i / 4] |= (u32)(vdsc_cfg->rc_buf_thresh[i] << BITS_PER_BYTE * (i % 4)); - drm_dbg_kms(&dev_priv->drm, "RC_BUF_THRESH_%d = 0x%08x\n", i, - rc_buf_thresh_dword[i / 4]); - } if (!is_pipe_dsc(crtc, cpu_transcoder)) { intel_de_write(dev_priv, DSCA_RC_BUF_THRESH_0, rc_buf_thresh_dword[0]); @@ -599,7 +583,7 @@ static void intel_dsc_pps_configure(const struct intel_crtc_state *crtc_state) /* Populate the RC_RANGE_PARAMETERS registers */ memset(rc_range_params_dword, 0, sizeof(rc_range_params_dword)); - for (i = 0; i < DSC_NUM_BUF_RANGES; i++) { + for (i = 0; i < DSC_NUM_BUF_RANGES; i++) rc_range_params_dword[i / 2] |= (u32)(((vdsc_cfg->rc_range_params[i].range_bpg_offset << RC_BPG_OFFSET_SHIFT) | @@ -607,9 +591,6 @@ static void intel_dsc_pps_configure(const struct intel_crtc_state *crtc_state) RC_MAX_QP_SHIFT) | (vdsc_cfg->rc_range_params[i].range_min_qp << RC_MIN_QP_SHIFT)) << 16 * (i % 2)); - drm_dbg_kms(&dev_priv->drm, "RC_RANGE_PARAM_%d = 0x%08x\n", i, - rc_range_params_dword[i / 2]); - } if (!is_pipe_dsc(crtc, cpu_transcoder)) { intel_de_write(dev_priv, DSCA_RC_RANGE_PARAMETERS_0, rc_range_params_dword[0]); @@ -989,3 +970,23 @@ void intel_dsc_get_config(struct intel_crtc_state *crtc_state) out: intel_display_power_put(dev_priv, power_domain, wakeref); } + +static void intel_vdsc_dump_state(struct drm_printer *p, int indent, + const struct intel_crtc_state *crtc_state) +{ + drm_printf_indent(p, indent, + "dsc-dss: compressed-bpp:" FXP_Q4_FMT ", slice-count: %d, split: %s\n", + FXP_Q4_ARGS(crtc_state->dsc.compressed_bpp_x16), + crtc_state->dsc.slice_count, + str_yes_no(crtc_state->dsc.dsc_split)); +} + +void intel_vdsc_state_dump(struct drm_printer *p, int indent, + const struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->dsc.compression_enable) + return; + + intel_vdsc_dump_state(p, indent, crtc_state); + drm_dsc_dump_config(p, indent, &crtc_state->dsc.config); +} diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.h b/drivers/gpu/drm/i915/display/intel_vdsc.h index 2cc41ff08909..290b2e9b3482 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.h +++ b/drivers/gpu/drm/i915/display/intel_vdsc.h @@ -8,6 +8,8 @@ #include <linux/types.h> +struct drm_printer; + enum transcoder; struct intel_crtc; struct intel_crtc_state; @@ -27,5 +29,7 @@ void intel_dsc_dsi_pps_write(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); void intel_dsc_dp_pps_write(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); +void intel_vdsc_state_dump(struct drm_printer *p, int indent, + const struct intel_crtc_state *crtc_state); #endif /* __INTEL_VDSC_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c index 5a0da64c7db3..9a51f5bac307 100644 --- a/drivers/gpu/drm/i915/display/intel_vrr.c +++ b/drivers/gpu/drm/i915/display/intel_vrr.c @@ -17,8 +17,8 @@ bool intel_vrr_is_capable(struct intel_connector *connector) { + struct intel_display *display = to_intel_display(connector); const struct drm_display_info *info = &connector->base.display_info; - struct drm_i915_private *i915 = to_i915(connector->base.dev); struct intel_dp *intel_dp; /* @@ -43,7 +43,7 @@ bool intel_vrr_is_capable(struct intel_connector *connector) return false; } - return HAS_VRR(i915) && + return HAS_VRR(display) && info->monitor_range.max_vfreq - info->monitor_range.min_vfreq > 10; } @@ -89,10 +89,9 @@ intel_vrr_check_modeset(struct intel_atomic_state *state) */ static int intel_vrr_vblank_exit_length(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc_state); - if (DISPLAY_VER(i915) >= 13) + if (DISPLAY_VER(display) >= 13) return crtc_state->vrr.guardband; else /* The hw imposes the extra scanline before frame start */ @@ -113,11 +112,11 @@ int intel_vrr_vmax_vblank_start(const struct intel_crtc_state *crtc_state) static bool is_cmrr_frac_required(struct intel_crtc_state *crtc_state) { + struct intel_display *display = to_intel_display(crtc_state); int calculated_refresh_k, actual_refresh_k, pixel_clock_per_line; struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - if (!HAS_CMRR(i915)) + if (!HAS_CMRR(display)) return false; actual_refresh_k = @@ -161,8 +160,7 @@ void intel_vrr_compute_config(struct intel_crtc_state *crtc_state, struct drm_connector_state *conn_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *i915 = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc_state); struct intel_connector *connector = to_intel_connector(conn_state->connector); struct intel_dp *intel_dp = intel_attached_dp(connector); @@ -186,7 +184,7 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state, if (!crtc_state->vrr.in_range) return; - if (HAS_LRR(i915)) + if (HAS_LRR(display)) crtc_state->update_lrr = true; vmin = DIV_ROUND_UP(adjusted_mode->crtc_clock * 1000, @@ -233,8 +231,7 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state, crtc_state->mode_flags |= I915_MODE_FLAG_VRR; } - if (intel_dp_as_sdp_supported(intel_dp) && - crtc_state->vrr.enable) { + if (intel_dp->as_sdp_supported && crtc_state->vrr.enable) { crtc_state->vrr.vsync_start = (crtc_state->hw.adjusted_mode.crtc_vtotal - crtc_state->hw.adjusted_mode.vsync_start); @@ -247,7 +244,7 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state, * For XE_LPD+, we use guardband and pipeline override * is deprecated. */ - if (DISPLAY_VER(i915) >= 13) { + if (DISPLAY_VER(display) >= 13) { crtc_state->vrr.guardband = crtc_state->vrr.vmin + 1 - adjusted_mode->crtc_vblank_start; } else { @@ -259,9 +256,9 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state, static u32 trans_vrr_ctl(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); - if (DISPLAY_VER(i915) >= 13) + if (DISPLAY_VER(display) >= 13) return VRR_CTL_IGN_MAX_SHIFT | VRR_CTL_FLIP_LINE_EN | XELPD_VRR_CTL_VRR_GUARDBAND(crtc_state->vrr.guardband); else @@ -272,7 +269,7 @@ static u32 trans_vrr_ctl(const struct intel_crtc_state *crtc_state) void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; /* @@ -280,133 +277,130 @@ void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state) * TGL: generate VRR "safe window" for DSB vblank waits * ADL/DG2: make TRANS_SET_CONTEXT_LATENCY effective with VRR */ - if (IS_DISPLAY_VER(dev_priv, 12, 13)) - intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder), + if (IS_DISPLAY_VER(display, 12, 13)) + intel_de_rmw(display, CHICKEN_TRANS(cpu_transcoder), 0, PIPE_VBLANK_WITH_DELAY); if (!crtc_state->vrr.flipline) { - intel_de_write(dev_priv, - TRANS_VRR_CTL(dev_priv, cpu_transcoder), 0); + intel_de_write(display, + TRANS_VRR_CTL(display, cpu_transcoder), 0); return; } if (crtc_state->cmrr.enable) { - intel_de_write(dev_priv, TRANS_CMRR_M_HI(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_CMRR_M_HI(display, cpu_transcoder), upper_32_bits(crtc_state->cmrr.cmrr_m)); - intel_de_write(dev_priv, TRANS_CMRR_M_LO(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_CMRR_M_LO(display, cpu_transcoder), lower_32_bits(crtc_state->cmrr.cmrr_m)); - intel_de_write(dev_priv, TRANS_CMRR_N_HI(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_CMRR_N_HI(display, cpu_transcoder), upper_32_bits(crtc_state->cmrr.cmrr_n)); - intel_de_write(dev_priv, TRANS_CMRR_N_LO(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_CMRR_N_LO(display, cpu_transcoder), lower_32_bits(crtc_state->cmrr.cmrr_n)); } - intel_de_write(dev_priv, TRANS_VRR_VMIN(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_VRR_VMIN(display, cpu_transcoder), crtc_state->vrr.vmin - 1); - intel_de_write(dev_priv, TRANS_VRR_VMAX(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_VRR_VMAX(display, cpu_transcoder), crtc_state->vrr.vmax - 1); - intel_de_write(dev_priv, TRANS_VRR_CTL(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), trans_vrr_ctl(crtc_state)); - intel_de_write(dev_priv, TRANS_VRR_FLIPLINE(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_VRR_FLIPLINE(display, cpu_transcoder), crtc_state->vrr.flipline - 1); } void intel_vrr_send_push(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc_state); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (!crtc_state->vrr.enable) return; - intel_de_write(dev_priv, TRANS_PUSH(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), TRANS_PUSH_EN | TRANS_PUSH_SEND); } bool intel_vrr_is_push_sent(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(crtc_state); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (!crtc_state->vrr.enable) return false; - return intel_de_read(dev_priv, TRANS_PUSH(dev_priv, cpu_transcoder)) & TRANS_PUSH_SEND; + return intel_de_read(display, TRANS_PUSH(display, cpu_transcoder)) & TRANS_PUSH_SEND; } void intel_vrr_enable(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (!crtc_state->vrr.enable) return; - intel_de_write(dev_priv, TRANS_PUSH(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), TRANS_PUSH_EN); - if (HAS_AS_SDP(dev_priv)) - intel_de_write(dev_priv, - TRANS_VRR_VSYNC(dev_priv, cpu_transcoder), + if (HAS_AS_SDP(display)) + intel_de_write(display, + TRANS_VRR_VSYNC(display, cpu_transcoder), VRR_VSYNC_END(crtc_state->vrr.vsync_end) | VRR_VSYNC_START(crtc_state->vrr.vsync_start)); if (crtc_state->cmrr.enable) { - intel_de_write(dev_priv, TRANS_VRR_CTL(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), VRR_CTL_VRR_ENABLE | VRR_CTL_CMRR_ENABLE | trans_vrr_ctl(crtc_state)); } else { - intel_de_write(dev_priv, TRANS_VRR_CTL(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), VRR_CTL_VRR_ENABLE | trans_vrr_ctl(crtc_state)); } } void intel_vrr_disable(const struct intel_crtc_state *old_crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_display *display = to_intel_display(old_crtc_state); enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; if (!old_crtc_state->vrr.enable) return; - intel_de_write(dev_priv, TRANS_VRR_CTL(dev_priv, cpu_transcoder), + intel_de_write(display, TRANS_VRR_CTL(display, cpu_transcoder), trans_vrr_ctl(old_crtc_state)); - intel_de_wait_for_clear(dev_priv, - TRANS_VRR_STATUS(dev_priv, cpu_transcoder), + intel_de_wait_for_clear(display, + TRANS_VRR_STATUS(display, cpu_transcoder), VRR_STATUS_VRR_EN_LIVE, 1000); - intel_de_write(dev_priv, TRANS_PUSH(dev_priv, cpu_transcoder), 0); + intel_de_write(display, TRANS_PUSH(display, cpu_transcoder), 0); - if (HAS_AS_SDP(dev_priv)) - intel_de_write(dev_priv, - TRANS_VRR_VSYNC(dev_priv, cpu_transcoder), 0); + if (HAS_AS_SDP(display)) + intel_de_write(display, + TRANS_VRR_VSYNC(display, cpu_transcoder), 0); } void intel_vrr_get_config(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + struct intel_display *display = to_intel_display(crtc_state); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; u32 trans_vrr_ctl, trans_vrr_vsync; - trans_vrr_ctl = intel_de_read(dev_priv, - TRANS_VRR_CTL(dev_priv, cpu_transcoder)); + trans_vrr_ctl = intel_de_read(display, + TRANS_VRR_CTL(display, cpu_transcoder)); crtc_state->vrr.enable = trans_vrr_ctl & VRR_CTL_VRR_ENABLE; - if (HAS_CMRR(dev_priv)) + if (HAS_CMRR(display)) crtc_state->cmrr.enable = (trans_vrr_ctl & VRR_CTL_CMRR_ENABLE); if (crtc_state->cmrr.enable) { crtc_state->cmrr.cmrr_n = - intel_de_read64_2x32(dev_priv, TRANS_CMRR_N_LO(dev_priv, cpu_transcoder), - TRANS_CMRR_N_HI(dev_priv, cpu_transcoder)); + intel_de_read64_2x32(display, TRANS_CMRR_N_LO(display, cpu_transcoder), + TRANS_CMRR_N_HI(display, cpu_transcoder)); crtc_state->cmrr.cmrr_m = - intel_de_read64_2x32(dev_priv, TRANS_CMRR_M_LO(dev_priv, cpu_transcoder), - TRANS_CMRR_M_HI(dev_priv, cpu_transcoder)); + intel_de_read64_2x32(display, TRANS_CMRR_M_LO(display, cpu_transcoder), + TRANS_CMRR_M_HI(display, cpu_transcoder)); } - if (DISPLAY_VER(dev_priv) >= 13) + if (DISPLAY_VER(display) >= 13) crtc_state->vrr.guardband = REG_FIELD_GET(XELPD_VRR_CTL_VRR_GUARDBAND_MASK, trans_vrr_ctl); else @@ -415,21 +409,21 @@ void intel_vrr_get_config(struct intel_crtc_state *crtc_state) REG_FIELD_GET(VRR_CTL_PIPELINE_FULL_MASK, trans_vrr_ctl); if (trans_vrr_ctl & VRR_CTL_FLIP_LINE_EN) { - crtc_state->vrr.flipline = intel_de_read(dev_priv, - TRANS_VRR_FLIPLINE(dev_priv, cpu_transcoder)) + 1; - crtc_state->vrr.vmax = intel_de_read(dev_priv, - TRANS_VRR_VMAX(dev_priv, cpu_transcoder)) + 1; - crtc_state->vrr.vmin = intel_de_read(dev_priv, - TRANS_VRR_VMIN(dev_priv, cpu_transcoder)) + 1; + crtc_state->vrr.flipline = intel_de_read(display, + TRANS_VRR_FLIPLINE(display, cpu_transcoder)) + 1; + crtc_state->vrr.vmax = intel_de_read(display, + TRANS_VRR_VMAX(display, cpu_transcoder)) + 1; + crtc_state->vrr.vmin = intel_de_read(display, + TRANS_VRR_VMIN(display, cpu_transcoder)) + 1; } if (crtc_state->vrr.enable) { crtc_state->mode_flags |= I915_MODE_FLAG_VRR; - if (HAS_AS_SDP(dev_priv)) { + if (HAS_AS_SDP(display)) { trans_vrr_vsync = - intel_de_read(dev_priv, - TRANS_VRR_VSYNC(dev_priv, cpu_transcoder)); + intel_de_read(display, + TRANS_VRR_VSYNC(display, cpu_transcoder)); crtc_state->vrr.vsync_start = REG_FIELD_GET(VRR_VSYNC_START_MASK, trans_vrr_vsync); crtc_state->vrr.vsync_end = diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index ba5a628b4757..9452cad41d07 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -14,6 +14,7 @@ #include "intel_de.h" #include "intel_display_irq.h" #include "intel_display_types.h" +#include "intel_dpt.h" #include "intel_fb.h" #include "intel_fbc.h" #include "intel_frontbuffer.h" @@ -537,6 +538,8 @@ static u32 tgl_plane_min_alignment(struct intel_plane *plane, case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS: case I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC: case I915_FORMAT_MOD_4_TILED_DG2_MC_CCS: + case I915_FORMAT_MOD_4_TILED_BMG_CCS: + case I915_FORMAT_MOD_4_TILED_LNL_CCS: /* * Align to at least 4x1 main surface * tiles (16K) to match 64B of AUX. @@ -948,6 +951,9 @@ static u32 skl_plane_ctl_tiling(u64 fb_modifier) return PLANE_CTL_TILED_4 | PLANE_CTL_RENDER_DECOMPRESSION_ENABLE; case I915_FORMAT_MOD_4_TILED_MTL_MC_CCS: return PLANE_CTL_TILED_4 | PLANE_CTL_MEDIA_DECOMPRESSION_ENABLE; + case I915_FORMAT_MOD_4_TILED_BMG_CCS: + case I915_FORMAT_MOD_4_TILED_LNL_CCS: + return PLANE_CTL_TILED_4 | PLANE_CTL_RENDER_DECOMPRESSION_ENABLE; case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC: return PLANE_CTL_TILED_Y | PLANE_CTL_RENDER_DECOMPRESSION_ENABLE; @@ -1085,11 +1091,6 @@ static u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, if (DISPLAY_VER(dev_priv) == 13) plane_ctl |= adlp_plane_ctl_arb_slots(plane_state); - if (GRAPHICS_VER(dev_priv) >= 20 && - fb->modifier == I915_FORMAT_MOD_4_TILED) { - plane_ctl |= PLANE_CTL_RENDER_DECOMPRESSION_ENABLE; - } - return plane_ctl; } @@ -1162,7 +1163,7 @@ static u32 skl_surf_address(const struct intel_plane_state *plane_state, * within the DPT is always 0. */ drm_WARN_ON(&i915->drm, plane_state->dpt_vma && - plane_state->dpt_vma->node.start); + intel_dpt_offset(plane_state->dpt_vma)); drm_WARN_ON(&i915->drm, offset & 0x1fffff); return offset >> 9; } else { diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c index a2726364b34d..045c7cac166b 100644 --- a/drivers/gpu/drm/i915/display/skl_watermark.c +++ b/drivers/gpu/drm/i915/display/skl_watermark.c @@ -2830,17 +2830,17 @@ static int skl_wm_add_affected_planes(struct intel_atomic_state *state, } /* - * If Fixed Refresh Rate: + * If Fixed Refresh Rate or For VRR case Vmin = Vmax = Flipline: * Program DEEP PKG_C_LATENCY Pkg C with highest valid latency from * watermark level1 and up and above. If watermark level 1 is * invalid program it with all 1's. * Program PKG_C_LATENCY Added Wake Time = DSB execution time - * If Variable Refresh Rate: + * If Variable Refresh Rate where Vmin != Vmax != Flipline: * Program DEEP PKG_C_LATENCY Pkg C with all 1's. * Program PKG_C_LATENCY Added Wake Time = 0 */ static void -skl_program_dpkgc_latency(struct drm_i915_private *i915, bool vrr_enabled) +skl_program_dpkgc_latency(struct drm_i915_private *i915, bool enable_dpkgc) { u32 max_latency = 0; u32 clear = 0, val = 0; @@ -2849,15 +2849,15 @@ skl_program_dpkgc_latency(struct drm_i915_private *i915, bool vrr_enabled) if (DISPLAY_VER(i915) < 20) return; - if (vrr_enabled) { - max_latency = LNL_PKG_C_LATENCY_MASK; - added_wake_time = 0; - } else { + if (enable_dpkgc) { max_latency = skl_watermark_max_latency(i915, 1); if (max_latency == 0) max_latency = LNL_PKG_C_LATENCY_MASK; added_wake_time = DSB_EXE_TIME + i915->display.sagv.block_time_us; + } else { + max_latency = LNL_PKG_C_LATENCY_MASK; + added_wake_time = 0; } clear |= LNL_ADDED_WAKE_TIME_MASK | LNL_PKG_C_LATENCY_MASK; @@ -2873,7 +2873,7 @@ skl_compute_wm(struct intel_atomic_state *state) struct intel_crtc *crtc; struct intel_crtc_state __maybe_unused *new_crtc_state; int ret, i; - bool vrr_enabled = false; + bool enable_dpkgc = false; for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { ret = skl_build_pipe_wm(state, crtc); @@ -2899,11 +2899,13 @@ skl_compute_wm(struct intel_atomic_state *state) if (ret) return ret; - if (new_crtc_state->vrr.enable) - vrr_enabled = true; + if ((new_crtc_state->vrr.vmin == new_crtc_state->vrr.vmax && + new_crtc_state->vrr.vmin == new_crtc_state->vrr.flipline) || + !new_crtc_state->vrr.enable) + enable_dpkgc = true; } - skl_program_dpkgc_latency(to_i915(state->base.dev), vrr_enabled); + skl_program_dpkgc_latency(to_i915(state->base.dev), enable_dpkgc); skl_print_wm_changes(state); diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index eae5b5e09aa8..d21f3fb39706 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1870,7 +1870,6 @@ static const struct dmi_system_id vlv_dsi_dmi_quirk_table[] = { /* Lenovo Yoga Tab 3 Pro YT3-X90F */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"), }, .driver_data = (void *)vlv_dsi_lenovo_yoga_tab3_backlight_fixup, @@ -1880,6 +1879,7 @@ static const struct dmi_system_id vlv_dsi_dmi_quirk_table[] = { void vlv_dsi_init(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct intel_dsi *intel_dsi; struct intel_encoder *encoder; struct intel_connector *connector; @@ -1891,7 +1891,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) drm_dbg_kms(&dev_priv->drm, "\n"); /* There is no detection method for MIPI so rely on VBT */ - if (!intel_bios_is_dsi_present(dev_priv, &port)) + if (!intel_bios_is_dsi_present(display, &port)) return; if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) @@ -1946,7 +1946,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) intel_dsi->panel_power_off_time = ktime_get_boottime(); - intel_bios_init_panel_late(dev_priv, &connector->panel, NULL, NULL); + intel_bios_init_panel_late(display, &connector->panel, NULL, NULL); if (connector->panel.vbt.dsi.config->dual_link) intel_dsi->ports = BIT(PORT_A) | BIT(PORT_C); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index d54162ce0f99..01b7587dd1f8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -12,8 +12,6 @@ #include <drm/drm_auth.h> #include <drm/drm_syncobj.h> -#include "display/intel_frontbuffer.h" - #include "gem/i915_gem_ioctls.h" #include "gt/intel_context.h" #include "gt/intel_gpu_commands.h" @@ -1533,7 +1531,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) u64_to_user_ptr(entry->relocs_ptr); unsigned long remain = entry->relocation_count; - if (unlikely(remain > N_RELOC(ULONG_MAX))) + if (unlikely(remain > N_RELOC(INT_MAX))) return -EINVAL; /* @@ -1641,7 +1639,7 @@ static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) if (size == 0) return 0; - if (size > N_RELOC(ULONG_MAX)) + if (size > N_RELOC(INT_MAX)) return -EINVAL; addr = u64_to_user_ptr(entry->relocs_ptr); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index a2195e28b625..21274aa9bddd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -252,6 +252,7 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf) struct vm_area_struct *area = vmf->vma; struct i915_mmap_offset *mmo = area->vm_private_data; struct drm_i915_gem_object *obj = mmo->obj; + unsigned long obj_offset; resource_size_t iomap; int err; @@ -273,10 +274,11 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf) iomap -= obj->mm.region->region.start; } + obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node); /* PTEs are revoked in obj->ops->put_pages() */ err = remap_io_sg(area, area->vm_start, area->vm_end - area->vm_start, - obj->mm.pages->sgl, iomap); + obj->mm.pages->sgl, obj_offset, iomap); if (area->vm_flags & VM_WRITE) { GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); @@ -290,6 +292,47 @@ out: return i915_error_to_vmf_fault(err); } +static void set_address_limits(struct vm_area_struct *area, + struct i915_vma *vma, + unsigned long obj_offset, + resource_size_t gmadr_start, + unsigned long *start_vaddr, + unsigned long *end_vaddr, + unsigned long *pfn) +{ + unsigned long vm_start, vm_end, vma_size; /* user's memory parameters */ + long start, end; /* memory boundaries */ + + /* + * Let's move into the ">> PAGE_SHIFT" + * domain to be sure not to lose bits + */ + vm_start = area->vm_start >> PAGE_SHIFT; + vm_end = area->vm_end >> PAGE_SHIFT; + vma_size = vma->size >> PAGE_SHIFT; + + /* + * Calculate the memory boundaries by considering the offset + * provided by the user during memory mapping and the offset + * provided for the partial mapping. + */ + start = vm_start; + start -= obj_offset; + start += vma->gtt_view.partial.offset; + end = start + vma_size; + + start = max_t(long, start, vm_start); + end = min_t(long, end, vm_end); + + /* Let's move back into the "<< PAGE_SHIFT" domain */ + *start_vaddr = (unsigned long)start << PAGE_SHIFT; + *end_vaddr = (unsigned long)end << PAGE_SHIFT; + + *pfn = (gmadr_start + i915_ggtt_offset(vma)) >> PAGE_SHIFT; + *pfn += (*start_vaddr - area->vm_start) >> PAGE_SHIFT; + *pfn += obj_offset - vma->gtt_view.partial.offset; +} + static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) { #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) @@ -302,14 +345,18 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) struct i915_ggtt *ggtt = to_gt(i915)->ggtt; bool write = area->vm_flags & VM_WRITE; struct i915_gem_ww_ctx ww; + unsigned long obj_offset; + unsigned long start, end; /* memory boundaries */ intel_wakeref_t wakeref; struct i915_vma *vma; pgoff_t page_offset; + unsigned long pfn; int srcu; int ret; - /* We don't use vmf->pgoff since that has the fake offset */ + obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node); page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; + page_offset += obj_offset; trace_i915_gem_object_fault(obj, page_offset, true, write); @@ -402,12 +449,16 @@ retry: if (ret) goto err_unpin; + /* + * Dump all the necessary parameters in this function to perform the + * arithmetic calculation for the virtual address start and end and + * the PFN (Page Frame Number). + */ + set_address_limits(area, vma, obj_offset, ggtt->gmadr.start, + &start, &end, &pfn); + /* Finally, remap it using the new GTT offset */ - ret = remap_io_mapping(area, - area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT), - (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT, - min_t(u64, vma->size, area->vm_end - area->vm_start), - &ggtt->iomap); + ret = remap_io_mapping(area, start, pfn, end - start, &ggtt->iomap); if (ret) goto err_fence; @@ -1030,9 +1081,9 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) rcu_read_lock(); drm_vma_offset_lock_lookup(dev->vma_offset_manager); - node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, - vma->vm_pgoff, - vma_pages(vma)); + node = drm_vma_offset_lookup_locked(dev->vma_offset_manager, + vma->vm_pgoff, + vma_pages(vma)); if (node && drm_vma_node_is_allowed(node, priv)) { /* * Skip 0-refcnted objects as it is in the process of being @@ -1084,6 +1135,8 @@ int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma mmo = mmap_offset_attach(obj, mmap_type, NULL); if (IS_ERR(mmo)) return PTR_ERR(mmo); + + vma->vm_pgoff += drm_vma_node_start(&mmo->vma_node); } /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 1495b6074492..68413c05c812 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -535,7 +535,7 @@ struct drm_i915_gem_object { * I915_CACHE_NONE. The only exception is userptr objects, where we * instead force I915_CACHE_LLC, but we also don't allow userspace to * ever change the @cache_level for such objects. Another special case - * is dma-buf, which doesn't rely on @cache_dirty, but there we + * is dma-buf, which doesn't rely on @cache_dirty, but there we * always do a forced flush when acquiring the pages, if there is a * chance that the pages can be read directly from main memory with * the GPU. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index e6f177183c0f..5c72462d1f57 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -165,7 +165,6 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : obj->mm.region, &places[0], obj->bo_offset, obj->base.size, flags); - places[0].flags |= TTM_PL_FLAG_DESIRED; /* Cache this on object? */ for (i = 0; i < num_allowed; ++i) { @@ -779,13 +778,16 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, .interruptible = true, .no_wait_gpu = false, }; - int real_num_busy; + struct ttm_placement initial_placement; + struct ttm_place initial_place; int ret; /* First try only the requested placement. No eviction. */ - real_num_busy = placement->num_placement; - placement->num_placement = 1; - ret = ttm_bo_validate(bo, placement, &ctx); + initial_placement.num_placement = 1; + memcpy(&initial_place, placement->placement, sizeof(struct ttm_place)); + initial_place.flags |= TTM_PL_FLAG_DESIRED; + initial_placement.placement = &initial_place; + ret = ttm_bo_validate(bo, &initial_placement, &ctx); if (ret) { ret = i915_ttm_err_to_gem(ret); /* @@ -800,7 +802,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, * If the initial attempt fails, allow all accepted placements, * evicting if necessary. */ - placement->num_placement = real_num_busy; ret = ttm_bo_validate(bo, placement, &ctx); if (ret) return i915_ttm_err_to_gem(ret); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 3b740ca25000..4d30a86016f2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -693,6 +693,8 @@ void intel_engines_release(struct intel_gt *gt) memset(&engine->reset, 0, sizeof(engine->reset)); } + + llist_del_all(>->i915->uabi_engines_llist); } void intel_engine_free_request_pool(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 206a5e0fedf1..d60a6ca0cae5 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -12,7 +12,6 @@ #include <drm/intel/i915_drm.h> #include <drm/intel/intel-gtt.h> -#include "display/intel_display.h" #include "gem/i915_gem_lmem.h" #include "intel_context.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 2bd8d98d2110..5394bc7d4daf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -220,6 +220,7 @@ #define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1) #define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) #define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) +#define CMD_3DSTATE_MESH_CONTROL ((0x3 << 29) | (0x3 << 27) | (0x0 << 24) | (0x77 << 16) | (0x3)) #define XY_CTRL_SURF_INSTR_SIZE 5 #define MI_FLUSH_DW_SIZE 3 diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index b5e114d284ad..998ca029b73a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -174,7 +174,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt) int intel_gt_probe_all(struct drm_i915_private *i915); int intel_gt_tiles_init(struct drm_i915_private *i915); -void intel_gt_release_all(struct drm_i915_private *i915); #define for_each_gt(gt__, i915__, id__) \ for ((id__) = 0; \ @@ -208,4 +207,10 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt, void intel_gt_bind_context_set_ready(struct intel_gt *gt); void intel_gt_bind_context_set_unready(struct intel_gt *gt); bool intel_gt_is_bind_context_ready(struct intel_gt *gt); + +static inline void intel_gt_set_wedged_async(struct intel_gt *gt) +{ + queue_work(system_highpri_wq, >->wedge); +} + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index cfdd2ad5e954..bcee084b1f27 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -292,6 +292,8 @@ struct intel_gt { struct gt_defaults defaults; struct kobject *sysfs_defaults; + struct work_struct wedge; + struct i915_perf_gt perf; /** link: &ggtt.gt_list */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 735cd23a43c6..8f1ea95471ef 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1013,6 +1013,15 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) GT_TRACE(gt, "end\n"); } +static void set_wedged_work(struct work_struct *w) +{ + struct intel_gt *gt = container_of(w, struct intel_gt, wedge); + intel_wakeref_t wf; + + with_intel_runtime_pm(gt->uncore->rpm, wf) + __intel_gt_set_wedged(gt); +} + void intel_gt_set_wedged(struct intel_gt *gt) { intel_wakeref_t wakeref; @@ -1614,6 +1623,7 @@ void intel_gt_init_reset(struct intel_gt *gt) init_waitqueue_head(>->reset.queue); mutex_init(>->reset.mutex); init_srcu_struct(>->reset.backoff_srcu); + INIT_WORK(>->wedge, set_wedged_work); /* * While undesirable to wait inside the shrinker, complain anyway. @@ -1640,7 +1650,7 @@ static void intel_wedge_me(struct work_struct *work) struct intel_wedge_me *w = container_of(work, typeof(*w), work.work); gt_err(w->gt, "%s timed out, cancelling all in-flight rendering.\n", w->name); - intel_gt_set_wedged(w->gt); + set_wedged_work(&w->gt->wedge); } void __intel_init_wedge(struct intel_wedge_me *w, diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 09a287c1aedd..bfe6d8fc820f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -974,7 +974,12 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) if (ret) return ret; - cs = intel_ring_begin(rq, (wal->count * 2 + 2)); + if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) || + IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS) + cs = intel_ring_begin(rq, (wal->count * 2 + 6)); + else + cs = intel_ring_begin(rq, (wal->count * 2 + 2)); + if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1004,6 +1009,15 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq) } *cs++ = MI_NOOP; + /* Wa_14019789679 */ + if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) || + IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS) { + *cs++ = CMD_3DSTATE_MESH_CONTROL; + *cs++ = 0; + *cs++ = 0; + *cs++ = MI_NOOP; + } + intel_uncore_forcewake_put__locked(uncore, fw); spin_unlock(&uncore->lock); intel_gt_mcr_unlock(wal->gt, flags); diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 3eff364ccf3a..ca460cee4f8b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -336,7 +336,7 @@ static int clear(struct intel_migrate *migrate, if (vaddr[x] != val) { pr_err("%ps failed, (%u != %u), offset: %zu\n", - fn, vaddr[x], val, x * sizeof(u32)); + fn, vaddr[x], val, x * sizeof(u32)); igt_hexdump(vaddr + i * 1024, 4096); err = -EINVAL; } diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h index 37ff539a6963..0c709e6c15be 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h @@ -107,6 +107,7 @@ enum { enum { GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE = 0x9001, GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002, + GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE = 0x9006, }; #endif /* _ABI_GUC_KLVS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index 3b69bc6616bd..551b0d7974ff 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -212,6 +212,37 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s } } + if (IS_ARROWLAKE(gt->i915)) { + bool too_old = false; + + /* + * ARL requires a newer firmware than MTL did (102.0.10.1878) but the + * firmware is actually common. So, need to do an explicit version check + * here rather than using a separate table entry. And if the older + * MTL-only version is found, then just don't use GSC rather than aborting + * the driver load. + */ + if (gsc->release.major < 102) { + too_old = true; + } else if (gsc->release.major == 102) { + if (gsc->release.minor == 0) { + if (gsc->release.patch < 10) { + too_old = true; + } else if (gsc->release.patch == 10) { + if (gsc->release.build < 1878) + too_old = true; + } + } + } + + if (too_old) { + gt_info(gt, "GSC firmware too old for ARL, got %d.%d.%d.%d but need at least 102.0.10.1878", + gsc->release.major, gsc->release.minor, + gsc->release.patch, gsc->release.build); + return -EINVAL; + } + } + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 5e60a34692af..097fc6bd1285 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -296,7 +296,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) /* Wa_16019325821 */ /* Wa_14019159160 */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) flags |= GUC_WA_RCS_CCS_SWITCHOUT; /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 7995f059f30d..46fabbfc775e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -815,8 +815,7 @@ engine_instance_list: return PAGE_ALIGN(total_size); } -static void guc_waklv_enable_simple(struct intel_guc *guc, - u32 klv_id, u32 *offset, u32 *remain) +static void guc_waklv_enable_simple(struct intel_guc *guc, u32 *offset, u32 *remain, u32 klv_id) { u32 size; u32 klv_entry[] = { @@ -850,19 +849,20 @@ static void guc_waklv_init(struct intel_guc *guc) remain = guc_ads_waklv_size(guc); /* Wa_14019159160 */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) - guc_waklv_enable_simple(guc, - GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE, - &offset, &remain); + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { + guc_waklv_enable_simple(guc, &offset, &remain, + GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE); + guc_waklv_enable_simple(guc, &offset, &remain, + GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE); + } /* Wa_16021333562 */ if ((GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 21, 1)) && (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_MEDIA_GT_IP_RANGE(gt, IP_VER(13, 0), IP_VER(13, 0)) || IS_DG2(gt->i915))) - guc_waklv_enable_simple(guc, - GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED, - &offset, &remain); + guc_waklv_enable_simple(guc, &offset, &remain, + GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED); size = guc_ads_waklv_size(guc) - remain; if (!size) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 9400d0eb682b..c3a5d9e1288e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2014,11 +2014,12 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc) /* * Technically possible for either of these values to be non-zero here, - * but very unlikely + harmless. Regardless let's add a warn so we can + * but very unlikely + harmless. Regardless let's add an error so we can * see in CI if this happens frequently / a precursor to taking down the * machine. */ - GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); + if (atomic_read(&guc->outstanding_submission_g2h)) + guc_err(guc, "Unexpected outstanding GuC to Host in reset finish\n"); atomic_set(&guc->outstanding_submission_g2h, 0); intel_guc_global_policies_update(guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 7a63abf8f644..5b8080ec5315 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -99,7 +99,7 @@ static void __confirm_options(struct intel_uc *uc) } if (!intel_uc_supports_guc(uc)) - gt_info(gt, "Incompatible option enable_guc=%d - %s\n", + gt_info(gt, "Incompatible option enable_guc=%d - %s\n", i915->params.enable_guc, "GuC is not supported!"); if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION && diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index d80278eb45d7..ec33ad942115 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -698,12 +698,18 @@ static int check_gsc_manifest(struct intel_gt *gt, const struct firmware *fw, struct intel_uc_fw *uc_fw) { + int ret; + switch (uc_fw->type) { case INTEL_UC_FW_TYPE_HUC: - intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size); + ret = intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size); + if (ret) + return ret; break; case INTEL_UC_FW_TYPE_GSC: - intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size); + ret = intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size); + if (ret) + return ret; break; default: MISSING_CASE(uc_fw->type); diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c index af9afdb53c7f..c022dc736045 100644 --- a/drivers/gpu/drm/i915/gvt/edid.c +++ b/drivers/gpu/drm/i915/gvt/edid.c @@ -42,8 +42,8 @@ #define GMBUS1_TOTAL_BYTES_MASK 0x1ff #define gmbus1_total_byte_count(v) (((v) >> \ GMBUS1_TOTAL_BYTES_SHIFT) & GMBUS1_TOTAL_BYTES_MASK) -#define gmbus1_slave_addr(v) (((v) & 0xff) >> 1) -#define gmbus1_slave_index(v) (((v) >> 8) & 0xff) +#define gmbus1_target_addr(v) (((v) & 0xff) >> 1) +#define gmbus1_target_index(v) (((v) >> 8) & 0xff) #define gmbus1_bus_cycle(v) (((v) >> 25) & 0x7) /* GMBUS0 bits definitions */ @@ -54,7 +54,7 @@ static unsigned char edid_get_byte(struct intel_vgpu *vgpu) struct intel_vgpu_i2c_edid *edid = &vgpu->display.i2c_edid; unsigned char chr = 0; - if (edid->state == I2C_NOT_SPECIFIED || !edid->slave_selected) { + if (edid->state == I2C_NOT_SPECIFIED || !edid->target_selected) { gvt_vgpu_err("Driver tries to read EDID without proper sequence!\n"); return 0; } @@ -179,7 +179,7 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { struct intel_vgpu_i2c_edid *i2c_edid = &vgpu->display.i2c_edid; - u32 slave_addr; + u32 target_addr; u32 wvalue = *(u32 *)p_data; if (vgpu_vreg(vgpu, offset) & GMBUS_SW_CLR_INT) { @@ -210,21 +210,21 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, i2c_edid->gmbus.total_byte_count = gmbus1_total_byte_count(wvalue); - slave_addr = gmbus1_slave_addr(wvalue); + target_addr = gmbus1_target_addr(wvalue); /* vgpu gmbus only support EDID */ - if (slave_addr == EDID_ADDR) { - i2c_edid->slave_selected = true; - } else if (slave_addr != 0) { + if (target_addr == EDID_ADDR) { + i2c_edid->target_selected = true; + } else if (target_addr != 0) { gvt_dbg_dpy( - "vgpu%d: unsupported gmbus slave addr(0x%x)\n" + "vgpu%d: unsupported gmbus target addr(0x%x)\n" " gmbus operations will be ignored.\n", - vgpu->id, slave_addr); + vgpu->id, target_addr); } if (wvalue & GMBUS_CYCLE_INDEX) i2c_edid->current_edid_read = - gmbus1_slave_index(wvalue); + gmbus1_target_index(wvalue); i2c_edid->gmbus.cycle_type = gmbus1_bus_cycle(wvalue); switch (gmbus1_bus_cycle(wvalue)) { @@ -523,7 +523,7 @@ void intel_gvt_i2c_handle_aux_ch_write(struct intel_vgpu *vgpu, } else if (addr == EDID_ADDR) { i2c_edid->state = I2C_AUX_CH; i2c_edid->port = port_idx; - i2c_edid->slave_selected = true; + i2c_edid->target_selected = true; if (intel_vgpu_has_monitor_on_port(vgpu, port_idx) && intel_vgpu_port_is_dp(vgpu, port_idx)) @@ -542,7 +542,7 @@ void intel_gvt_i2c_handle_aux_ch_write(struct intel_vgpu *vgpu, return; if (drm_WARN_ON(&i915->drm, msg_length != 4)) return; - if (i2c_edid->edid_available && i2c_edid->slave_selected) { + if (i2c_edid->edid_available && i2c_edid->target_selected) { unsigned char val = edid_get_byte(vgpu); aux_data_for_write = (val << 16); @@ -571,7 +571,7 @@ void intel_vgpu_init_i2c_edid(struct intel_vgpu *vgpu) edid->state = I2C_NOT_SPECIFIED; edid->port = -1; - edid->slave_selected = false; + edid->target_selected = false; edid->edid_available = false; edid->current_edid_read = 0; diff --git a/drivers/gpu/drm/i915/gvt/edid.h b/drivers/gpu/drm/i915/gvt/edid.h index dfe0cbc6aad8..c3b5a55aecb3 100644 --- a/drivers/gpu/drm/i915/gvt/edid.h +++ b/drivers/gpu/drm/i915/gvt/edid.h @@ -80,7 +80,7 @@ enum gmbus_cycle_type { * R/W Protect * Command and Status. * bit0 is the direction bit: 1 is read; 0 is write. - * bit1 - bit7 is slave 7-bit address. + * bit1 - bit7 is target 7-bit address. * bit16 - bit24 total byte count (ignore?) * * GMBUS2: @@ -130,7 +130,7 @@ struct intel_vgpu_i2c_edid { enum i2c_state state; unsigned int port; - bool slave_selected; + bool target_selected; bool edid_available; unsigned int current_edid_read; diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index d2bed466540a..908f910420c2 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -86,7 +86,7 @@ struct efp_child_device_config { u8 skip2; u8 dvo_port; u8 i2c_pin; /* for add-in card */ - u8 slave_addr; /* for add-in card */ + u8 target_addr; /* for add-in card */ u8 ddc_pin; u16 edid_ptr; u8 dvo_config; diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h index 020f1aa28322..63874d385c6f 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -227,7 +227,7 @@ TRACE_EVENT(oos_sync, #define GVT_CMD_STR_LEN 40 TRACE_EVENT(gvt_command, TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, - u32 cmd_len, u32 buf_type, u32 buf_addr_type, + u32 cmd_len, u32 buf_type, u32 buf_addr_type, void *workload, const char *cmd_name), TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type, diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index bc717cf544e4..f969f585d07b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -66,6 +66,7 @@ static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) static int i915_capabilities(struct seq_file *m, void *data) { struct drm_i915_private *i915 = node_to_i915(m->private); + struct intel_display *display = &i915->display; struct drm_printer p = drm_seq_file_printer(m); seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(i915)); @@ -77,7 +78,7 @@ static int i915_capabilities(struct seq_file *m, void *data) kernel_param_lock(THIS_MODULE); i915_params_dump(&i915->params, &p); - intel_display_params_dump(i915, &p); + intel_display_params_dump(display, &p); kernel_param_unlock(THIS_MODULE); return 0; diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index fb8e9c2fcea5..ccdd2983cfb5 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -49,7 +49,7 @@ #include "display/intel_bw.h" #include "display/intel_cdclk.h" #include "display/intel_display_driver.h" -#include "display/intel_display_types.h" +#include "display/intel_display.h" #include "display/intel_dmc.h" #include "display/intel_dp.h" #include "display/intel_dpt.h" @@ -58,10 +58,8 @@ #include "display/intel_hotplug.h" #include "display/intel_overlay.h" #include "display/intel_pch_refclk.h" -#include "display/intel_pipe_crc.h" #include "display/intel_pps.h" #include "display/intel_sprite.h" -#include "display/intel_vga.h" #include "display/skl_watermark.h" #include "gem/i915_gem_context.h" @@ -442,6 +440,7 @@ static int i915_pcode_init(struct drm_i915_private *i915) */ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); int ret; @@ -451,8 +450,8 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (HAS_PPGTT(dev_priv)) { if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_full_ppgtt(dev_priv)) { - i915_report_error(dev_priv, - "incompatible vGPU found, support for isolated ppGTT required\n"); + drm_err(&dev_priv->drm, + "incompatible vGPU found, support for isolated ppGTT required\n"); return -ENXIO; } } @@ -465,8 +464,8 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) */ if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_hwsp_emulation(dev_priv)) { - i915_report_error(dev_priv, - "old vGPU host found, support for HWSP emulation required\n"); + drm_err(&dev_priv->drm, + "old vGPU host found, support for HWSP emulation required\n"); return -ENXIO; } } @@ -542,7 +541,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) goto err_msi; - intel_opregion_setup(dev_priv); + intel_opregion_setup(display); ret = i915_pcode_init(dev_priv); if (ret) @@ -559,7 +558,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) return 0; err_opregion: - intel_opregion_cleanup(dev_priv); + intel_opregion_cleanup(display); err_msi: if (pdev->msi_enabled) pci_disable_msi(pdev); @@ -580,11 +579,12 @@ err_perf: */ static void i915_driver_hw_remove(struct drm_i915_private *dev_priv) { + struct intel_display *display = &dev_priv->display; struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); i915_perf_fini(dev_priv); - intel_opregion_cleanup(dev_priv); + intel_opregion_cleanup(display); if (pdev->msi_enabled) pci_disable_msi(pdev); @@ -1014,6 +1014,7 @@ static int i915_drm_prepare(struct drm_device *dev) static int i915_drm_suspend(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = &dev_priv->display; struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); pci_power_t opregion_target_state; @@ -1049,7 +1050,7 @@ static int i915_drm_suspend(struct drm_device *dev) i915_save_display(dev_priv); opregion_target_state = suspend_to_idle(dev_priv) ? PCI_D1 : PCI_D3cold; - intel_opregion_suspend(dev_priv, opregion_target_state); + intel_opregion_suspend(display, opregion_target_state); dev_priv->suspend_count++; @@ -1138,6 +1139,7 @@ int i915_driver_suspend_switcheroo(struct drm_i915_private *i915, static int i915_drm_resume(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_display *display = &dev_priv->display; struct intel_gt *gt; int ret, i; @@ -1205,7 +1207,7 @@ static int i915_drm_resume(struct drm_device *dev) } intel_hpd_poll_disable(dev_priv); - intel_opregion_resume(dev_priv); + intel_opregion_resume(display); intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING, false); @@ -1454,6 +1456,7 @@ static int i915_pm_restore(struct device *kdev) static int intel_runtime_suspend(struct device *kdev) { struct drm_i915_private *dev_priv = kdev_to_i915(kdev); + struct intel_display *display = &dev_priv->display; struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); struct pci_dev *root_pdev; @@ -1528,7 +1531,7 @@ static int intel_runtime_suspend(struct device *kdev) * won't be able to restore them. Since PCI_D3hot matches the * actual specification and appears to be working, use it. */ - intel_opregion_notify_adapter(dev_priv, PCI_D3hot); + intel_opregion_notify_adapter(display, PCI_D3hot); } else { /* * current versions of firmware which depend on this opregion @@ -1537,7 +1540,7 @@ static int intel_runtime_suspend(struct device *kdev) * to distinguish it from notifications that might be sent via * the suspend path. */ - intel_opregion_notify_adapter(dev_priv, PCI_D1); + intel_opregion_notify_adapter(display, PCI_D1); } assert_forcewakes_inactive(&dev_priv->uncore); @@ -1552,6 +1555,7 @@ static int intel_runtime_suspend(struct device *kdev) static int intel_runtime_resume(struct device *kdev) { struct drm_i915_private *dev_priv = kdev_to_i915(kdev); + struct intel_display *display = &dev_priv->display; struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); struct pci_dev *root_pdev; @@ -1566,7 +1570,7 @@ static int intel_runtime_resume(struct device *kdev) drm_WARN_ON_ONCE(&dev_priv->drm, atomic_read(&rpm->wakeref_count)); disable_rpm_wakeref_asserts(rpm); - intel_opregion_notify_adapter(dev_priv, PCI_D0); + intel_opregion_notify_adapter(display, PCI_D0); root_pdev = pcie_find_root_port(pdev); if (root_pdev) @@ -1693,9 +1697,9 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_FREE, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_INIT_HEAP, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_CMDBUFFER, drm_noop, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_DESTROY_HEAP, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF_DRV(I915_SET_VBLANK_PIPE, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), - DRM_IOCTL_DEF_DRV(I915_GET_VBLANK_PIPE, drm_noop, DRM_AUTH), + DRM_IOCTL_DEF_DRV(I915_DESTROY_HEAP, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_SET_VBLANK_PIPE, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(I915_GET_VBLANK_PIPE, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_VBLANK_SWAP, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d7723dd11c80..d772cbe15fec 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -408,14 +408,8 @@ static inline struct intel_gt *to_gt(const struct drm_i915_private *i915) #define INTEL_REVID(i915) (to_pci_dev((i915)->drm.dev)->revision) -#define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step) #define INTEL_GRAPHICS_STEP(__i915) (RUNTIME_INFO(__i915)->step.graphics_step) #define INTEL_MEDIA_STEP(__i915) (RUNTIME_INFO(__i915)->step.media_step) -#define INTEL_BASEDIE_STEP(__i915) (RUNTIME_INFO(__i915)->step.basedie_step) - -#define IS_DISPLAY_STEP(__i915, since, until) \ - (drm_WARN_ON(&(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \ - INTEL_DISPLAY_STEP(__i915) >= (since) && INTEL_DISPLAY_STEP(__i915) < (until)) #define IS_GRAPHICS_STEP(__i915, since, until) \ (drm_WARN_ON(&(__i915)->drm, INTEL_GRAPHICS_STEP(__i915) == STEP_NONE), \ @@ -425,10 +419,6 @@ static inline struct intel_gt *to_gt(const struct drm_i915_private *i915) (drm_WARN_ON(&(__i915)->drm, INTEL_MEDIA_STEP(__i915) == STEP_NONE), \ INTEL_MEDIA_STEP(__i915) >= (since) && INTEL_MEDIA_STEP(__i915) < (until)) -#define IS_BASEDIE_STEP(__i915, since, until) \ - (drm_WARN_ON(&(__i915)->drm, INTEL_BASEDIE_STEP(__i915) == STEP_NONE), \ - INTEL_BASEDIE_STEP(__i915) >= (since) && INTEL_BASEDIE_STEP(__i915) < (until)) - static __always_inline unsigned int __platform_mask_index(const struct intel_runtime_info *info, enum intel_platform p) @@ -546,6 +536,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_LUNARLAKE(i915) (0 && i915) #define IS_BATTLEMAGE(i915) (0 && i915) +#define IS_ARROWLAKE(i915) \ + IS_SUBPLATFORM(i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_ARL) #define IS_DG2_G10(i915) \ IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_G10) #define IS_DG2_G11(i915) \ @@ -678,9 +670,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, ((sizes) & ~RUNTIME_INFO(i915)->page_sizes) == 0; \ }) -/* Early gen2 have a totally busted CS tlb and require pinned batches. */ -#define HAS_BROKEN_CS_TLB(i915) (IS_I830(i915) || IS_I845G(i915)) - #define NEEDS_RC6_CTX_CORRUPTION_WA(i915) \ (IS_BROADWELL(i915) || GRAPHICS_VER(i915) == 9) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1391c01d7663..070ab6546987 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -39,8 +39,6 @@ #include <drm/drm_cache.h> #include <drm/drm_vma_manager.h> -#include "display/intel_display.h" - #include "gem/i915_gem_clflush.h" #include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7bd1861ddbdf..a9662cc6ed1e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -15,7 +15,6 @@ #include <asm/set_memory.h> #include <asm/smp.h> -#include "display/intel_frontbuffer.h" #include "gt/intel_gt.h" #include "gt/intel_gt_requests.h" diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 96c6cafd5b9e..6469b9bcf2ec 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -660,9 +660,10 @@ static void err_print_params(struct drm_i915_error_state_buf *m, const struct i915_params *params) { struct drm_printer p = i915_error_printer(m); + struct intel_display *display = &m->i915->display; i915_params_dump(params, &p); - intel_display_params_dump(m->i915, &p); + intel_display_params_dump(display, &p); } static void err_print_pciid(struct drm_i915_error_state_buf *m, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8059ac7e15fe..2321de48d169 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -34,7 +34,6 @@ #include <drm/drm_drv.h> #include "display/intel_display_irq.h" -#include "display/intel_display_types.h" #include "display/intel_hotplug.h" #include "display/intel_hotplug_irq.h" #include "display/intel_lpe_audio.h" diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c index 7998bc74ab49..f5c97a620962 100644 --- a/drivers/gpu/drm/i915/i915_mm.c +++ b/drivers/gpu/drm/i915/i915_mm.c @@ -122,13 +122,15 @@ int remap_io_mapping(struct vm_area_struct *vma, * @addr: target user address to start at * @size: size of map area * @sgl: Start sg entry + * @offset: offset from the start of the page * @iobase: Use stored dma address offset by this address or pfn if -1 * * Note: this is only safe if the mm semaphore is held when called. */ int remap_io_sg(struct vm_area_struct *vma, unsigned long addr, unsigned long size, - struct scatterlist *sgl, resource_size_t iobase) + struct scatterlist *sgl, unsigned long offset, + resource_size_t iobase) { struct remap_pfn r = { .mm = vma->vm_mm, @@ -141,6 +143,14 @@ int remap_io_sg(struct vm_area_struct *vma, /* We rely on prevalidation of the io-mapping to skip track_pfn(). */ GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS); + while (offset >= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT) { + offset -= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT; + r.sgt = __sgt_iter(__sg_next(r.sgt.sgp), use_dma(iobase)); + if (!r.sgt.sgp) + return -EINVAL; + } + r.sgt.curr = offset << PAGE_SHIFT; + if (!use_dma(iobase)) flush_cache_range(vma, addr, size); diff --git a/drivers/gpu/drm/i915/i915_mm.h b/drivers/gpu/drm/i915/i915_mm.h index 04c8974d822b..69f9351b1a1c 100644 --- a/drivers/gpu/drm/i915/i915_mm.h +++ b/drivers/gpu/drm/i915/i915_mm.h @@ -30,6 +30,7 @@ int remap_io_mapping(struct vm_area_struct *vma, int remap_io_sg(struct vm_area_struct *vma, unsigned long addr, unsigned long size, - struct scatterlist *sgl, resource_size_t iobase); + struct scatterlist *sgl, unsigned long offset, + resource_size_t iobase); #endif /* __I915_MM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ce4dfd65fafa..d37bb3a704d0 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -26,7 +26,6 @@ #include <drm/drm_drv.h> #include <drm/intel/i915_pciids.h> -#include "display/intel_display.h" #include "display/intel_display_driver.h" #include "gt/intel_gt_regs.h" #include "gt/intel_sa_media.h" @@ -880,7 +879,7 @@ static void i915_pci_remove(struct pci_dev *pdev) { struct drm_i915_private *i915; - i915 = pci_get_drvdata(pdev); + i915 = pdev_to_i915(pdev); if (!i915) /* driver load aborted, nothing to cleanup */ return; @@ -1003,7 +1002,7 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - if (i915_inject_probe_failure(pci_get_drvdata(pdev))) { + if (i915_inject_probe_failure(pdev_to_i915(pdev))) { i915_pci_remove(pdev); return -ENODEV; } @@ -1025,7 +1024,7 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) static void i915_pci_shutdown(struct pci_dev *pdev) { - struct drm_i915_private *i915 = pci_get_drvdata(pdev); + struct drm_i915_private *i915 = pdev_to_i915(pdev); i915_driver_shutdown(i915); } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0e3d79227e3c..41f4350a7c6c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2396,6 +2396,7 @@ /* Display Internal Timeout Register */ #define RM_TIMEOUT _MMIO(0x42060) +#define RM_TIMEOUT_REG_CAPTURE _MMIO(0x420E0) #define MMIO_TIMEOUT_US(us) ((us) << 0) /* interrupts */ @@ -2515,6 +2516,10 @@ #define GEN11_PIPE_PLANE7_FLIP_DONE REG_BIT(18) /* icl/tgl */ #define GEN11_PIPE_PLANE6_FLIP_DONE REG_BIT(17) /* icl/tgl */ #define GEN11_PIPE_PLANE5_FLIP_DONE REG_BIT(16) /* icl+ */ +#define GEN12_DSB_2_INT REG_BIT(15) /* tgl+ */ +#define GEN12_DSB_1_INT REG_BIT(14) /* tgl+ */ +#define GEN12_DSB_0_INT REG_BIT(13) /* tgl+ */ +#define GEN12_DSB_INT(dsb_id) REG_BIT(13 + (dsb_id)) #define GEN9_PIPE_CURSOR_FAULT REG_BIT(11) /* skl+ */ #define GEN9_PIPE_PLANE4_FAULT REG_BIT(10) /* skl+ */ #define GEN8_PIPE_CURSOR_FAULT REG_BIT(10) /* bdw */ @@ -2574,6 +2579,7 @@ #define GEN8_DE_MISC_IMR _MMIO(0x44464) #define GEN8_DE_MISC_IIR _MMIO(0x44468) #define GEN8_DE_MISC_IER _MMIO(0x4446c) +#define XELPDP_RM_TIMEOUT REG_BIT(29) #define XELPDP_PMDEMAND_RSPTOUT_ERR REG_BIT(27) #define GEN8_DE_MISC_GSE REG_BIT(27) #define GEN8_DE_EDP_PSR REG_BIT(19) diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 613decd47760..8775beab9cb8 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -191,8 +191,8 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv) i915_gpu_error_sysfs_teardown(dev_priv); - device_remove_bin_file(kdev, &dpf_attrs_1); - device_remove_bin_file(kdev, &dpf_attrs); + device_remove_bin_file(kdev, &dpf_attrs_1); + device_remove_bin_file(kdev, &dpf_attrs); kobject_put(dev_priv->sysfs_gt); } diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index 6f9e7b354b54..b34a2d3d331d 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -11,51 +11,10 @@ #include "i915_reg.h" #include "i915_utils.h" -#define FDO_BUG_MSG "Please file a bug on drm/i915; see " FDO_BUG_URL " for details." - -void -__i915_printk(struct drm_i915_private *dev_priv, const char *level, - const char *fmt, ...) -{ - static bool shown_bug_once; - struct device *kdev = dev_priv->drm.dev; - bool is_error = level[1] <= KERN_ERR[1]; - bool is_debug = level[1] == KERN_DEBUG[1]; - struct va_format vaf; - va_list args; - - if (is_debug && !drm_debug_enabled(DRM_UT_DRIVER)) - return; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - if (is_error) - dev_printk(level, kdev, "%pV", &vaf); - else - dev_printk(level, kdev, "[" DRM_NAME ":%ps] %pV", - __builtin_return_address(0), &vaf); - - va_end(args); - - if (is_error && !shown_bug_once) { - /* - * Ask the user to file a bug report for the error, except - * if they may have caused the bug by fiddling with unsafe - * module parameters. - */ - if (!test_taint(TAINT_USER)) - dev_notice(kdev, "%s", FDO_BUG_MSG); - shown_bug_once = true; - } -} - void add_taint_for_CI(struct drm_i915_private *i915, unsigned int taint) { - __i915_printk(i915, KERN_NOTICE, "CI tainted:%#x by %pS\n", - taint, (void *)_RET_IP_); + drm_notice(&i915->drm, "CI tainted: %#x by %pS\n", + taint, __builtin_return_address(0)); /* Failures that occur during fault injection testing are expected */ if (!i915_error_injected()) @@ -74,9 +33,9 @@ int __i915_inject_probe_error(struct drm_i915_private *i915, int err, if (++i915_probe_fail_count < i915_modparams.inject_probe_failure) return 0; - __i915_printk(i915, KERN_INFO, - "Injecting failure %d at checkpoint %u [%s:%d]\n", - err, i915_modparams.inject_probe_failure, func, line); + drm_info(&i915->drm, "Injecting failure %d at checkpoint %u [%s:%d]\n", + err, i915_modparams.inject_probe_failure, func, line); + i915_modparams.inject_probe_failure = 0; return err; } diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 06ec6ceb61d5..71bdc89bd621 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -45,13 +45,6 @@ struct timer_list; #define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \ __stringify(x), (long)(x)) -void __printf(3, 4) -__i915_printk(struct drm_i915_private *dev_priv, const char *level, - const char *fmt, ...); - -#define i915_report_error(dev_priv, fmt, ...) \ - __i915_printk(dev_priv, KERN_ERR, fmt, ##__VA_ARGS__) - #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) int __i915_inject_probe_error(struct drm_i915_private *i915, int err, @@ -69,9 +62,12 @@ bool i915_error_injected(void); #define i915_inject_probe_failure(i915) i915_inject_probe_error((i915), -ENODEV) -#define i915_probe_error(i915, fmt, ...) \ - __i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \ - fmt, ##__VA_ARGS__) +#define i915_probe_error(i915, fmt, ...) ({ \ + if (i915_error_injected()) \ + drm_dbg(&(i915)->drm, fmt, ##__VA_ARGS__); \ + else \ + drm_err(&(i915)->drm, fmt, ##__VA_ARGS__); \ +}) #define range_overflows(start, size, max) ({ \ typeof(start) start__ = (start); \ diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index d26de37719a7..3c47c625993e 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -108,8 +108,6 @@ void intel_device_info_print(const struct intel_device_info *info, drm_printf(p, "graphics stepping: %s\n", intel_step_name(runtime->step.graphics_step)); drm_printf(p, "media stepping: %s\n", intel_step_name(runtime->step.media_step)); - drm_printf(p, "display stepping: %s\n", intel_step_name(runtime->step.display_step)); - drm_printf(p, "base die stepping: %s\n", intel_step_name(runtime->step.basedie_step)); drm_printf(p, "gt: %d\n", info->gt); drm_printf(p, "memory-regions: 0x%x\n", info->memory_regions); @@ -124,7 +122,6 @@ void intel_device_info_print(const struct intel_device_info *info, #undef PRINT_FLAG drm_printf(p, "has_pooled_eu: %s\n", str_yes_no(runtime->has_pooled_eu)); - drm_printf(p, "rawclk rate: %u kHz\n", runtime->rawclk_freq); } #define ID(id) (id) @@ -203,6 +200,10 @@ static const u16 subplatform_g12_ids[] = { INTEL_DG2_G12_IDS(ID), }; +static const u16 subplatform_arl_ids[] = { + INTEL_ARL_IDS(ID), +}; + static bool find_devid(u16 id, const u16 *p, unsigned int num) { for (; num; num--, p++) { @@ -260,6 +261,9 @@ static void intel_device_info_subplatform_init(struct drm_i915_private *i915) } else if (find_devid(devid, subplatform_g12_ids, ARRAY_SIZE(subplatform_g12_ids))) { mask = BIT(INTEL_SUBPLATFORM_G12); + } else if (find_devid(devid, subplatform_arl_ids, + ARRAY_SIZE(subplatform_arl_ids))) { + mask = BIT(INTEL_SUBPLATFORM_ARL); } GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK); @@ -370,10 +374,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) "Disabling ppGTT for VT-d support\n"); runtime->ppgtt_type = INTEL_PPGTT_NONE; } - - runtime->rawclk_freq = intel_read_rawclk(dev_priv); - drm_dbg(&dev_priv->drm, "rawclk rate: %d kHz\n", runtime->rawclk_freq); - } /* diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index d1a2abc7e513..643ff1bf74ee 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -127,6 +127,9 @@ enum intel_platform { #define INTEL_SUBPLATFORM_N 1 #define INTEL_SUBPLATFORM_RPLU 2 +/* MTL */ +#define INTEL_SUBPLATFORM_ARL 0 + enum intel_ppgtt_type { INTEL_PPGTT_NONE = I915_GEM_PPGTT_NONE, INTEL_PPGTT_ALIASING = I915_GEM_PPGTT_ALIASING, @@ -204,8 +207,6 @@ struct intel_runtime_info { u16 device_id; - u32 rawclk_freq; - struct intel_step_info step; unsigned int page_sizes; /* page sizes supported by the HW */ diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index a5adfb5d8fd2..285b96fadfd5 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -23,8 +23,7 @@ * use a macro to define these to make it easier to identify the platforms * where the two steppings can deviate. */ -#define COMMON_STEP(x) .graphics_step = STEP_##x, .display_step = STEP_##x, .media_step = STEP_##x -#define COMMON_GT_MEDIA_STEP(x) .graphics_step = STEP_##x, .media_step = STEP_##x +#define COMMON_STEP(x) .graphics_step = STEP_##x, .media_step = STEP_##x static const struct intel_step_info skl_revids[] = { [0x6] = { COMMON_STEP(G0) }, @@ -34,13 +33,13 @@ static const struct intel_step_info skl_revids[] = { }; static const struct intel_step_info kbl_revids[] = { - [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, - [2] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B0 }, - [3] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_B0 }, - [4] = { COMMON_GT_MEDIA_STEP(F0), .display_step = STEP_C0 }, - [5] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B1 }, - [6] = { COMMON_GT_MEDIA_STEP(D1), .display_step = STEP_B1 }, - [7] = { COMMON_GT_MEDIA_STEP(G0), .display_step = STEP_C0 }, + [1] = { COMMON_STEP(B0) }, + [2] = { COMMON_STEP(C0) }, + [3] = { COMMON_STEP(D0) }, + [4] = { COMMON_STEP(F0) }, + [5] = { COMMON_STEP(C0) }, + [6] = { COMMON_STEP(D1) }, + [7] = { COMMON_STEP(G0) }, }; static const struct intel_step_info bxt_revids[] = { @@ -64,16 +63,16 @@ static const struct intel_step_info jsl_ehl_revids[] = { }; static const struct intel_step_info tgl_uy_revids[] = { - [0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, - [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_C0 }, - [2] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 }, - [3] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_D0 }, + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, + [2] = { COMMON_STEP(B1) }, + [3] = { COMMON_STEP(C0) }, }; /* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ static const struct intel_step_info tgl_revids[] = { - [0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_B0 }, - [1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_D0 }, + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, }; static const struct intel_step_info rkl_revids[] = { @@ -88,49 +87,49 @@ static const struct intel_step_info dg1_revids[] = { }; static const struct intel_step_info adls_revids[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, - [0x1] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A2 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, - [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B0 }, - [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x1] = { COMMON_STEP(A0) }, + [0x4] = { COMMON_STEP(B0) }, + [0x8] = { COMMON_STEP(C0) }, + [0xC] = { COMMON_STEP(D0) }, }; static const struct intel_step_info adlp_revids[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, - [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_C0 }, - [0xC] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_D0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x4] = { COMMON_STEP(B0) }, + [0x8] = { COMMON_STEP(C0) }, + [0xC] = { COMMON_STEP(C0) }, }; static const struct intel_step_info dg2_g10_revid_step_tbl[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 }, - [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display_step = STEP_A0 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 }, - [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x1] = { COMMON_STEP(A1) }, + [0x4] = { COMMON_STEP(B0) }, + [0x8] = { COMMON_STEP(C0) }, }; static const struct intel_step_info dg2_g11_revid_step_tbl[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_B0 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_C0 }, - [0x5] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x4] = { COMMON_STEP(B0) }, + [0x5] = { COMMON_STEP(B1) }, }; static const struct intel_step_info dg2_g12_revid_step_tbl[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_C0 }, - [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display_step = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x1] = { COMMON_STEP(A1) }, }; static const struct intel_step_info adls_rpls_revids[] = { - [0x4] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_D0 }, - [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_C0 }, + [0x4] = { COMMON_STEP(D0) }, + [0xC] = { COMMON_STEP(D0) }, }; static const struct intel_step_info adlp_rplp_revids[] = { - [0x4] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_E0 }, + [0x4] = { COMMON_STEP(C0) }, }; static const struct intel_step_info adlp_n_revids[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_D0 }, + [0x0] = { COMMON_STEP(A0) }, }; static u8 gmd_to_intel_step(struct drm_i915_private *i915, @@ -158,11 +157,6 @@ void intel_step_init(struct drm_i915_private *i915) &RUNTIME_INFO(i915)->graphics.ip); step.media_step = gmd_to_intel_step(i915, &RUNTIME_INFO(i915)->media.ip); - step.display_step = STEP_A0 + DISPLAY_RUNTIME_INFO(i915)->ip.step; - if (step.display_step >= STEP_FUTURE) { - drm_dbg(&i915->drm, "Using future display steppings\n"); - step.display_step = STEP_FUTURE; - } RUNTIME_INFO(i915)->step = step; @@ -252,7 +246,6 @@ void intel_step_init(struct drm_i915_private *i915) } else { drm_dbg(&i915->drm, "Using future steppings\n"); step.graphics_step = STEP_FUTURE; - step.display_step = STEP_FUTURE; } } @@ -275,8 +268,3 @@ const char *intel_step_name(enum intel_step step) return "**"; } } - -const char *intel_display_step_name(struct drm_i915_private *i915) -{ - return intel_step_name(RUNTIME_INFO(i915)->step.display_step); -} diff --git a/drivers/gpu/drm/i915/intel_step.h b/drivers/gpu/drm/i915/intel_step.h index b6f43b624774..22f1d6905160 100644 --- a/drivers/gpu/drm/i915/intel_step.h +++ b/drivers/gpu/drm/i915/intel_step.h @@ -16,9 +16,7 @@ struct intel_step_info { * the expectation breaks gmd_to_intel_step(). */ u8 graphics_step; /* Represents the compute tile on Xe_HPC */ - u8 display_step; u8 media_step; - u8 basedie_step; }; #define STEP_ENUM_VAL(name) STEP_##name, @@ -78,6 +76,5 @@ enum intel_step { void intel_step_init(struct drm_i915_private *i915); const char *intel_step_name(enum intel_step step); -const char *intel_display_step_name(struct drm_i915_private *i915); #endif /* __INTEL_STEP_H__ */ diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 2eba289d88ad..6aa179a3e92a 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -24,6 +24,7 @@ #include <drm/drm_managed.h> #include <linux/pm_runtime.h> +#include "gt/intel_gt.h" #include "gt/intel_engine_regs.h" #include "gt/intel_gt_regs.h" @@ -180,14 +181,16 @@ fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d) if (!wait_ack_clear(d, FORCEWAKE_KERNEL)) return; - if (fw_ack(d) == ~0) + if (fw_ack(d) == ~0) { drm_err(&d->uncore->i915->drm, "%s: MMIO unreliable (forcewake register returns 0xFFFFFFFF)!\n", intel_uncore_forcewake_domain_to_str(d->id)); - else + intel_gt_set_wedged_async(d->uncore->gt); + } else { drm_err(&d->uncore->i915->drm, "%s: timed out waiting for forcewake ack to clear.\n", intel_uncore_forcewake_domain_to_str(d->id)); + } add_taint_for_CI(d->uncore->i915, TAINT_WARN); /* CI now unreliable */ } diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index ae6070b5bf07..f08f6674911e 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -517,7 +517,7 @@ static int igt_mock_max_segment(void *arg) if (!IS_ALIGNED(daddr, ps)) { pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n", - __func__, &daddr, ps); + __func__, &daddr, ps); err = -EINVAL; goto out_close; } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index ae5c6ec24a1e..77b50c56c124 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -539,8 +539,8 @@ static int mtk_drm_kms_init(struct drm_device *drm) } /* IGT will check if the cursor size is configured */ - drm->mode_config.cursor_width = drm->mode_config.max_width; - drm->mode_config.cursor_height = drm->mode_config.max_height; + drm->mode_config.cursor_width = 512; + drm->mode_config.cursor_height = 512; /* Use OVL device for all DMA memory allocations */ crtc = drm_crtc_from_index(drm, 0); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 1c6626747b98..ecc3fc5cec22 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -99,7 +99,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname, * was a bad idea, and is only provided for backwards * compatibility for older targets. */ - return -ENODEV; + return -ENOENT; } if (IS_ERR(fw)) { diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 34c56e855af7..3b171bf227d1 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1171,8 +1171,6 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc, cstate->num_mixers = num_lm; - dpu_enc->connector = conn_state->connector; - for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; @@ -1270,6 +1268,8 @@ static void dpu_encoder_virt_atomic_enable(struct drm_encoder *drm_enc, dpu_enc->commit_done_timedout = false; + dpu_enc->connector = drm_atomic_get_new_connector_for_encoder(state, drm_enc); + cur_mode = &dpu_enc->base.crtc->state->adjusted_mode; dpu_enc->wide_bus_en = dpu_encoder_is_widebus_enabled(drm_enc); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index fc178ec73907..648c8d0a4c36 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -308,8 +308,8 @@ static const u32 wb2_formats_rgb_yuv[] = { { \ .maxdwnscale = SSPP_UNITY_SCALE, \ .maxupscale = SSPP_UNITY_SCALE, \ - .format_list = plane_formats_yuv, \ - .num_formats = ARRAY_SIZE(plane_formats_yuv), \ + .format_list = plane_formats, \ + .num_formats = ARRAY_SIZE(plane_formats), \ .virt_format_list = plane_formats, \ .virt_num_formats = ARRAY_SIZE(plane_formats), \ } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index e2adc937ea63..935ff6fd172c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -31,24 +31,14 @@ * @fmt: Pointer to format string */ #define DPU_DEBUG(fmt, ...) \ - do { \ - if (drm_debug_enabled(DRM_UT_KMS)) \ - DRM_DEBUG(fmt, ##__VA_ARGS__); \ - else \ - pr_debug(fmt, ##__VA_ARGS__); \ - } while (0) + DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) /** * DPU_DEBUG_DRIVER - macro for hardware driver logging * @fmt: Pointer to format string */ #define DPU_DEBUG_DRIVER(fmt, ...) \ - do { \ - if (drm_debug_enabled(DRM_UT_DRIVER)) \ - DRM_ERROR(fmt, ##__VA_ARGS__); \ - else \ - pr_debug(fmt, ##__VA_ARGS__); \ - } while (0) + DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) #define DPU_ERROR(fmt, ...) pr_err("[dpu error]" fmt, ##__VA_ARGS__) #define DPU_ERROR_RATELIMITED(fmt, ...) pr_err_ratelimited("[dpu error]" fmt, ##__VA_ARGS__) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 40c4dd2c3139..29298e066163 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -681,6 +681,9 @@ static int dpu_plane_prepare_fb(struct drm_plane *plane, new_state->fb, &layout); if (ret) { DPU_ERROR_PLANE(pdpu, "failed to get format layout, %d\n", ret); + if (pstate->aspace) + msm_framebuffer_cleanup(new_state->fb, pstate->aspace, + pstate->needs_dirtyfb); return ret; } @@ -744,10 +747,9 @@ static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, min_src_size = MSM_FORMAT_IS_YUV(fmt) ? 2 : 1; if (MSM_FORMAT_IS_YUV(fmt) && - (!pipe->sspp->cap->sblk->scaler_blk.len || - !pipe->sspp->cap->sblk->csc_blk.len)) { + !pipe->sspp->cap->sblk->csc_blk.len) { DPU_DEBUG_PLANE(pdpu, - "plane doesn't have scaler/csc for yuv\n"); + "plane doesn't have csc for yuv\n"); return -EINVAL; } @@ -864,6 +866,10 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, max_linewidth = pdpu->catalog->caps->max_linewidth; + drm_rect_rotate(&pipe_cfg->src_rect, + new_plane_state->fb->width, new_plane_state->fb->height, + new_plane_state->rotation); + if ((drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) || _dpu_plane_calc_clk(&crtc_state->adjusted_mode, pipe_cfg) > max_mdp_clk_rate) { /* @@ -913,6 +919,14 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, r_pipe_cfg->dst_rect.x1 = pipe_cfg->dst_rect.x2; } + drm_rect_rotate_inv(&pipe_cfg->src_rect, + new_plane_state->fb->width, new_plane_state->fb->height, + new_plane_state->rotation); + if (r_pipe->sspp) + drm_rect_rotate_inv(&r_pipe_cfg->src_rect, + new_plane_state->fb->width, new_plane_state->fb->height, + new_plane_state->rotation); + ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt, &crtc_state->adjusted_mode); if (ret) return ret; diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 7bc8a9f0657a..f342fc5ae41e 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1286,6 +1286,8 @@ static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl, link_info.rate = ctrl->link->link_params.rate; link_info.capabilities = DP_LINK_CAP_ENHANCED_FRAMING; + dp_link_reset_phy_params_vx_px(ctrl->link); + dp_aux_link_configure(ctrl->aux, &link_info); if (drm_dp_max_downspread(dpcd)) diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index a916b5f3b317..6ff6c9ef351f 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -90,22 +90,22 @@ static int dp_panel_read_dpcd(struct dp_panel *dp_panel) static u32 dp_panel_get_supported_bpp(struct dp_panel *dp_panel, u32 mode_edid_bpp, u32 mode_pclk_khz) { - struct dp_link_info *link_info; + const struct dp_link_info *link_info; const u32 max_supported_bpp = 30, min_supported_bpp = 18; - u32 bpp = 0, data_rate_khz = 0; + u32 bpp, data_rate_khz; - bpp = min_t(u32, mode_edid_bpp, max_supported_bpp); + bpp = min(mode_edid_bpp, max_supported_bpp); link_info = &dp_panel->link_info; data_rate_khz = link_info->num_lanes * link_info->rate * 8; - while (bpp > min_supported_bpp) { + do { if (mode_pclk_khz * bpp <= data_rate_khz) - break; + return bpp; bpp -= 6; - } + } while (bpp > min_supported_bpp); - return bpp; + return min_supported_bpp; } int dp_panel_read_sink_caps(struct dp_panel *dp_panel, @@ -423,8 +423,9 @@ int dp_panel_init_panel_info(struct dp_panel *dp_panel) drm_mode->clock); drm_dbg_dp(panel->drm_dev, "bpp = %d\n", dp_panel->dp_mode.bpp); - dp_panel->dp_mode.bpp = max_t(u32, 18, - min_t(u32, dp_panel->dp_mode.bpp, 30)); + dp_panel->dp_mode.bpp = dp_panel_get_mode_bpp(dp_panel, dp_panel->dp_mode.bpp, + dp_panel->dp_mode.drm_mode.clock); + drm_dbg_dp(panel->drm_dev, "updated bpp = %d\n", dp_panel->dp_mode.bpp); diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index d90b9471ba6f..faa88fd6eb4d 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -577,7 +577,7 @@ static const struct msm_mdss_data sc7180_data = { .ubwc_enc_version = UBWC_2_0, .ubwc_dec_version = UBWC_2_0, .ubwc_static = 0x1e, - .highest_bank_bit = 0x3, + .highest_bank_bit = 0x1, .reg_bus_bw = 76800, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c index adc60b25f8e6..0af01a0ec601 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c @@ -205,7 +205,8 @@ nvkm_firmware_dtor(struct nvkm_firmware *fw) break; case NVKM_FIRMWARE_IMG_DMA: nvkm_memory_unref(&memory); - dma_free_coherent(fw->device->dev, sg_dma_len(&fw->mem.sgl), fw->img, fw->phys); + dma_free_noncoherent(fw->device->dev, sg_dma_len(&fw->mem.sgl), + fw->img, fw->phys, DMA_TO_DEVICE); break; case NVKM_FIRMWARE_IMG_SGT: nvkm_memory_unref(&memory); @@ -236,10 +237,12 @@ nvkm_firmware_ctor(const struct nvkm_firmware_func *func, const char *name, break; case NVKM_FIRMWARE_IMG_DMA: { dma_addr_t addr; - len = ALIGN(fw->len, PAGE_SIZE); - fw->img = dma_alloc_coherent(fw->device->dev, len, &addr, GFP_KERNEL); + fw->img = dma_alloc_noncoherent(fw->device->dev, + len, &addr, + DMA_TO_DEVICE, + GFP_KERNEL); if (fw->img) { memcpy(fw->img, src, fw->len); fw->phys = addr; diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c index 80a480b12174..a1c8545f1249 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c @@ -89,6 +89,12 @@ nvkm_falcon_fw_boot(struct nvkm_falcon_fw *fw, struct nvkm_subdev *user, nvkm_falcon_fw_dtor_sigs(fw); } + /* after last write to the img, sync dma mappings */ + dma_sync_single_for_device(fw->fw.device->dev, + fw->fw.phys, + sg_dma_len(&fw->fw.mem.sgl), + DMA_TO_DEVICE); + FLCNFW_DBG(fw, "resetting"); fw->func->reset(fw); diff --git a/drivers/gpu/drm/omapdrm/Kconfig b/drivers/gpu/drm/omapdrm/Kconfig index 3f7139e211d2..64e440a2649b 100644 --- a/drivers/gpu/drm/omapdrm/Kconfig +++ b/drivers/gpu/drm/omapdrm/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_OMAP tristate "OMAP DRM" + depends on MMU depends on DRM && OF depends on ARCH_OMAP2PLUS || (COMPILE_TEST && PAGE_SIZE_LESS_THAN_64KB) select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index ef8d7f147465..d1c5e471bdca 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -2179,7 +2179,7 @@ assigned: void radeon_atom_encoder_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_encoder *encoder; list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index b5e96a8fc2c1..11a492f21157 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7585,7 +7585,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7615,7 +7615,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7645,7 +7645,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); + drm_handle_vblank(rdev_to_drm(rdev), 2); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7675,7 +7675,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); + drm_handle_vblank(rdev_to_drm(rdev), 3); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7705,7 +7705,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); + drm_handle_vblank(rdev_to_drm(rdev), 4); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -7735,7 +7735,7 @@ restart_ih: DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); + drm_handle_vblank(rdev_to_drm(rdev), 5); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -8581,7 +8581,7 @@ int cik_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 4c06f47453fd..d6ab93ed9ec4 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -91,7 +91,7 @@ struct r600_audio_pin *dce6_audio_get_pin(struct radeon_device *rdev) pin = &rdev->audio.pin[i]; pin_count = 0; - list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) { + list_for_each_entry(encoder, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { if (radeon_encoder_is_digital(encoder)) { radeon_encoder = to_radeon_encoder(encoder); dig = radeon_encoder->enc_priv; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index c634dc28e6c3..bc4ab71613a5 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -1673,7 +1673,7 @@ void evergreen_pm_misc(struct radeon_device *rdev) */ void evergreen_pm_prepare(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -1698,7 +1698,7 @@ void evergreen_pm_prepare(struct radeon_device *rdev) */ void evergreen_pm_finish(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -1763,7 +1763,7 @@ void evergreen_hpd_set_polarity(struct radeon_device *rdev, */ void evergreen_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enabled = 0; u32 tmp = DC_HPDx_CONNECTION_TIMER(0x9c4) | @@ -1804,7 +1804,7 @@ void evergreen_hpd_init(struct radeon_device *rdev) */ void evergreen_hpd_fini(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disabled = 0; @@ -4753,7 +4753,7 @@ restart_ih: event_name = "vblank"; if (rdev->irq.crtc_vblank_int[crtc_idx]) { - drm_handle_vblank(rdev->ddev, crtc_idx); + drm_handle_vblank(rdev_to_drm(rdev), crtc_idx); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -5211,7 +5211,7 @@ int evergreen_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); /* initialize AGP */ diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index e5577d2a19ef..a46613283393 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -397,7 +397,7 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1; @@ -435,14 +435,14 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i return r; } - offset = track->cb_color_bo_offset[id] << 8; + offset = (u64)track->cb_color_bo_offset[id] << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d cb[%d] bo base %llu not aligned with %ld\n", __func__, __LINE__, id, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->cb_color_bo[id])) { /* old ddx are broken they allocate bo with w*h*bpp but * program slice with ALIGN(h, 8), catch this and patch @@ -450,14 +450,14 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i */ if (!surf.mode) { uint32_t *ib = p->ib.ptr; - unsigned long tmp, nby, bsize, size, min = 0; + u64 tmp, nby, bsize, size, min = 0; /* find the height the ddx wants */ if (surf.nby > 8) { min = surf.nby - 8; } bsize = radeon_bo_size(track->cb_color_bo[id]); - tmp = track->cb_color_bo_offset[id] << 8; + tmp = (u64)track->cb_color_bo_offset[id] << 8; for (nby = surf.nby; nby > min; nby--) { size = nby * surf.nbx * surf.bpe * surf.nsamples; if ((tmp + size * mslice) <= bsize) { @@ -469,7 +469,7 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i slice = ((nby * surf.nbx) / 64) - 1; if (!evergreen_surface_check(p, &surf, "cb")) { /* check if this one works */ - tmp += surf.layer_size * mslice; + tmp += (u64)surf.layer_size * mslice; if (tmp <= bsize) { ib[track->cb_color_slice_idx[id]] = slice; goto old_ddx_ok; @@ -478,9 +478,9 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i } } dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, " - "offset %d, max layer %d, bo size %ld, slice %d)\n", + "offset %llu, max layer %d, bo size %ld, slice %d)\n", __func__, __LINE__, id, surf.layer_size, - track->cb_color_bo_offset[id] << 8, mslice, + (u64)track->cb_color_bo_offset[id] << 8, mslice, radeon_bo_size(track->cb_color_bo[id]), slice); dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n", __func__, __LINE__, surf.nbx, surf.nby, @@ -564,7 +564,7 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; @@ -610,18 +610,18 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) return r; } - offset = track->db_s_read_offset << 8; + offset = (u64)track->db_s_read_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil read bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_s_read_bo)) { dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_s_read_offset << 8, mslice, + (u64)track->db_s_read_offset << 8, mslice, radeon_bo_size(track->db_s_read_bo)); dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n", __func__, __LINE__, track->db_depth_size, @@ -629,18 +629,18 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) return -EINVAL; } - offset = track->db_s_write_offset << 8; + offset = (u64)track->db_s_write_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil write bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_s_write_bo)) { dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_s_write_offset << 8, mslice, + (u64)track->db_s_write_offset << 8, mslice, radeon_bo_size(track->db_s_write_bo)); return -EINVAL; } @@ -661,7 +661,7 @@ static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; @@ -708,34 +708,34 @@ static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) return r; } - offset = track->db_z_read_offset << 8; + offset = (u64)track->db_z_read_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil read bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_z_read_bo)) { dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_z_read_offset << 8, mslice, + (u64)track->db_z_read_offset << 8, mslice, radeon_bo_size(track->db_z_read_bo)); return -EINVAL; } - offset = track->db_z_write_offset << 8; + offset = (u64)track->db_z_write_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil write bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_z_write_bo)) { dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_z_write_offset << 8, mslice, + (u64)track->db_z_write_offset << 8, mslice, radeon_bo_size(track->db_z_write_bo)); return -EINVAL; } diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 77aee99e473a..3890911fe693 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2360,7 +2360,7 @@ int cayman_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); /* initialize memory controller */ diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 0b1e19345f43..80703417d8a1 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -459,7 +459,7 @@ void r100_pm_misc(struct radeon_device *rdev) */ void r100_pm_prepare(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -490,7 +490,7 @@ void r100_pm_prepare(struct radeon_device *rdev) */ void r100_pm_finish(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -603,7 +603,7 @@ void r100_hpd_set_polarity(struct radeon_device *rdev, */ void r100_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enable = 0; @@ -626,7 +626,7 @@ void r100_hpd_init(struct radeon_device *rdev) */ void r100_hpd_fini(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disable = 0; @@ -798,7 +798,7 @@ int r100_irq_process(struct radeon_device *rdev) /* Vertical blank interrupts */ if (status & RADEON_CRTC_VBLANK_STAT) { if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -807,7 +807,7 @@ int r100_irq_process(struct radeon_device *rdev) } if (status & RADEON_CRTC2_VBLANK_STAT) { if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -1016,45 +1016,65 @@ static int r100_cp_init_microcode(struct radeon_device *rdev) DRM_DEBUG_KMS("\n"); - if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) || - (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) || - (rdev->family == CHIP_RS200)) { + switch (rdev->family) { + case CHIP_R100: + case CHIP_RV100: + case CHIP_RV200: + case CHIP_RS100: + case CHIP_RS200: DRM_INFO("Loading R100 Microcode\n"); fw_name = FIRMWARE_R100; - } else if ((rdev->family == CHIP_R200) || - (rdev->family == CHIP_RV250) || - (rdev->family == CHIP_RV280) || - (rdev->family == CHIP_RS300)) { + break; + + case CHIP_R200: + case CHIP_RV250: + case CHIP_RV280: + case CHIP_RS300: DRM_INFO("Loading R200 Microcode\n"); fw_name = FIRMWARE_R200; - } else if ((rdev->family == CHIP_R300) || - (rdev->family == CHIP_R350) || - (rdev->family == CHIP_RV350) || - (rdev->family == CHIP_RV380) || - (rdev->family == CHIP_RS400) || - (rdev->family == CHIP_RS480)) { + break; + + case CHIP_R300: + case CHIP_R350: + case CHIP_RV350: + case CHIP_RV380: + case CHIP_RS400: + case CHIP_RS480: DRM_INFO("Loading R300 Microcode\n"); fw_name = FIRMWARE_R300; - } else if ((rdev->family == CHIP_R420) || - (rdev->family == CHIP_R423) || - (rdev->family == CHIP_RV410)) { + break; + + case CHIP_R420: + case CHIP_R423: + case CHIP_RV410: DRM_INFO("Loading R400 Microcode\n"); fw_name = FIRMWARE_R420; - } else if ((rdev->family == CHIP_RS690) || - (rdev->family == CHIP_RS740)) { + break; + + case CHIP_RS690: + case CHIP_RS740: DRM_INFO("Loading RS690/RS740 Microcode\n"); fw_name = FIRMWARE_RS690; - } else if (rdev->family == CHIP_RS600) { + break; + + case CHIP_RS600: DRM_INFO("Loading RS600 Microcode\n"); fw_name = FIRMWARE_RS600; - } else if ((rdev->family == CHIP_RV515) || - (rdev->family == CHIP_R520) || - (rdev->family == CHIP_RV530) || - (rdev->family == CHIP_R580) || - (rdev->family == CHIP_RV560) || - (rdev->family == CHIP_RV570)) { + break; + + case CHIP_RV515: + case CHIP_R520: + case CHIP_RV530: + case CHIP_R580: + case CHIP_RV560: + case CHIP_RV570: DRM_INFO("Loading R500 Microcode\n"); fw_name = FIRMWARE_R520; + break; + + default: + DRM_ERROR("Unsupported Radeon family %u\n", rdev->family); + return -EINVAL; } err = request_firmware(&rdev->me_fw, fw_name, rdev->dev); @@ -1471,7 +1491,7 @@ int r100_cs_packet_parse_vline(struct radeon_cs_parser *p) header = radeon_get_ib_value(p, h_idx); crtc_id = radeon_get_ib_value(p, h_idx + 5); reg = R100_CP_PACKET0_GET_REG(header); - crtc = drm_crtc_find(p->rdev->ddev, p->filp, crtc_id); + crtc = drm_crtc_find(rdev_to_drm(p->rdev), p->filp, crtc_id); if (!crtc) { DRM_ERROR("cannot find crtc %d\n", crtc_id); return -ENOENT; @@ -3059,7 +3079,7 @@ DEFINE_SHOW_ATTRIBUTE(r100_debugfs_mc_info); void r100_debugfs_rbbm_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r100_rbbm_info", 0444, root, rdev, &r100_debugfs_rbbm_info_fops); @@ -3069,7 +3089,7 @@ void r100_debugfs_rbbm_init(struct radeon_device *rdev) void r100_debugfs_cp_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r100_cp_ring_info", 0444, root, rdev, &r100_debugfs_cp_ring_info_fops); @@ -3081,7 +3101,7 @@ void r100_debugfs_cp_init(struct radeon_device *rdev) void r100_debugfs_mc_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r100_mc_info", 0444, root, rdev, &r100_debugfs_mc_info_fops); @@ -3947,7 +3967,7 @@ int r100_resume(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* post */ - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); /* Resume clock after posting */ r100_clock_startup(rdev); /* Initialize surface registers */ @@ -4056,7 +4076,7 @@ int r100_init(struct radeon_device *rdev) /* Set asic errata */ r100_errata(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 1620f534f55f..05c13102a8cb 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -616,7 +616,7 @@ DEFINE_SHOW_ATTRIBUTE(rv370_debugfs_pcie_gart_info); static void rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("rv370_pcie_gart_info", 0444, root, rdev, &rv370_debugfs_pcie_gart_info_fops); @@ -1452,7 +1452,7 @@ int r300_resume(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* post */ - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); /* Resume clock after posting */ r300_clock_startup(rdev); /* Initialize surface registers */ @@ -1538,7 +1538,7 @@ int r300_init(struct radeon_device *rdev) /* Set asic errata */ r300_errata(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index a979662eaa73..9a31cdec6415 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -322,7 +322,7 @@ int r420_resume(struct radeon_device *rdev) if (rdev->is_atom_bios) { atom_asic_init(rdev->mode_info.atom_context); } else { - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); } /* Resume clock after posting */ r420_clock_resume(rdev); @@ -414,7 +414,7 @@ int r420_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); @@ -493,7 +493,7 @@ DEFINE_SHOW_ATTRIBUTE(r420_debugfs_pipes_info); void r420_debugfs_pipes_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r420_pipes_info", 0444, root, rdev, &r420_debugfs_pipes_info_fops); diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index 6cbcaa845192..08e127b3249a 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -287,7 +287,7 @@ int r520_init(struct radeon_device *rdev) atom_asic_init(rdev->mode_info.atom_context); } /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 087d41e370fd..8b62f7faa5b9 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -950,7 +950,7 @@ void r600_hpd_set_polarity(struct radeon_device *rdev, void r600_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enable = 0; @@ -1017,7 +1017,7 @@ void r600_hpd_init(struct radeon_device *rdev) void r600_hpd_fini(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disable = 0; @@ -3280,7 +3280,7 @@ int r600_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); if (rdev->flags & RADEON_IS_AGP) { @@ -4136,7 +4136,7 @@ restart_ih: DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -4166,7 +4166,7 @@ restart_ih: DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -4358,7 +4358,7 @@ DEFINE_SHOW_ATTRIBUTE(r600_debugfs_mc_info); static void r600_debugfs_mc_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("r600_mc_info", 0444, root, rdev, &r600_debugfs_mc_info_fops); diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 6cf54a747749..1b2d31c4d77c 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -884,7 +884,7 @@ int r600_cs_common_vline_parse(struct radeon_cs_parser *p, crtc_id = radeon_get_ib_value(p, h_idx + 2 + 7 + 1); reg = R600_CP_PACKET0_GET_REG(header); - crtc = drm_crtc_find(p->rdev->ddev, p->filp, crtc_id); + crtc = drm_crtc_find(rdev_to_drm(p->rdev), p->filp, crtc_id); if (!crtc) { DRM_ERROR("cannot find crtc %d\n", crtc_id); return -ENOENT; diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index 64980a61d38a..81d58ef667dd 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -153,7 +153,7 @@ void r600_dpm_print_ps_status(struct radeon_device *rdev, u32 r600_dpm_get_vblank_time(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 vblank_in_pixels; @@ -180,7 +180,7 @@ u32 r600_dpm_get_vblank_time(struct radeon_device *rdev) u32 r600_dpm_get_vrefresh(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 vrefresh = 0; diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index f3551ebaa2f0..661f374f5f27 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -116,7 +116,7 @@ void r600_audio_update_hdmi(struct work_struct *work) { struct radeon_device *rdev = container_of(work, struct radeon_device, audio_work); - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct r600_audio_pin audio_status = r600_audio_status(rdev); struct drm_encoder *encoder; bool changed = false; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 0999c8eaae94..fd8a4513025f 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2297,7 +2297,7 @@ typedef void (*radeon_wreg_t)(struct radeon_device*, uint32_t, uint32_t); struct radeon_device { struct device *dev; - struct drm_device *ddev; + struct drm_device ddev; struct pci_dev *pdev; #ifdef __alpha__ struct pci_controller *hose; @@ -2476,6 +2476,11 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index); void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v); +static inline struct drm_device *rdev_to_drm(struct radeon_device *rdev) +{ + return &rdev->ddev; +} + /* * Cast helper */ diff --git a/drivers/gpu/drm/radeon/radeon_acpi.c b/drivers/gpu/drm/radeon/radeon_acpi.c index 603a78e41ba5..22ce61bdfc06 100644 --- a/drivers/gpu/drm/radeon/radeon_acpi.c +++ b/drivers/gpu/drm/radeon/radeon_acpi.c @@ -405,11 +405,11 @@ static int radeon_atif_handler(struct radeon_device *rdev, if (req.pending & ATIF_DGPU_DISPLAY_EVENT) { if ((rdev->flags & RADEON_IS_PX) && radeon_atpx_dgpu_req_power_for_displays()) { - pm_runtime_get_sync(rdev->ddev->dev); + pm_runtime_get_sync(rdev_to_drm(rdev)->dev); /* Just fire off a uevent and let userspace tell us what to do */ - drm_helper_hpd_irq_event(rdev->ddev); - pm_runtime_mark_last_busy(rdev->ddev->dev); - pm_runtime_put_autosuspend(rdev->ddev->dev); + drm_helper_hpd_irq_event(rdev_to_drm(rdev)); + pm_runtime_mark_last_busy(rdev_to_drm(rdev)->dev); + pm_runtime_put_autosuspend(rdev_to_drm(rdev)->dev); } } /* TODO: check other events */ @@ -736,7 +736,7 @@ int radeon_acpi_init(struct radeon_device *rdev) struct radeon_encoder *target = NULL; /* Find the encoder controlling the brightness */ - list_for_each_entry(tmp, &rdev->ddev->mode_config.encoder_list, + list_for_each_entry(tmp, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { struct radeon_encoder *enc = to_radeon_encoder(tmp); diff --git a/drivers/gpu/drm/radeon/radeon_agp.c b/drivers/gpu/drm/radeon/radeon_agp.c index a3d749e350f9..89d7b0e9e79f 100644 --- a/drivers/gpu/drm/radeon/radeon_agp.c +++ b/drivers/gpu/drm/radeon/radeon_agp.c @@ -161,7 +161,7 @@ struct radeon_agp_head *radeon_agp_head_init(struct drm_device *dev) static int radeon_agp_head_acquire(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct pci_dev *pdev = to_pci_dev(dev->dev); if (!rdev->agp) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 10793a433bf5..81a0a91921b9 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -187,7 +187,7 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev) if (i2c.valid) { sprintf(stmp, "0x%x", i2c.i2c_id); - rdev->i2c_bus[i] = radeon_i2c_create(rdev->ddev, &i2c, stmp); + rdev->i2c_bus[i] = radeon_i2c_create(rdev_to_drm(rdev), &i2c, stmp); } gpio = (ATOM_GPIO_I2C_ASSIGMENT *) ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT)); @@ -1716,27 +1716,25 @@ struct radeon_encoder_atom_dig *radeon_atombios_get_lvds_info(struct case LCD_FAKE_EDID_PATCH_RECORD_TYPE: fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record; if (fake_edid_record->ucFakeEDIDLength) { - struct edid *edid; - int edid_size = - max((int)EDID_LENGTH, (int)fake_edid_record->ucFakeEDIDLength); - edid = kmalloc(edid_size, GFP_KERNEL); - if (edid) { - memcpy((u8 *)edid, (u8 *)&fake_edid_record->ucFakeEDIDString[0], - fake_edid_record->ucFakeEDIDLength); - - if (drm_edid_is_valid(edid)) { - rdev->mode_info.bios_hardcoded_edid = edid; - rdev->mode_info.bios_hardcoded_edid_size = edid_size; - } else - kfree(edid); - } + const struct drm_edid *edid; + int edid_size; + + if (fake_edid_record->ucFakeEDIDLength == 128) + edid_size = fake_edid_record->ucFakeEDIDLength; + else + edid_size = fake_edid_record->ucFakeEDIDLength * 128; + edid = drm_edid_alloc(fake_edid_record->ucFakeEDIDString, edid_size); + if (drm_edid_valid(edid)) + rdev->mode_info.bios_hardcoded_edid = edid; + else + drm_edid_free(edid); + record += struct_size(fake_edid_record, + ucFakeEDIDString, + edid_size); + } else { + /* empty fake edid record must be 3 bytes long */ + record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; } - record += fake_edid_record->ucFakeEDIDLength ? - struct_size(fake_edid_record, - ucFakeEDIDString, - fake_edid_record->ucFakeEDIDLength) : - /* empty fake edid record must be 3 bytes long */ - sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1; break; case LCD_PANEL_RESOLUTION_RECORD_TYPE: panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 0bcd767b9f47..47aa06a9a942 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -196,7 +196,7 @@ static void radeon_audio_enable(struct radeon_device *rdev, return; if (rdev->mode_info.mode_config_initialized) { - list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) { + list_for_each_entry(encoder, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { if (radeon_encoder_is_digital(encoder)) { radeon_encoder = to_radeon_encoder(encoder); dig = radeon_encoder->enc_priv; @@ -760,7 +760,7 @@ static int radeon_audio_component_get_eld(struct device *kdev, int port, if (!rdev->audio.enabled || !rdev->mode_info.mode_config_initialized) return 0; - list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) { + list_for_each_entry(encoder, &rdev_to_drm(rdev)->mode_config.encoder_list, head) { if (!radeon_encoder_is_digital(encoder)) continue; radeon_encoder = to_radeon_encoder(encoder); diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 6952b1273b0f..df8d7f56b028 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -370,27 +370,22 @@ static uint16_t combios_get_table_offset(struct drm_device *dev, bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) { int edid_info, size; - struct edid *edid; + const struct drm_edid *edid; unsigned char *raw; - edid_info = combios_get_table_offset(rdev->ddev, COMBIOS_HARDCODED_EDID_TABLE); + edid_info = combios_get_table_offset(rdev_to_drm(rdev), COMBIOS_HARDCODED_EDID_TABLE); if (!edid_info) return false; raw = rdev->bios + edid_info; size = EDID_LENGTH * (raw[0x7e] + 1); - edid = kmalloc(size, GFP_KERNEL); - if (edid == NULL) - return false; - - memcpy((unsigned char *)edid, raw, size); + edid = drm_edid_alloc(raw, size); - if (!drm_edid_is_valid(edid)) { - kfree(edid); + if (!drm_edid_valid(edid)) { + drm_edid_free(edid); return false; } rdev->mode_info.bios_hardcoded_edid = edid; - rdev->mode_info.bios_hardcoded_edid_size = size; return true; } @@ -398,18 +393,7 @@ bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) struct edid * radeon_bios_get_hardcoded_edid(struct radeon_device *rdev) { - struct edid *edid; - - if (rdev->mode_info.bios_hardcoded_edid) { - edid = kmalloc(rdev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL); - if (edid) { - memcpy((unsigned char *)edid, - (unsigned char *)rdev->mode_info.bios_hardcoded_edid, - rdev->mode_info.bios_hardcoded_edid_size); - return edid; - } - } - return NULL; + return drm_edid_duplicate(drm_edid_raw(rdev->mode_info.bios_hardcoded_edid)); } static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rdev, @@ -642,7 +626,7 @@ static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rde static struct radeon_i2c_bus_rec radeon_combios_get_i2c_info_from_table(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct radeon_i2c_bus_rec i2c; u16 offset; u8 id, blocks, clk, data; @@ -670,7 +654,7 @@ static struct radeon_i2c_bus_rec radeon_combios_get_i2c_info_from_table(struct r void radeon_combios_i2c_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct radeon_i2c_bus_rec i2c; /* actual hw pads @@ -812,7 +796,7 @@ bool radeon_combios_get_clock_info(struct drm_device *dev) bool radeon_combios_sideport_present(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); u16 igp_info; /* sideport is AMD only */ @@ -915,7 +899,7 @@ struct radeon_encoder_primary_dac *radeon_combios_get_primary_dac_info(struct enum radeon_tv_std radeon_combios_get_tv_info(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); uint16_t tv_info; enum radeon_tv_std tv_std = TV_STD_NTSC; @@ -2637,7 +2621,7 @@ static const char *thermal_controller_names[] = { void radeon_combios_get_power_modes(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); u16 offset, misc, misc2 = 0; u8 rev, tmp; int state_index = 0; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 69693ba5949e..528a8f3677c2 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -505,6 +505,9 @@ static void radeon_add_common_modes(struct drm_encoder *encoder, struct drm_conn continue; mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); + if (!mode) + continue; + drm_mode_probed_add(connector, mode); } } @@ -1056,7 +1059,7 @@ radeon_vga_detect(struct drm_connector *connector, bool force) */ if ((!rdev->is_atom_bios) && (ret == connector_status_disconnected) && - rdev->mode_info.bios_hardcoded_edid_size) { + rdev->mode_info.bios_hardcoded_edid) { ret = connector_status_connected; } @@ -1389,7 +1392,7 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) out: if ((!rdev->is_atom_bios) && (ret == connector_status_disconnected) && - rdev->mode_info.bios_hardcoded_edid_size) { + rdev->mode_info.bios_hardcoded_edid) { radeon_connector->use_digital = true; ret = connector_status_connected; } diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index afbb3a80c0c6..554b236c2328 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -760,7 +760,7 @@ bool radeon_boot_test_post_card(struct radeon_device *rdev) if (rdev->is_atom_bios) atom_asic_init(rdev->mode_info.atom_context); else - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); return true; } else { dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); @@ -980,7 +980,7 @@ int radeon_atombios_init(struct radeon_device *rdev) return -ENOMEM; rdev->mode_info.atom_card_info = atom_card_info; - atom_card_info->dev = rdev->ddev; + atom_card_info->dev = rdev_to_drm(rdev); atom_card_info->reg_read = cail_reg_read; atom_card_info->reg_write = cail_reg_write; /* needed for iio ops */ @@ -1005,7 +1005,7 @@ int radeon_atombios_init(struct radeon_device *rdev) mutex_init(&rdev->mode_info.atom_context->mutex); mutex_init(&rdev->mode_info.atom_context->scratch_mutex); - radeon_atom_initialize_bios_scratch_regs(rdev->ddev); + radeon_atom_initialize_bios_scratch_regs(rdev_to_drm(rdev)); atom_allocate_fb_scratch(rdev->mode_info.atom_context); return 0; } @@ -1049,7 +1049,7 @@ void radeon_atombios_fini(struct radeon_device *rdev) */ int radeon_combios_init(struct radeon_device *rdev) { - radeon_combios_initialize_bios_scratch_regs(rdev->ddev); + radeon_combios_initialize_bios_scratch_regs(rdev_to_drm(rdev)); return 0; } @@ -1285,9 +1285,6 @@ int radeon_device_init(struct radeon_device *rdev, bool runtime = false; rdev->shutdown = false; - rdev->dev = &pdev->dev; - rdev->ddev = ddev; - rdev->pdev = pdev; rdev->flags = flags; rdev->family = flags & RADEON_FAMILY_MASK; rdev->is_atom_bios = false; @@ -1847,7 +1844,7 @@ int radeon_gpu_reset(struct radeon_device *rdev) downgrade_write(&rdev->exclusive_lock); - drm_helper_resume_force_mode(rdev->ddev); + drm_helper_resume_force_mode(rdev_to_drm(rdev)); /* set the power state here in case we are a PX system or headless */ if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 843383f7237f..8f5f8abcb1b4 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -302,13 +302,13 @@ void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id) if ((radeon_use_pflipirq == 2) && ASIC_IS_DCE4(rdev)) return; - spin_lock_irqsave(&rdev->ddev->event_lock, flags); + spin_lock_irqsave(&rdev_to_drm(rdev)->event_lock, flags); if (radeon_crtc->flip_status != RADEON_FLIP_SUBMITTED) { DRM_DEBUG_DRIVER("radeon_crtc->flip_status = %d != " "RADEON_FLIP_SUBMITTED(%d)\n", radeon_crtc->flip_status, RADEON_FLIP_SUBMITTED); - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); return; } @@ -334,7 +334,7 @@ void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id) */ if (update_pending && (DRM_SCANOUTPOS_VALID & - radeon_get_crtc_scanoutpos(rdev->ddev, crtc_id, + radeon_get_crtc_scanoutpos(rdev_to_drm(rdev), crtc_id, GET_DISTANCE_TO_VBLANKSTART, &vpos, &hpos, NULL, NULL, &rdev->mode_info.crtcs[crtc_id]->base.hwmode)) && @@ -347,7 +347,7 @@ void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id) */ update_pending = 0; } - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); if (!update_pending) radeon_crtc_handle_flip(rdev, crtc_id); } @@ -370,14 +370,14 @@ void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id) if (radeon_crtc == NULL) return; - spin_lock_irqsave(&rdev->ddev->event_lock, flags); + spin_lock_irqsave(&rdev_to_drm(rdev)->event_lock, flags); work = radeon_crtc->flip_work; if (radeon_crtc->flip_status != RADEON_FLIP_SUBMITTED) { DRM_DEBUG_DRIVER("radeon_crtc->flip_status = %d != " "RADEON_FLIP_SUBMITTED(%d)\n", radeon_crtc->flip_status, RADEON_FLIP_SUBMITTED); - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); return; } @@ -389,7 +389,7 @@ void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id) if (work->event) drm_crtc_send_vblank_event(&radeon_crtc->base, work->event); - spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); + spin_unlock_irqrestore(&rdev_to_drm(rdev)->event_lock, flags); drm_crtc_vblank_put(&radeon_crtc->base); radeon_irq_kms_pflip_irq_put(rdev, work->crtc_id); @@ -408,7 +408,7 @@ static void radeon_flip_work_func(struct work_struct *__work) struct radeon_flip_work *work = container_of(__work, struct radeon_flip_work, flip_work); struct radeon_device *rdev = work->rdev; - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[work->crtc_id]; struct drm_crtc *crtc = &radeon_crtc->base; @@ -1401,7 +1401,7 @@ static int radeon_modeset_create_props(struct radeon_device *rdev) if (rdev->is_atom_bios) { rdev->mode_info.coherent_mode_property = - drm_property_create_range(rdev->ddev, 0 , "coherent", 0, 1); + drm_property_create_range(rdev_to_drm(rdev), 0, "coherent", 0, 1); if (!rdev->mode_info.coherent_mode_property) return -ENOMEM; } @@ -1409,57 +1409,57 @@ static int radeon_modeset_create_props(struct radeon_device *rdev) if (!ASIC_IS_AVIVO(rdev)) { sz = ARRAY_SIZE(radeon_tmds_pll_enum_list); rdev->mode_info.tmds_pll_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "tmds_pll", radeon_tmds_pll_enum_list, sz); } rdev->mode_info.load_detect_property = - drm_property_create_range(rdev->ddev, 0, "load detection", 0, 1); + drm_property_create_range(rdev_to_drm(rdev), 0, "load detection", 0, 1); if (!rdev->mode_info.load_detect_property) return -ENOMEM; - drm_mode_create_scaling_mode_property(rdev->ddev); + drm_mode_create_scaling_mode_property(rdev_to_drm(rdev)); sz = ARRAY_SIZE(radeon_tv_std_enum_list); rdev->mode_info.tv_std_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "tv standard", radeon_tv_std_enum_list, sz); sz = ARRAY_SIZE(radeon_underscan_enum_list); rdev->mode_info.underscan_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "underscan", radeon_underscan_enum_list, sz); rdev->mode_info.underscan_hborder_property = - drm_property_create_range(rdev->ddev, 0, + drm_property_create_range(rdev_to_drm(rdev), 0, "underscan hborder", 0, 128); if (!rdev->mode_info.underscan_hborder_property) return -ENOMEM; rdev->mode_info.underscan_vborder_property = - drm_property_create_range(rdev->ddev, 0, + drm_property_create_range(rdev_to_drm(rdev), 0, "underscan vborder", 0, 128); if (!rdev->mode_info.underscan_vborder_property) return -ENOMEM; sz = ARRAY_SIZE(radeon_audio_enum_list); rdev->mode_info.audio_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "audio", radeon_audio_enum_list, sz); sz = ARRAY_SIZE(radeon_dither_enum_list); rdev->mode_info.dither_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "dither", radeon_dither_enum_list, sz); sz = ARRAY_SIZE(radeon_output_csc_enum_list); rdev->mode_info.output_csc_property = - drm_property_create_enum(rdev->ddev, 0, + drm_property_create_enum(rdev_to_drm(rdev), 0, "output_csc", radeon_output_csc_enum_list, sz); @@ -1578,29 +1578,29 @@ int radeon_modeset_init(struct radeon_device *rdev) int i; int ret; - drm_mode_config_init(rdev->ddev); + drm_mode_config_init(rdev_to_drm(rdev)); rdev->mode_info.mode_config_initialized = true; - rdev->ddev->mode_config.funcs = &radeon_mode_funcs; + rdev_to_drm(rdev)->mode_config.funcs = &radeon_mode_funcs; if (radeon_use_pflipirq == 2 && rdev->family >= CHIP_R600) - rdev->ddev->mode_config.async_page_flip = true; + rdev_to_drm(rdev)->mode_config.async_page_flip = true; if (ASIC_IS_DCE5(rdev)) { - rdev->ddev->mode_config.max_width = 16384; - rdev->ddev->mode_config.max_height = 16384; + rdev_to_drm(rdev)->mode_config.max_width = 16384; + rdev_to_drm(rdev)->mode_config.max_height = 16384; } else if (ASIC_IS_AVIVO(rdev)) { - rdev->ddev->mode_config.max_width = 8192; - rdev->ddev->mode_config.max_height = 8192; + rdev_to_drm(rdev)->mode_config.max_width = 8192; + rdev_to_drm(rdev)->mode_config.max_height = 8192; } else { - rdev->ddev->mode_config.max_width = 4096; - rdev->ddev->mode_config.max_height = 4096; + rdev_to_drm(rdev)->mode_config.max_width = 4096; + rdev_to_drm(rdev)->mode_config.max_height = 4096; } - rdev->ddev->mode_config.preferred_depth = 24; - rdev->ddev->mode_config.prefer_shadow = 1; + rdev_to_drm(rdev)->mode_config.preferred_depth = 24; + rdev_to_drm(rdev)->mode_config.prefer_shadow = 1; - rdev->ddev->mode_config.fb_modifiers_not_supported = true; + rdev_to_drm(rdev)->mode_config.fb_modifiers_not_supported = true; ret = radeon_modeset_create_props(rdev); if (ret) { @@ -1618,11 +1618,11 @@ int radeon_modeset_init(struct radeon_device *rdev) /* allocate crtcs */ for (i = 0; i < rdev->num_crtc; i++) { - radeon_crtc_init(rdev->ddev, i); + radeon_crtc_init(rdev_to_drm(rdev), i); } /* okay we should have all the bios connectors */ - ret = radeon_setup_enc_conn(rdev->ddev); + ret = radeon_setup_enc_conn(rdev_to_drm(rdev)); if (!ret) { return ret; } @@ -1639,7 +1639,7 @@ int radeon_modeset_init(struct radeon_device *rdev) /* setup afmt */ radeon_afmt_init(rdev); - drm_kms_helper_poll_init(rdev->ddev); + drm_kms_helper_poll_init(rdev_to_drm(rdev)); /* do pm late init */ ret = radeon_pm_late_init(rdev); @@ -1650,15 +1650,15 @@ int radeon_modeset_init(struct radeon_device *rdev) void radeon_modeset_fini(struct radeon_device *rdev) { if (rdev->mode_info.mode_config_initialized) { - drm_kms_helper_poll_fini(rdev->ddev); + drm_kms_helper_poll_fini(rdev_to_drm(rdev)); radeon_hpd_fini(rdev); - drm_helper_force_disable_all(rdev->ddev); + drm_helper_force_disable_all(rdev_to_drm(rdev)); radeon_afmt_fini(rdev); - drm_mode_config_cleanup(rdev->ddev); + drm_mode_config_cleanup(rdev_to_drm(rdev)); rdev->mode_info.mode_config_initialized = false; } - kfree(rdev->mode_info.bios_hardcoded_edid); + drm_edid_free(rdev->mode_info.bios_hardcoded_edid); /* free i2c buses */ radeon_i2c_fini(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 7bf08164140e..f36aa71c57c7 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -259,7 +259,8 @@ static int radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { unsigned long flags = 0; - struct drm_device *dev; + struct drm_device *ddev; + struct radeon_device *rdev; int ret; if (!ent) @@ -300,28 +301,37 @@ static int radeon_pci_probe(struct pci_dev *pdev, if (ret) return ret; - dev = drm_dev_alloc(&kms_driver, &pdev->dev); - if (IS_ERR(dev)) - return PTR_ERR(dev); + rdev = devm_drm_dev_alloc(&pdev->dev, &kms_driver, typeof(*rdev), ddev); + if (IS_ERR(rdev)) + return PTR_ERR(rdev); + + rdev->dev = &pdev->dev; + rdev->pdev = pdev; + ddev = rdev_to_drm(rdev); + ddev->dev_private = rdev; ret = pci_enable_device(pdev); if (ret) goto err_free; - pci_set_drvdata(pdev, dev); + pci_set_drvdata(pdev, ddev); + + ret = radeon_driver_load_kms(ddev, flags); + if (ret) + goto err_agp; - ret = drm_dev_register(dev, ent->driver_data); + ret = drm_dev_register(ddev, flags); if (ret) goto err_agp; - radeon_fbdev_setup(dev->dev_private); + radeon_fbdev_setup(ddev->dev_private); return 0; err_agp: pci_disable_device(pdev); err_free: - drm_dev_put(dev); + drm_dev_put(ddev); return ret; } @@ -569,7 +579,6 @@ static const struct drm_ioctl_desc radeon_ioctls_kms[] = { static const struct drm_driver kms_driver = { .driver_features = DRIVER_GEM | DRIVER_RENDER | DRIVER_MODESET, - .load = radeon_driver_load_kms, .open = radeon_driver_open_kms, .postclose = radeon_driver_postclose_kms, .unload = radeon_driver_unload_kms, diff --git a/drivers/gpu/drm/radeon/radeon_fbdev.c b/drivers/gpu/drm/radeon/radeon_fbdev.c index 02bf25759059..fb70de29545c 100644 --- a/drivers/gpu/drm/radeon/radeon_fbdev.c +++ b/drivers/gpu/drm/radeon/radeon_fbdev.c @@ -67,7 +67,7 @@ static int radeon_fbdev_create_pinned_object(struct drm_fb_helper *fb_helper, int height = mode_cmd->height; u32 cpp; - info = drm_get_format_info(rdev->ddev, mode_cmd); + info = drm_get_format_info(rdev_to_drm(rdev), mode_cmd); cpp = info->cpp[0]; /* need to align pitch with crtc limits */ @@ -148,15 +148,15 @@ static int radeon_fbdev_fb_open(struct fb_info *info, int user) struct radeon_device *rdev = fb_helper->dev->dev_private; int ret; - ret = pm_runtime_get_sync(rdev->ddev->dev); + ret = pm_runtime_get_sync(rdev_to_drm(rdev)->dev); if (ret < 0 && ret != -EACCES) goto err_pm_runtime_mark_last_busy; return 0; err_pm_runtime_mark_last_busy: - pm_runtime_mark_last_busy(rdev->ddev->dev); - pm_runtime_put_autosuspend(rdev->ddev->dev); + pm_runtime_mark_last_busy(rdev_to_drm(rdev)->dev); + pm_runtime_put_autosuspend(rdev_to_drm(rdev)->dev); return ret; } @@ -165,8 +165,8 @@ static int radeon_fbdev_fb_release(struct fb_info *info, int user) struct drm_fb_helper *fb_helper = info->par; struct radeon_device *rdev = fb_helper->dev->dev_private; - pm_runtime_mark_last_busy(rdev->ddev->dev); - pm_runtime_put_autosuspend(rdev->ddev->dev); + pm_runtime_mark_last_busy(rdev_to_drm(rdev)->dev); + pm_runtime_put_autosuspend(rdev_to_drm(rdev)->dev); return 0; } @@ -236,7 +236,7 @@ static int radeon_fbdev_fb_helper_fb_probe(struct drm_fb_helper *fb_helper, ret = -ENOMEM; goto err_radeon_fbdev_destroy_pinned_object; } - ret = radeon_framebuffer_init(rdev->ddev, fb, &mode_cmd, gobj); + ret = radeon_framebuffer_init(rdev_to_drm(rdev), fb, &mode_cmd, gobj); if (ret) { DRM_ERROR("failed to initialize framebuffer %d\n", ret); goto err_kfree; @@ -374,12 +374,12 @@ void radeon_fbdev_setup(struct radeon_device *rdev) fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL); if (!fb_helper) return; - drm_fb_helper_prepare(rdev->ddev, fb_helper, bpp_sel, &radeon_fbdev_fb_helper_funcs); + drm_fb_helper_prepare(rdev_to_drm(rdev), fb_helper, bpp_sel, &radeon_fbdev_fb_helper_funcs); - ret = drm_client_init(rdev->ddev, &fb_helper->client, "radeon-fbdev", + ret = drm_client_init(rdev_to_drm(rdev), &fb_helper->client, "radeon-fbdev", &radeon_fbdev_client_funcs); if (ret) { - drm_err(rdev->ddev, "Failed to register client: %d\n", ret); + drm_err(rdev_to_drm(rdev), "Failed to register client: %d\n", ret); goto err_drm_client_init; } @@ -394,13 +394,13 @@ err_drm_client_init: void radeon_fbdev_set_suspend(struct radeon_device *rdev, int state) { - if (rdev->ddev->fb_helper) - drm_fb_helper_set_suspend(rdev->ddev->fb_helper, state); + if (rdev_to_drm(rdev)->fb_helper) + drm_fb_helper_set_suspend(rdev_to_drm(rdev)->fb_helper, state); } bool radeon_fbdev_robj_is_fb(struct radeon_device *rdev, struct radeon_bo *robj) { - struct drm_fb_helper *fb_helper = rdev->ddev->fb_helper; + struct drm_fb_helper *fb_helper = rdev_to_drm(rdev)->fb_helper; struct drm_gem_object *gobj; if (!fb_helper) diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 4fb780d96f32..daff61586be5 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -150,7 +150,7 @@ int radeon_fence_emit(struct radeon_device *rdev, rdev->fence_context + ring, seq); radeon_fence_ring_emit(rdev, ring, *fence); - trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq); + trace_radeon_fence_emit(rdev_to_drm(rdev), ring, (*fence)->seq); radeon_fence_schedule_check(rdev, ring); return 0; } @@ -489,7 +489,7 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, if (!target_seq[i]) continue; - trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]); + trace_radeon_fence_wait_begin(rdev_to_drm(rdev), i, target_seq[i]); radeon_irq_kms_sw_irq_get(rdev, i); } @@ -511,7 +511,7 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, continue; radeon_irq_kms_sw_irq_put(rdev, i); - trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]); + trace_radeon_fence_wait_end(rdev_to_drm(rdev), i, target_seq[i]); } return r; @@ -995,7 +995,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(radeon_debugfs_gpu_reset_fops, void radeon_debugfs_fence_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_gpu_reset", 0444, root, rdev, &radeon_debugfs_gpu_reset_fops); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index e66a230331ee..9735f4968b86 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -88,7 +88,7 @@ static void radeon_gem_object_free(struct drm_gem_object *gobj) if (robj) { radeon_mn_unregister(robj); - radeon_bo_unref(&robj); + ttm_bo_put(&robj->tbo); } } @@ -899,7 +899,7 @@ DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_gem_info); void radeon_gem_debugfs_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_gem_info", 0444, root, rdev, &radeon_debugfs_gem_info_fops); diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c index 3d174390a8af..1f16619ed06e 100644 --- a/drivers/gpu/drm/radeon/radeon_i2c.c +++ b/drivers/gpu/drm/radeon/radeon_i2c.c @@ -1011,7 +1011,7 @@ void radeon_i2c_add(struct radeon_device *rdev, struct radeon_i2c_bus_rec *rec, const char *name) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); int i; for (i = 0; i < RADEON_MAX_I2C_BUS; i++) { diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 63d914f3414d..1aa41cc3f991 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -309,7 +309,7 @@ DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_sa_info); static void radeon_debugfs_sa_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_sa_info", 0444, root, rdev, &radeon_debugfs_sa_info_fops); diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index c4dda908666c..9961251b44ba 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -80,7 +80,7 @@ static void radeon_hotplug_work_func(struct work_struct *work) { struct radeon_device *rdev = container_of(work, struct radeon_device, hotplug_work.work); - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; @@ -101,7 +101,7 @@ static void radeon_dp_work_func(struct work_struct *work) { struct radeon_device *rdev = container_of(work, struct radeon_device, dp_work); - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; @@ -197,7 +197,7 @@ static void radeon_driver_irq_uninstall_kms(struct drm_device *dev) static int radeon_irq_install(struct radeon_device *rdev, int irq) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); int ret; if (irq == IRQ_NOTCONNECTED) @@ -218,7 +218,7 @@ static int radeon_irq_install(struct radeon_device *rdev, int irq) static void radeon_irq_uninstall(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct pci_dev *pdev = to_pci_dev(dev->dev); radeon_driver_irq_uninstall_kms(dev); @@ -322,9 +322,9 @@ int radeon_irq_kms_init(struct radeon_device *rdev) spin_lock_init(&rdev->irq.lock); /* Disable vblank irqs aggressively for power-saving */ - rdev->ddev->vblank_disable_immediate = true; + rdev_to_drm(rdev)->vblank_disable_immediate = true; - r = drm_vblank_init(rdev->ddev, rdev->num_crtc); + r = drm_vblank_init(rdev_to_drm(rdev), rdev->num_crtc); if (r) { return r; } diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index a16590c6247f..645e33bf7947 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -104,15 +104,9 @@ done_free: int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) { struct pci_dev *pdev = to_pci_dev(dev->dev); - struct radeon_device *rdev; + struct radeon_device *rdev = dev->dev_private; int r, acpi_status; - rdev = kzalloc(sizeof(struct radeon_device), GFP_KERNEL); - if (rdev == NULL) { - return -ENOMEM; - } - dev->dev_private = (void *)rdev; - #ifdef __alpha__ rdev->hose = pdev->sysdata; #endif diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index e0a5af180801..421c83fc70dc 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -39,6 +39,7 @@ #include <linux/i2c-algo-bit.h> struct edid; +struct drm_edid; struct radeon_bo; struct radeon_device; @@ -262,8 +263,7 @@ struct radeon_mode_info { /* Output CSC */ struct drm_property *output_csc_property; /* hardcoded DFP edid from BIOS */ - struct edid *bios_hardcoded_edid; - int bios_hardcoded_edid_size; + const struct drm_edid *bios_hardcoded_edid; /* firmware flags */ u16 firmware_flags; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index a955f8a2f7fe..d0e4b43d155c 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -150,7 +150,7 @@ int radeon_bo_create(struct radeon_device *rdev, bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL); if (bo == NULL) return -ENOMEM; - drm_gem_private_object_init(rdev->ddev, &bo->tbo.base, size); + drm_gem_private_object_init(rdev_to_drm(rdev), &bo->tbo.base, size); bo->rdev = rdev; bo->surface_reg = -1; INIT_LIST_HEAD(&bo->list); @@ -256,18 +256,15 @@ struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo) if (bo == NULL) return NULL; - ttm_bo_get(&bo->tbo); + drm_gem_object_get(&bo->tbo.base); return bo; } void radeon_bo_unref(struct radeon_bo **bo) { - struct ttm_buffer_object *tbo; - if ((*bo) == NULL) return; - tbo = &((*bo)->tbo); - ttm_bo_put(tbo); + drm_gem_object_put(&(*bo)->tbo.base); *bo = NULL; } diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 2d9d9f46f243..b4fb7e70320b 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -282,7 +282,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) if (rdev->irq.installed) { i = 0; - drm_for_each_crtc(crtc, rdev->ddev) { + drm_for_each_crtc(crtc, rdev_to_drm(rdev)) { if (rdev->pm.active_crtcs & (1 << i)) { /* This can fail if a modeset is in progress */ if (drm_crtc_vblank_get(crtc) == 0) @@ -299,7 +299,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) if (rdev->irq.installed) { i = 0; - drm_for_each_crtc(crtc, rdev->ddev) { + drm_for_each_crtc(crtc, rdev_to_drm(rdev)) { if (rdev->pm.req_vblank & (1 << i)) { rdev->pm.req_vblank &= ~(1 << i); drm_crtc_vblank_put(crtc); @@ -671,7 +671,7 @@ static ssize_t radeon_hwmon_show_temp(struct device *dev, char *buf) { struct radeon_device *rdev = dev_get_drvdata(dev); - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); int temp; /* Can't get temperature when the card is off */ @@ -715,7 +715,7 @@ static ssize_t radeon_hwmon_show_sclk(struct device *dev, struct device_attribute *attr, char *buf) { struct radeon_device *rdev = dev_get_drvdata(dev); - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); u32 sclk = 0; /* Can't get clock frequency when the card is off */ @@ -740,7 +740,7 @@ static ssize_t radeon_hwmon_show_vddc(struct device *dev, struct device_attribute *attr, char *buf) { struct radeon_device *rdev = dev_get_drvdata(dev); - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); u16 vddc = 0; /* Can't get vddc when the card is off */ @@ -1692,7 +1692,7 @@ void radeon_pm_fini(struct radeon_device *rdev) static void radeon_pm_compute_clocks_old(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; @@ -1765,7 +1765,7 @@ static void radeon_pm_compute_clocks_old(struct radeon_device *rdev) static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; struct radeon_connector *radeon_connector; @@ -1826,7 +1826,7 @@ static bool radeon_pm_in_vbl(struct radeon_device *rdev) */ for (crtc = 0; (crtc < rdev->num_crtc) && in_vbl; crtc++) { if (rdev->pm.active_crtcs & (1 << crtc)) { - vbl_status = radeon_get_crtc_scanoutpos(rdev->ddev, + vbl_status = radeon_get_crtc_scanoutpos(rdev_to_drm(rdev), crtc, USE_REAL_VBLANKSTART, &vpos, &hpos, NULL, NULL, @@ -1918,7 +1918,7 @@ static void radeon_dynpm_idle_work_handler(struct work_struct *work) static int radeon_debugfs_pm_info_show(struct seq_file *m, void *unused) { struct radeon_device *rdev = m->private; - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); if ((rdev->flags & RADEON_IS_PX) && (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) { @@ -1955,7 +1955,7 @@ DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_pm_info); static void radeon_debugfs_pm_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("radeon_pm_info", 0444, root, rdev, &radeon_debugfs_pm_info_fops); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 8d1d458286a8..581ae20c46e4 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -550,7 +550,7 @@ static void radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_r { #if defined(CONFIG_DEBUG_FS) const char *ring_name = radeon_debugfs_ring_idx_to_name(ring->idx); - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; if (ring_name) debugfs_create_file(ring_name, 0444, root, ring, diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 5c65b6dfb99a..69d0c12fa419 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -682,8 +682,8 @@ int radeon_ttm_init(struct radeon_device *rdev) /* No others user of address space so set it to 0 */ r = ttm_device_init(&rdev->mman.bdev, &radeon_bo_driver, rdev->dev, - rdev->ddev->anon_inode->i_mapping, - rdev->ddev->vma_offset_manager, + rdev_to_drm(rdev)->anon_inode->i_mapping, + rdev_to_drm(rdev)->vma_offset_manager, rdev->need_swiotlb, dma_addressing_limited(&rdev->pdev->dev)); if (r) { @@ -890,7 +890,7 @@ static const struct file_operations radeon_ttm_gtt_fops = { static void radeon_ttm_debugfs_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct drm_minor *minor = rdev->ddev->primary; + struct drm_minor *minor = rdev_to_drm(rdev)->primary; struct dentry *root = minor->debugfs_root; debugfs_create_file("radeon_vram", 0444, root, rdev, diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index d4d1501e6576..d6c18fd740ec 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -379,7 +379,7 @@ DEFINE_SHOW_ATTRIBUTE(rs400_debugfs_gart_info); static void rs400_debugfs_pcie_gart_info_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("rs400_gart_info", 0444, root, rdev, &rs400_debugfs_gart_info_fops); @@ -474,7 +474,7 @@ int rs400_resume(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* post */ - radeon_combios_asic_init(rdev->ddev); + radeon_combios_asic_init(rdev_to_drm(rdev)); /* Resume clock after posting */ r300_clock_startup(rdev); /* Initialize surface registers */ @@ -552,7 +552,7 @@ int rs400_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize memory controller */ rs400_mc_init(rdev); /* Fence driver */ diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 5c162778899b..88c8e91ea651 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -321,7 +321,7 @@ void rs600_pm_misc(struct radeon_device *rdev) void rs600_pm_prepare(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -339,7 +339,7 @@ void rs600_pm_prepare(struct radeon_device *rdev) void rs600_pm_finish(struct radeon_device *rdev) { - struct drm_device *ddev = rdev->ddev; + struct drm_device *ddev = rdev_to_drm(rdev); struct drm_crtc *crtc; struct radeon_crtc *radeon_crtc; u32 tmp; @@ -408,7 +408,7 @@ void rs600_hpd_set_polarity(struct radeon_device *rdev, void rs600_hpd_init(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned enable = 0; @@ -435,7 +435,7 @@ void rs600_hpd_init(struct radeon_device *rdev) void rs600_hpd_fini(struct radeon_device *rdev) { - struct drm_device *dev = rdev->ddev; + struct drm_device *dev = rdev_to_drm(rdev); struct drm_connector *connector; unsigned disable = 0; @@ -797,7 +797,7 @@ int rs600_irq_process(struct radeon_device *rdev) /* Vertical blank interrupts */ if (G_007EDC_LB_D1_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); + drm_handle_vblank(rdev_to_drm(rdev), 0); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -806,7 +806,7 @@ int rs600_irq_process(struct radeon_device *rdev) } if (G_007EDC_LB_D2_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) { if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); + drm_handle_vblank(rdev_to_drm(rdev), 1); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -1133,7 +1133,7 @@ int rs600_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize memory controller */ rs600_mc_init(rdev); r100_debugfs_rbbm_init(rdev); diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index 14fb0819b8c1..016eb4992803 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -845,7 +845,7 @@ int rs690_init(struct radeon_device *rdev) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize memory controller */ rs690_mc_init(rdev); rv515_debugfs(rdev); diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index bbc6ccabf788..1b4dfb645585 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -255,7 +255,7 @@ DEFINE_SHOW_ATTRIBUTE(rv515_debugfs_ga_info); void rv515_debugfs(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - struct dentry *root = rdev->ddev->primary->debugfs_root; + struct dentry *root = rdev_to_drm(rdev)->primary->debugfs_root; debugfs_create_file("rv515_pipes_info", 0444, root, rdev, &rv515_debugfs_pipes_info_fops); @@ -636,7 +636,7 @@ int rv515_init(struct radeon_device *rdev) if (radeon_boot_test_post_card(rdev) == false) return -EINVAL; /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* initialize AGP */ if (rdev->flags & RADEON_IS_AGP) { r = radeon_agp_init(rdev); diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 9ce12fa3c356..7d4b0bf59109 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1935,7 +1935,7 @@ int rv770_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); /* initialize AGP */ diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 15759c8ca5b7..6c95575ce109 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6277,7 +6277,7 @@ restart_ih: event_name = "vblank"; if (rdev->irq.crtc_vblank_int[crtc_idx]) { - drm_handle_vblank(rdev->ddev, crtc_idx); + drm_handle_vblank(rdev_to_drm(rdev), crtc_idx); rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } @@ -6839,7 +6839,7 @@ int si_init(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); /* Initialize clocks */ - radeon_get_clock_info(rdev->ddev); + radeon_get_clock_info(rdev_to_drm(rdev)); /* Fence driver */ radeon_fence_driver_init(rdev); diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c index 44ce0f581062..42ef62aa0a1e 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -279,7 +279,6 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector, const u8 *buffer, size_t len) { struct inno_hdmi *hdmi = connector_to_inno_hdmi(connector); - u8 packed_frame[HDMI_MAXIMUM_INFO_FRAME_SIZE]; ssize_t i; if (type != HDMI_INFOFRAME_TYPE_AVI) { @@ -291,8 +290,7 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector, inno_hdmi_disable_frame(connector, type); for (i = 0; i < len; i++) - hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i, - packed_frame[i]); + hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i, buffer[i]); return 0; } diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c index f7f7d8b0f61f..fd4215e2f982 100644 --- a/drivers/gpu/drm/tests/drm_gem_shmem_test.c +++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c @@ -89,6 +89,17 @@ static void drm_gem_shmem_test_obj_create_private(struct kunit *test) sg_init_one(sgt->sgl, buf, TEST_SIZE); + /* + * Set the DMA mask to 64-bits and map the sgtables + * otherwise drm_gem_shmem_free will cause a warning + * on debug kernels. + */ + ret = dma_set_mask(drm_dev->dev, DMA_BIT_MASK(64)); + KUNIT_ASSERT_EQ(test, ret, 0); + + ret = dma_map_sgtable(drm_dev->dev, sgt, DMA_BIDIRECTIONAL, 0); + KUNIT_ASSERT_EQ(test, ret, 0); + /* Init a mock DMA-BUF */ buf_mock.size = TEST_SIZE; attach_mock.dmabuf = &buf_mock; diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 42d4f4a2dba2..fa6859dd8368 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -317,7 +317,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) struct v3d_dev *v3d = job->base.v3d; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; - int i, csd_cfg0_reg, csd_cfg_reg_count; + int i, csd_cfg0_reg; v3d->csd_job = job; @@ -337,9 +337,17 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) v3d_switch_perfmon(v3d, &job->base); csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver); - csd_cfg_reg_count = v3d->ver < 71 ? 6 : 7; - for (i = 1; i <= csd_cfg_reg_count; i++) + for (i = 1; i <= 6; i++) V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]); + + /* Although V3D 7.1 has an eighth configuration register, we are not + * using it. Therefore, make sure it remains unused. + * + * XXX: Set the CFG7 register + */ + if (v3d->ver >= 71) + V3D_CORE_WRITE(0, V3D_V7_CSD_QUEUED_CFG7, 0); + /* CFG0 write kicks off the job. */ V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]); diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 1ff9602a52f6..b9670ae09a9e 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -28,7 +28,6 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ xe-y += xe_bb.o \ xe_bo.o \ xe_bo_evict.o \ - xe_debugfs.o \ xe_devcoredump.o \ xe_device.o \ xe_device_sysfs.o \ @@ -46,7 +45,6 @@ xe-y += xe_bb.o \ xe_gt.o \ xe_gt_ccs_mode.o \ xe_gt_clock.o \ - xe_gt_debugfs.o \ xe_gt_freq.o \ xe_gt_idle.o \ xe_gt_mcr.o \ @@ -59,7 +57,6 @@ xe-y += xe_bb.o \ xe_guc_ads.o \ xe_guc_ct.o \ xe_guc_db_mgr.o \ - xe_guc_debugfs.o \ xe_guc_hwconfig.o \ xe_guc_id_mgr.o \ xe_guc_klv_helpers.o \ @@ -69,9 +66,9 @@ xe-y += xe_bb.o \ xe_heci_gsc.o \ xe_hw_engine.o \ xe_hw_engine_class_sysfs.o \ + xe_hw_engine_group.o \ xe_hw_fence.o \ xe_huc.o \ - xe_huc_debugfs.o \ xe_irq.o \ xe_lrc.o \ xe_migrate.o \ @@ -107,7 +104,6 @@ xe-y += xe_bb.o \ xe_ttm_vram_mgr.o \ xe_tuning.o \ xe_uc.o \ - xe_uc_debugfs.o \ xe_uc_fw.o \ xe_vm.o \ xe_vram.o \ @@ -124,7 +120,6 @@ xe-$(CONFIG_HWMON) += xe_hwmon.o # graphics virtualization (SR-IOV) support xe-y += \ xe_gt_sriov_vf.o \ - xe_gt_sriov_vf_debugfs.o \ xe_guc_relay.o \ xe_memirq.o \ xe_sriov.o @@ -133,7 +128,6 @@ xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ xe_gt_sriov_pf_config.o \ xe_gt_sriov_pf_control.o \ - xe_gt_sriov_pf_debugfs.o \ xe_gt_sriov_pf_monitor.o \ xe_gt_sriov_pf_policy.o \ xe_gt_sriov_pf_service.o \ @@ -281,6 +275,16 @@ ifeq ($(CONFIG_DRM_FBDEV_EMULATION),y) endif ifeq ($(CONFIG_DEBUG_FS),y) + xe-y += xe_debugfs.o \ + xe_gt_debugfs.o \ + xe_gt_sriov_vf_debugfs.o \ + xe_gt_stats.o \ + xe_guc_debugfs.o \ + xe_huc_debugfs.o \ + xe_uc_debugfs.o + + xe-$(CONFIG_PCI_IOV) += xe_gt_sriov_pf_debugfs.o + xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_display_debugfs.o \ i915-display/intel_display_debugfs_params.o \ diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 8f9f60b28306..6b30743a2f6c 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -351,6 +351,7 @@ enum xe_guc_klv_ids { GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING = 0x9005, GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007, GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008, + GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009, }; #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 2feedddf1e40..b7b12b20e390 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -80,11 +80,6 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IS_MOBILE(xe) (xe && 0) -#define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270 - -/* Workarounds not handled yet */ -#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step <= last; }) - #define IS_LP(xe) (0) #define IS_GEN9_LP(xe) (0) #define IS_GEN9_BC(xe) (0) @@ -116,7 +111,6 @@ struct i915_sched_attr { #define i915_gem_fence_wait_priority(fence, attr) do { (void) attr; } while (0) #define pdev_to_i915 pdev_to_xe_device -#define RUNTIME_INFO(xe) (&(xe)->info.i915_runtime) #define FORCEWAKE_ALL XE_FORCEWAKE_ALL diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h index a20d2638ea7a..bdae8392e125 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h @@ -7,7 +7,8 @@ #define I915_VMA_H #include <uapi/drm/i915_drm.h> -#include <drm/drm_mm.h> + +#include "xe_ggtt_types.h" /* We don't want these from i915_drm.h in case of Xe */ #undef I915_TILING_X @@ -19,7 +20,7 @@ struct xe_bo; struct i915_vma { struct xe_bo *bo, *dpt; - struct drm_mm_node node; + struct xe_ggtt_node *node; }; #define i915_ggtt_clear_scanout(bo) do { } while (0) @@ -28,7 +29,7 @@ struct i915_vma { static inline u32 i915_ggtt_offset(const struct i915_vma *vma) { - return vma->node.start; + return vma->node->base.start; } #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h index 0006ef812346..2cf13a572ab0 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_step.h @@ -6,15 +6,9 @@ #ifndef __INTEL_STEP_H__ #define __INTEL_STEP_H__ -#include "xe_device_types.h" #include "xe_step.h" -#define intel_display_step_name xe_display_step_name - -static inline -const char *xe_display_step_name(struct xe_device *xe) -{ - return xe_step_name(xe->info.step.display); -} +#define intel_step xe_step +#define intel_step_name xe_step_name #endif /* __INTEL_STEP_H__ */ diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c index f835492f73fb..63ce97cc4cfe 100644 --- a/drivers/gpu/drm/xe/display/intel_fb_bo.c +++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c @@ -7,6 +7,7 @@ #include <drm/ttm/ttm_bo.h> #include "intel_display_types.h" +#include "intel_fb.h" #include "intel_fb_bo.h" #include "xe_bo.h" @@ -28,6 +29,14 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, struct xe_device *xe = to_xe_device(bo->ttm.base.dev); int ret; + /* + * Some modifiers require physical alignment of 64KiB VRAM pages; + * require that the BO in those cases is created correctly. + */ + if (XE_IOCTL_DBG(xe, intel_fb_needs_64k_phys(mode_cmd->modifier[0]) && + !(bo->flags & XE_BO_FLAG_NEEDS_64K))) + return -EINVAL; + xe_bo_get(bo); ret = ttm_bo_reserve(&bo->ttm, true, false, NULL); diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 8b83dcff72e1..78a884ddd499 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -46,7 +46,7 @@ static bool has_display(struct xe_device *xe) */ bool xe_display_driver_probe_defer(struct pci_dev *pdev) { - if (!xe_modparam.enable_display) + if (!xe_modparam.probe_display) return 0; return intel_display_driver_probe_defer(pdev); @@ -62,7 +62,7 @@ bool xe_display_driver_probe_defer(struct pci_dev *pdev) */ void xe_display_driver_set_hooks(struct drm_driver *driver) { - if (!xe_modparam.enable_display) + if (!xe_modparam.probe_display) return; driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC; @@ -104,7 +104,7 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) { struct xe_device *xe = to_xe_device(dev); - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_power_domains_cleanup(xe); @@ -112,7 +112,7 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) int xe_display_init_nommio(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; /* Fake uncore lock */ @@ -127,24 +127,27 @@ int xe_display_init_nommio(struct xe_device *xe) static void xe_display_fini_noirq(void *arg) { struct xe_device *xe = arg; + struct intel_display *display = &xe->display; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_remove_noirq(xe); + intel_opregion_cleanup(display); } int xe_display_init_noirq(struct xe_device *xe) { + struct intel_display *display = &xe->display; int err; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; intel_display_driver_early_probe(xe); /* Early display init.. */ - intel_opregion_setup(xe); + intel_opregion_setup(display); /* * Fill the dram structure to get the system dram info. This will be @@ -157,8 +160,10 @@ int xe_display_init_noirq(struct xe_device *xe) intel_display_device_info_runtime_init(xe); err = intel_display_driver_probe_noirq(xe); - if (err) + if (err) { + intel_opregion_cleanup(display); return err; + } return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_noirq, xe); } @@ -167,7 +172,7 @@ static void xe_display_fini_noaccel(void *arg) { struct xe_device *xe = arg; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_remove_nogem(xe); @@ -177,7 +182,7 @@ int xe_display_init_noaccel(struct xe_device *xe) { int err; - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; err = intel_display_driver_probe_nogem(xe); @@ -189,7 +194,7 @@ int xe_display_init_noaccel(struct xe_device *xe) int xe_display_init(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return 0; return intel_display_driver_probe(xe); @@ -197,7 +202,7 @@ int xe_display_init(struct xe_device *xe) void xe_display_fini(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_hpd_poll_fini(xe); @@ -208,7 +213,7 @@ void xe_display_fini(struct xe_device *xe) void xe_display_register(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_register(xe); @@ -218,7 +223,7 @@ void xe_display_register(struct xe_device *xe) void xe_display_unregister(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_unregister_dsm_handler(); @@ -228,7 +233,7 @@ void xe_display_unregister(struct xe_device *xe) void xe_display_driver_remove(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_driver_remove(xe); @@ -238,7 +243,7 @@ void xe_display_driver_remove(struct xe_device *xe) void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; if (master_ctl & DISPLAY_IRQ) @@ -247,16 +252,18 @@ void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) { - if (!xe->info.enable_display) + struct intel_display *display = &xe->display; + + if (!xe->info.probe_display) return; if (gu_misc_iir & GU_MISC_GSE) - intel_opregion_asle_intr(xe); + intel_opregion_asle_intr(display); } void xe_display_irq_reset(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; gen11_display_irq_reset(xe); @@ -264,7 +271,7 @@ void xe_display_irq_reset(struct xe_device *xe) void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; if (gt->info.id == XE_GT0) @@ -280,10 +287,44 @@ static bool suspend_to_idle(void) return false; } +static void xe_display_flush_cleanup_work(struct xe_device *xe) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(&xe->drm, crtc) { + struct drm_crtc_commit *commit; + + spin_lock(&crtc->base.commit_lock); + commit = list_first_entry_or_null(&crtc->base.commit_list, + struct drm_crtc_commit, commit_entry); + if (commit) + drm_crtc_commit_get(commit); + spin_unlock(&crtc->base.commit_lock); + + if (commit) { + wait_for_completion(&commit->cleanup_done); + drm_crtc_commit_put(commit); + } + } +} + +/* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */ +void xe_display_pm_runtime_suspend(struct xe_device *xe) +{ + if (!xe->info.probe_display) + return; + + if (xe->d3cold.allowed) + xe_display_pm_suspend(xe, true); + + intel_hpd_poll_enable(xe); +} + void xe_display_pm_suspend(struct xe_device *xe, bool runtime) { + struct intel_display *display = &xe->display; bool s2idle = suspend_to_idle(); - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; /* @@ -291,21 +332,27 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) * properly. */ intel_power_domains_disable(xe); - if (has_display(xe)) + intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); + if (!runtime && has_display(xe)) { drm_kms_helper_poll_disable(&xe->drm); - - if (!runtime) + intel_display_driver_disable_user_access(xe); intel_display_driver_suspend(xe); + } + + xe_display_flush_cleanup_work(xe); + + xe_display_flush_cleanup_work(xe); intel_dp_mst_suspend(xe); intel_hpd_cancel_work(xe); - intel_encoder_suspend_all(&xe->display); + if (!runtime && has_display(xe)) + intel_display_driver_suspend_access(xe); - intel_opregion_suspend(xe, s2idle ? PCI_D1 : PCI_D3cold); + intel_encoder_suspend_all(&xe->display); - intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_SUSPENDED, true); + intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold); intel_dmc_suspend(xe); } @@ -313,7 +360,7 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) void xe_display_pm_suspend_late(struct xe_device *xe) { bool s2idle = suspend_to_idle(); - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_power_domains_suspend(xe, s2idle); @@ -321,9 +368,20 @@ void xe_display_pm_suspend_late(struct xe_device *xe) intel_display_power_suspend_late(xe); } +void xe_display_pm_runtime_resume(struct xe_device *xe) +{ + if (!xe->info.probe_display) + return; + + intel_hpd_poll_disable(xe); + + if (xe->d3cold.allowed) + xe_display_pm_resume(xe, true); +} + void xe_display_pm_resume_early(struct xe_device *xe) { - if (!xe->info.enable_display) + if (!xe->info.probe_display) return; intel_display_power_resume_early(xe); @@ -333,7 +391,9 @@ void xe_display_pm_resume_early(struct xe_device *xe) void xe_display_pm_resume(struct xe_device *xe, bool runtime) { - if (!xe->info.enable_display) + struct intel_display *display = &xe->display; + + if (!xe->info.probe_display) return; intel_dmc_resume(xe); @@ -344,16 +404,19 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) intel_display_driver_init_hw(xe); intel_hpd_init(xe); + if (!runtime && has_display(xe)) + intel_display_driver_resume_access(xe); + /* MST sideband requires HPD interrupts enabled */ intel_dp_mst_resume(xe); - if (!runtime) + if (!runtime && has_display(xe)) { intel_display_driver_resume(xe); - - intel_hpd_poll_disable(xe); - if (has_display(xe)) drm_kms_helper_poll_enable(&xe->drm); + intel_display_driver_enable_user_access(xe); + intel_hpd_poll_disable(xe); + } - intel_opregion_resume(xe); + intel_opregion_resume(display); intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false); @@ -371,7 +434,7 @@ int xe_display_probe(struct xe_device *xe) { int err; - if (!xe->info.enable_display) + if (!xe->info.probe_display) goto no_display; intel_display_device_probe(xe); @@ -384,7 +447,7 @@ int xe_display_probe(struct xe_device *xe) return 0; no_display: - xe->info.enable_display = false; + xe->info.probe_display = false; unset_display_features(xe); return 0; } diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h index 000fb5799df5..53d727fd792b 100644 --- a/drivers/gpu/drm/xe/display/xe_display.h +++ b/drivers/gpu/drm/xe/display/xe_display.h @@ -38,6 +38,8 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime); void xe_display_pm_suspend_late(struct xe_device *xe); void xe_display_pm_resume_early(struct xe_device *xe); void xe_display_pm_resume(struct xe_device *xe, bool runtime); +void xe_display_pm_runtime_suspend(struct xe_device *xe); +void xe_display_pm_runtime_resume(struct xe_device *xe); #else @@ -67,6 +69,8 @@ static inline void xe_display_pm_suspend(struct xe_device *xe, bool runtime) {} static inline void xe_display_pm_suspend_late(struct xe_device *xe) {} static inline void xe_display_pm_resume_early(struct xe_device *xe) {} static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {} +static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {} +static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {} #endif /* CONFIG_DRM_XE_DISPLAY */ #endif /* _XE_DISPLAY_H_ */ diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index d7db44e79eaf..d650c5ac41a4 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -204,21 +204,28 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) align = max_t(u32, align, SZ_64K); - if (bo->ggtt_node.size && view->type == I915_GTT_VIEW_NORMAL) { + if (bo->ggtt_node && view->type == I915_GTT_VIEW_NORMAL) { vma->node = bo->ggtt_node; } else if (view->type == I915_GTT_VIEW_NORMAL) { u32 x, size = bo->ttm.base.size; - ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size, - align, 0); - if (ret) + vma->node = xe_ggtt_node_init(ggtt); + if (IS_ERR(vma->node)) { + ret = PTR_ERR(vma->node); goto out_unlock; + } + + ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); + if (ret) { + xe_ggtt_node_fini(vma->node); + goto out_unlock; + } for (x = 0; x < size; x += XE_PAGE_SIZE) { u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, xe->pat.idx[XE_CACHE_NONE]); - ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node.start + x, pte); + ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node->base.start + x, pte); } } else { u32 i, ggtt_ofs; @@ -227,12 +234,19 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, /* display seems to use tiles instead of bytes here, so convert it back.. */ u32 size = intel_rotation_info_size(rot_info) * XE_PAGE_SIZE; - ret = xe_ggtt_insert_special_node_locked(ggtt, &vma->node, size, - align, 0); - if (ret) + vma->node = xe_ggtt_node_init(ggtt); + if (IS_ERR(vma->node)) { + ret = PTR_ERR(vma->node); + goto out_unlock; + } + + ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0); + if (ret) { + xe_ggtt_node_fini(vma->node); goto out_unlock; + } - ggtt_ofs = vma->node.start; + ggtt_ofs = vma->node->base.start; for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++) write_ggtt_rotated(bo, ggtt, &ggtt_ofs, @@ -320,14 +334,11 @@ err: static void __xe_unpin_fb_vma(struct i915_vma *vma) { - struct xe_device *xe = to_xe_device(vma->bo->ttm.base.dev); - struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt; - if (vma->dpt) xe_bo_unpin_map_no_vm(vma->dpt); - else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) || - vma->bo->ggtt_node.start != vma->node.start) - xe_ggtt_remove_node(ggtt, &vma->node, false); + else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node) || + vma->bo->ggtt_node->base.start != vma->node->base.start) + xe_ggtt_node_remove(vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); ttm_bo_unpin(&vma->bo->ttm); @@ -377,8 +388,8 @@ void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) } /* - * For Xe introduce dummy intel_dpt_create which just return NULL and - * intel_dpt_destroy which does nothing. + * For Xe introduce dummy intel_dpt_create which just return NULL, + * intel_dpt_destroy which does nothing, and fake intel_dpt_ofsset returning 0; */ struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb) { @@ -389,3 +400,8 @@ void intel_dpt_destroy(struct i915_address_space *vm) { return; } + +u64 intel_dpt_offset(struct i915_vma *dpt_vma) +{ + return 0; +} diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 990285aa9b26..0af667ebebf9 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -40,10 +40,14 @@ bool intel_hdcp_gsc_check_status(struct xe_device *xe) { struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_gt *gt = tile->media_gt; + struct xe_gsc *gsc = >->uc.gsc; bool ret = true; - if (!xe_uc_fw_is_enabled(>->uc.gsc.fw)) + if (!gsc && !xe_uc_fw_is_enabled(&gsc->fw)) { + drm_dbg_kms(&xe->drm, + "GSC Components not ready for HDCP2.x\n"); return false; + } xe_pm_runtime_get(xe); if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)) { @@ -53,7 +57,7 @@ bool intel_hdcp_gsc_check_status(struct xe_device *xe) goto out; } - if (!xe_gsc_proxy_init_done(>->uc.gsc)) + if (!xe_gsc_proxy_init_done(gsc)) ret = false; xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index c38db2a74614..81b71903675e 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -104,6 +104,7 @@ #define CSFE_CHICKEN1(base) XE_REG((base) + 0xd4, XE_REG_OPTION_MASKED) #define GHWSP_CSB_REPORT_DIS REG_BIT(15) #define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS REG_BIT(14) +#define CS_PRIORITY_MEM_READ REG_BIT(7) #define FF_SLICE_CS_CHICKEN1(base) XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED) #define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 3b87f95f9ecf..0d1a4a9f4e11 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -80,7 +80,10 @@ #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) #define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) -#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194) +#define STATELESS_COMPRESSION_CTRL XE_REG_MCR(0x4148) +#define UNIFIED_COMPRESSION_FORMAT REG_GENMASK(3, 0) + +#define XE2_GAMREQSTRM_CTRL XE_REG_MCR(0x4194) #define CG_DIS_CNTLBUS REG_BIT(6) #define CCS_AUX_INV XE_REG(0x4208) @@ -193,6 +196,7 @@ #define GSCPSMI_BASE XE_REG(0x880c) #define CCCHKNREG1 XE_REG_MCR(0x8828) +#define L3CMPCTRL REG_BIT(23) #define ENCOMPPERFFIX REG_BIT(18) /* Fuse readout registers for GT */ @@ -367,6 +371,9 @@ #define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) #define XEHP_LNESPARE REG_BIT(19) +#define L3SQCREG2 XE_REG_MCR(0xb104) +#define COMPMEMRD256BOVRFETCHEN REG_BIT(20) + #define L3SQCREG3 XE_REG_MCR(0xb108) #define COMPPWOVERFETCHEN REG_BIT(28) @@ -444,6 +451,7 @@ #define DIS_FIX_EOT1_FLUSH REG_BIT(9) #define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED) +#define STK_ID_RESTRICT REG_BIT(12) #define SLM_WMTP_RESTORE REG_BIT(11) #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 1768483da1b7..8dac069483e8 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -36,7 +36,8 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, /* Optionally clear bo *and* CCS data in VRAM. */ if (clear) { - fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource); + fence = xe_migrate_clear(tile->migrate, bo, bo->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (IS_ERR(fence)) { KUNIT_FAIL(test, "Failed to submit bo clear.\n"); return PTR_ERR(fence); @@ -124,7 +125,7 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, kunit_info(test, "Testing system memory\n"); bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags); + bo_flags); if (IS_ERR(bo)) { KUNIT_FAIL(test, "Failed to create bo.\n"); return; @@ -205,7 +206,6 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc xe_vm_lock(vm, false); bo = xe_bo_create_user(xe, NULL, vm, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags); xe_vm_unlock(vm); if (IS_ERR(bo)) { @@ -215,7 +215,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc external = xe_bo_create_user(xe, NULL, NULL, 0x10000, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags); + bo_flags); if (IS_ERR(external)) { KUNIT_FAIL(test, "external bo create err=%pe\n", external); goto cleanup_bo; diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index c24c8509227e..13db6c0530b3 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -126,7 +126,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) kunit_info(test, "running %s\n", __func__); bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, params->mem_mask); + params->mem_mask); if (IS_ERR(bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(bo)); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 4344a1724029..1a192a2a941b 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -105,7 +105,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, } xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); - fence = xe_migrate_clear(m, remote, remote->ttm.resource); + fence = xe_migrate_clear(m, remote, remote->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (!sanity_fence_failed(xe, fence, big ? "Clearing remote big bo" : "Clearing remote small bo", test)) { retval = xe_map_rd(xe, &remote->vmap, 0, u64); @@ -279,7 +280,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) kunit_info(test, "Clearing small buffer object\n"); xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); expected = 0; - fence = xe_migrate_clear(m, tiny, tiny->ttm.resource); + fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (sanity_fence_failed(xe, fence, "Clearing small bo", test)) goto out; @@ -300,7 +302,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) kunit_info(test, "Clearing big buffer object\n"); xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); expected = 0; - fence = xe_migrate_clear(m, big, big->ttm.resource); + fence = xe_migrate_clear(m, big, big->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (sanity_fence_failed(xe, fence, "Clearing big bo", test)) goto out; @@ -603,7 +606,8 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, kunit_info(test, "Clear vram buffer object\n"); expected = 0x0000000000000000; - fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource); + fence = xe_migrate_clear(tile->migrate, vram_bo, vram_bo->ttm.resource, + XE_MIGRATE_CLEAR_FLAG_FULL); if (sanity_fence_failed(xe, fence, "Clear vram_bo", test)) return; dma_fence_put(fence); @@ -637,7 +641,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til long ret; sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, - DRM_XE_GEM_CPU_CACHING_WC, ttm_bo_type_device, + DRM_XE_GEM_CPU_CACHING_WC, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(sys_bo)) { @@ -660,8 +664,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_unlock(sys_bo); - ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + DRM_XE_GEM_CPU_CACHING_WC, + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(ccs_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", @@ -683,8 +688,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til } xe_bo_unlock(ccs_bo); - vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, + DRM_XE_GEM_CPU_CACHING_WC, + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(vram_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(vram_bo)); diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 577ee7d14381..67404863087e 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -12,58 +12,6 @@ #include <kunit/test-bug.h> #include <kunit/visibility.h> -struct kunit_test_data { - int ndevs; - xe_device_fn xe_fn; -}; - -static int dev_to_xe_device_fn(struct device *dev, void *__data) - -{ - struct drm_device *drm = dev_get_drvdata(dev); - struct kunit_test_data *data = __data; - int ret = 0; - int idx; - - data->ndevs++; - - if (drm_dev_enter(drm, &idx)) - ret = data->xe_fn(to_xe_device(dev_get_drvdata(dev))); - drm_dev_exit(idx); - - return ret; -} - -/** - * xe_call_for_each_device - Iterate over all devices this driver binds to - * @xe_fn: Function to call for each device. - * - * This function iterated over all devices this driver binds to, and calls - * @xe_fn: for each one of them. If the called function returns anything else - * than 0, iteration is stopped and the return value is returned by this - * function. Across each function call, drm_dev_enter() / drm_dev_exit() is - * called for the corresponding drm device. - * - * Return: Number of devices iterated or - * the error code of a call to @xe_fn returning an error code. - */ -int xe_call_for_each_device(xe_device_fn xe_fn) -{ - int ret; - struct kunit_test_data data = { - .xe_fn = xe_fn, - .ndevs = 0, - }; - - ret = driver_for_each_device(&xe_pci_driver.driver, NULL, - &data, dev_to_xe_device_fn); - - if (!data.ndevs) - kunit_skip(current->kunit_test, "test runs only on hardware\n"); - - return ret ?: data.ndevs; -} - /** * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs * @xe_fn: Function to call for each device. diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index 3e2558bc3c90..ede46800aff1 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -19,7 +19,6 @@ typedef int (*xe_device_fn)(struct xe_device *); typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *); typedef void (*xe_media_fn)(const struct xe_media_desc *); -int xe_call_for_each_device(xe_device_fn xe_fn); void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn); void xe_call_for_each_media_ip(xe_media_fn xe_fn); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 3295bc92d7aa..25d0c939ba31 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -793,8 +793,16 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } } } else { - if (move_lacks_source) - fence = xe_migrate_clear(migrate, bo, new_mem); + if (move_lacks_source) { + u32 flags = 0; + + if (mem_type_is_vram(new_mem->mem_type)) + flags |= XE_MIGRATE_CLEAR_FLAG_FULL; + else if (handle_system_ccs) + flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA; + + fence = xe_migrate_clear(migrate, bo, new_mem, flags); + } else fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem, handle_system_ccs); @@ -1090,7 +1098,7 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list)); - if (bo->ggtt_node.size) + if (bo->ggtt_node && bo->ggtt_node->base.size) xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); #ifdef CONFIG_PROC_FS @@ -1491,11 +1499,10 @@ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u16 cpu_caching, - enum ttm_bo_type type, u32 flags) { struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, - cpu_caching, type, + cpu_caching, ttm_bo_type_device, flags | XE_BO_FLAG_USER); if (!IS_ERR(bo)) xe_bo_unlock_vm_held(bo); @@ -1576,7 +1583,7 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, return bo; } -static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg) +static void __xe_bo_unpin_map_no_vm(void *arg) { xe_bo_unpin_map_no_vm(arg); } @@ -1591,7 +1598,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile if (IS_ERR(bo)) return bo; - ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo); + ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo); if (ret) return ERR_PTR(ret); @@ -1639,7 +1646,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str if (IS_ERR(bo)) return PTR_ERR(bo); - drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src); + devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src); *src = bo; return 0; @@ -1990,6 +1997,13 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1); + /* CCS formats need physical placement at a 64K alignment in VRAM. */ + if ((bo_flags & XE_BO_FLAG_VRAM_MASK) && + (bo_flags & XE_BO_FLAG_SCANOUT) && + !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) && + IS_ALIGNED(args->size, SZ_64K)) + bo_flags |= XE_BO_FLAG_NEEDS_64K; + if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) { if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK))) return -EINVAL; @@ -2019,7 +2033,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, } bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching, - ttm_bo_type_device, bo_flags); + bo_flags); if (vm) xe_vm_unlock(vm); @@ -2325,7 +2339,6 @@ int xe_bo_dumb_create(struct drm_file *file_priv, bo = xe_bo_create_user(xe, NULL, NULL, args->size, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | XE_BO_FLAG_SCANOUT | XE_BO_FLAG_NEEDS_CPU_ACCESS); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 1c9dc8adaaa3..dbfb3209615d 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -87,7 +87,6 @@ struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile, struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, u16 cpu_caching, - enum ttm_bo_type type, u32 flags); struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, struct xe_vm *vm, size_t size, @@ -195,9 +194,12 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) static inline u32 xe_bo_ggtt_addr(struct xe_bo *bo) { - XE_WARN_ON(bo->ggtt_node.size > bo->size); - XE_WARN_ON(bo->ggtt_node.start + bo->ggtt_node.size > (1ull << 32)); - return bo->ggtt_node.start; + if (XE_WARN_ON(!bo->ggtt_node)) + return 0; + + XE_WARN_ON(bo->ggtt_node->base.size > bo->size); + XE_WARN_ON(bo->ggtt_node->base.start + bo->ggtt_node->base.size > (1ull << 32)); + return bo->ggtt_node->base.start; } int xe_bo_vmap(struct xe_bo *bo); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index ebc8abf7930a..2ed558ac2264 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -8,12 +8,13 @@ #include <linux/iosys-map.h> -#include <drm/drm_mm.h> #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_device.h> #include <drm/ttm/ttm_execbuf_util.h> #include <drm/ttm/ttm_placement.h> +#include "xe_ggtt_types.h" + struct xe_device; struct xe_vm; @@ -39,7 +40,7 @@ struct xe_bo { /** @placement: current placement for this BO */ struct ttm_placement placement; /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */ - struct drm_mm_node ggtt_node; + struct xe_ggtt_node *ggtt_node; /** @vmap: iosys map of this buffer */ struct iosys_map vmap; /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */ diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 1011e5d281fa..a64bae36e0e3 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -47,10 +47,9 @@ static int info(struct seq_file *m, void *data) drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100); drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100); - drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n", + drm_printf(&p, "stepping G:%s M:%s B:%s\n", xe_step_name(xe->info.step.graphics), xe_step_name(xe->info.step.media), - xe_step_name(xe->info.step.display), xe_step_name(xe->info.step.basedie)); drm_printf(&p, "is_dgfx %s\n", str_yes_no(xe->info.is_dgfx)); drm_printf(&p, "platform %d\n", xe->info.platform); diff --git a/drivers/gpu/drm/xe/xe_debugfs.h b/drivers/gpu/drm/xe/xe_debugfs.h index 715b8e2e0bd9..17f4c2f1b5e4 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.h +++ b/drivers/gpu/drm/xe/xe_debugfs.h @@ -8,6 +8,10 @@ struct xe_device; +#ifdef CONFIG_DEBUG_FS void xe_debugfs_register(struct xe_device *xe); +#else +static inline void xe_debugfs_register(struct xe_device *xe) { } +#endif #endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index d8d8ca2c19d3..bdb76e834e4c 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -66,22 +66,9 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q) return &q->gt->uc.guc; } -static void xe_devcoredump_deferred_snap_work(struct work_struct *work) -{ - struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); - - /* keep going if fw fails as we still want to save the memory and SW data */ - if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) - xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); - xe_vm_snapshot_capture_delayed(ss->vm); - xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); - xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); -} - -static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, - size_t count, void *data, size_t datalen) +static ssize_t __xe_devcoredump_read(char *buffer, size_t count, + struct xe_devcoredump *coredump) { - struct xe_devcoredump *coredump = data; struct xe_device *xe; struct xe_devcoredump_snapshot *ss; struct drm_printer p; @@ -89,18 +76,11 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, struct timespec64 ts; int i; - if (!coredump) - return -ENODEV; - xe = coredump_to_xe(coredump); ss = &coredump->snapshot; - /* Ensure delayed work is captured before continuing */ - flush_work(&ss->work); - iter.data = buffer; - iter.offset = 0; - iter.start = offset; + iter.start = 0; iter.remain = count; p = drm_coredump_printer(&iter); @@ -134,10 +114,83 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, return count - iter.remain; } +static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) +{ + int i; + + xe_guc_ct_snapshot_free(ss->ct); + ss->ct = NULL; + + xe_guc_exec_queue_snapshot_free(ss->ge); + ss->ge = NULL; + + xe_sched_job_snapshot_free(ss->job); + ss->job = NULL; + + for (i = 0; i < XE_NUM_HW_ENGINES; i++) + if (ss->hwe[i]) { + xe_hw_engine_snapshot_free(ss->hwe[i]); + ss->hwe[i] = NULL; + } + + xe_vm_snapshot_free(ss->vm); + ss->vm = NULL; +} + +static void xe_devcoredump_deferred_snap_work(struct work_struct *work) +{ + struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); + struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); + + /* keep going if fw fails as we still want to save the memory and SW data */ + if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); + xe_vm_snapshot_capture_delayed(ss->vm); + xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); + xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); + + /* Calculate devcoredump size */ + ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); + + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); + if (!ss->read.buffer) + return; + + __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); + xe_devcoredump_snapshot_free(ss); +} + +static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, + size_t count, void *data, size_t datalen) +{ + struct xe_devcoredump *coredump = data; + struct xe_devcoredump_snapshot *ss; + ssize_t byte_copied; + + if (!coredump) + return -ENODEV; + + ss = &coredump->snapshot; + + /* Ensure delayed work is captured before continuing */ + flush_work(&ss->work); + + if (!ss->read.buffer) + return -ENODEV; + + if (offset >= ss->read.size) + return 0; + + byte_copied = count < ss->read.size - offset ? count : + ss->read.size - offset; + memcpy(buffer, ss->read.buffer + offset, byte_copied); + + return byte_copied; +} + static void xe_devcoredump_free(void *data) { struct xe_devcoredump *coredump = data; - int i; /* Our device is gone. Nothing to do... */ if (!data || !coredump_to_xe(coredump)) @@ -145,13 +198,8 @@ static void xe_devcoredump_free(void *data) cancel_work_sync(&coredump->snapshot.work); - xe_guc_ct_snapshot_free(coredump->snapshot.ct); - xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge); - xe_sched_job_snapshot_free(coredump->snapshot.job); - for (i = 0; i < XE_NUM_HW_ENGINES; i++) - if (coredump->snapshot.hwe[i]) - xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]); - xe_vm_snapshot_free(coredump->snapshot.vm); + xe_devcoredump_snapshot_free(&coredump->snapshot); + kvfree(coredump->snapshot.read.buffer); /* To prevent stale data on next snapshot, clear everything */ memset(&coredump->snapshot, 0, sizeof(coredump->snapshot)); @@ -260,4 +308,5 @@ int xe_devcoredump_init(struct xe_device *xe) { return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm); } + #endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 923cdf72a816..440d05d77a5a 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -46,6 +46,14 @@ struct xe_devcoredump_snapshot { struct xe_sched_job_snapshot *job; /** @vm: Snapshot of VM state */ struct xe_vm_snapshot *vm; + + /** @read: devcoredump in human readable format */ + struct { + /** @read.size: size of devcoredump in human readable format */ + ssize_t size; + /** @read.buffer: buffer of devcoredump in human readable format */ + char *buffer; + } read; }; /** diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1aba6f9eaa19..b6db7e082d88 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -37,6 +37,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_hw_engine_group.h" #include "xe_hwmon.h" #include "xe_irq.h" #include "xe_memirq.h" @@ -165,6 +166,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) * vm->lock taken during xe_exec_queue_kill(). */ xa_for_each(&xef->exec_queue.xa, idx, q) { + if (q->vm && q->hwe->hw_engine_group) + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); xe_exec_queue_kill(q); xe_exec_queue_put(q); } @@ -543,7 +546,7 @@ static void update_device_info(struct xe_device *xe) { /* disable features that are not available/applicable to VFs */ if (IS_SRIOV_VF(xe)) { - xe->info.enable_display = 0; + xe->info.probe_display = 0; xe->info.has_heci_gscfi = 0; xe->info.skip_guc_pc = 1; xe->info.skip_pcode = 1; diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index db6cc8d0d6b8..f052c06a2d2f 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -15,6 +15,11 @@ static inline struct xe_device *to_xe_device(const struct drm_device *dev) return container_of(dev, struct xe_device, drm); } +static inline struct xe_device *kdev_to_xe_device(struct device *kdev) +{ + return dev_get_drvdata(kdev); +} + static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev) { return pci_get_drvdata(pdev); @@ -134,16 +139,6 @@ static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) void xe_device_assert_mem_access(struct xe_device *xe); -static inline bool xe_device_in_fault_mode(struct xe_device *xe) -{ - return xe->usm.num_vm_in_fault_mode != 0; -} - -static inline bool xe_device_in_non_fault_mode(struct xe_device *xe) -{ - return xe->usm.num_vm_in_non_fault_mode != 0; -} - static inline bool xe_device_has_flat_ccs(struct xe_device *xe) { return xe->info.has_flat_ccs; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 5b7292a9a66d..e73fb0c23932 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -204,7 +204,7 @@ struct xe_tile { struct xe_memirq memirq; /** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */ - struct drm_mm_node ggtt_balloon[2]; + struct xe_ggtt_node *ggtt_balloon[2]; } vf; } sriov; @@ -282,8 +282,15 @@ struct xe_device { u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ u8 has_usm:1; - /** @info.enable_display: display enabled */ - u8 enable_display:1; + /** + * @info.probe_display: Probe display hardware. If set to + * false, the driver will behave as if there is no display + * hardware present and will not try to read/write to it in any + * way. The display hardware, if it exists, will not be + * exposed to userspace and will be left untouched in whatever + * state the firmware or bootloader left it in. + */ + u8 probe_display:1; /** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */ u8 skip_mtcfg:1; /** @info.skip_pcode: skip access to PCODE uC */ @@ -298,12 +305,6 @@ struct xe_device { u8 has_atomic_enable_pte_bit:1; /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ u8 has_device_atomics_on_smem:1; - -#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) - struct { - u32 rawclk_freq; - } i915_runtime; -#endif } info; /** @irq: device interrupt state */ @@ -361,10 +362,6 @@ struct xe_device { struct xarray asid_to_vm; /** @usm.next_asid: next ASID, used to cyclical alloc asids */ u32 next_asid; - /** @usm.num_vm_in_fault_mode: number of VM in fault mode */ - u32 num_vm_in_fault_mode; - /** @usm.num_vm_in_non_fault_mode: number of VM in non-fault mode */ - u32 num_vm_in_non_fault_mode; /** @usm.lock: protects UM state */ struct mutex lock; } usm; diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index f36980aa26e6..484acfbe0e61 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -14,6 +14,7 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_hw_engine_group.h" #include "xe_macros.h" #include "xe_ring_ops_types.h" #include "xe_sched_job.h" @@ -124,6 +125,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) bool write_locked, skip_retry = false; ktime_t end = 0; int err = 0; + struct xe_hw_engine_group *group; + enum xe_hw_engine_group_execution_mode mode, previous_mode; if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) || @@ -182,6 +185,15 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } } + group = q->hwe->hw_engine_group; + mode = xe_hw_engine_group_find_exec_mode(q); + + if (mode == EXEC_MODE_DMA_FENCE) { + err = xe_hw_engine_group_get_mode(group, mode, &previous_mode); + if (err) + goto err_syncs; + } + retry: if (!xe_vm_in_lr_mode(vm) && xe_vm_userptr_check_repin(vm)) { err = down_write_killable(&vm->lock); @@ -199,7 +211,7 @@ retry: downgrade_write(&vm->lock); write_locked = false; if (err) - goto err_unlock_list; + goto err_hw_exec_mode; } if (!args->num_batch_buffer) { @@ -312,6 +324,9 @@ retry: spin_unlock(&xe->ttm.lru_lock); } + if (mode == EXEC_MODE_LR) + xe_hw_engine_group_resume_faulting_lr_jobs(group); + err_repin: if (!xe_vm_in_lr_mode(vm)) up_read(&vm->userptr.notifier_lock); @@ -324,6 +339,9 @@ err_unlock_list: up_read(&vm->lock); if (err == -EAGAIN && !skip_retry) goto retry; +err_hw_exec_mode: + if (mode == EXEC_MODE_DMA_FENCE) + xe_hw_engine_group_put(group); err_syncs: while (num_syncs--) xe_sync_entry_cleanup(&syncs[num_syncs]); diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 69867a7b7c77..e53937fafd14 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -14,6 +14,7 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_hw_engine_class_sysfs.h" +#include "xe_hw_engine_group.h" #include "xe_hw_fence.h" #include "xe_lrc.h" #include "xe_macros.h" @@ -73,6 +74,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, q->ops = gt->exec_queue_ops; INIT_LIST_HEAD(&q->lr.link); INIT_LIST_HEAD(&q->multi_gt_link); + INIT_LIST_HEAD(&q->hw_engine_group_link); q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; q->sched_props.preempt_timeout_us = @@ -105,22 +107,35 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, static int __xe_exec_queue_init(struct xe_exec_queue *q) { + struct xe_vm *vm = q->vm; int i, err; + if (vm) { + err = xe_vm_lock(vm, true); + if (err) + return err; + } + for (i = 0; i < q->width; ++i) { q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); if (IS_ERR(q->lrc[i])) { err = PTR_ERR(q->lrc[i]); - goto err_lrc; + goto err_unlock; } } + if (vm) + xe_vm_unlock(vm); + err = q->ops->init(q); if (err) goto err_lrc; return 0; +err_unlock: + if (vm) + xe_vm_unlock(vm); err_lrc: for (i = i - 1; i >= 0; --i) xe_lrc_put(q->lrc[i]); @@ -140,15 +155,7 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (IS_ERR(q)) return q; - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - goto err_post_alloc; - } - err = __xe_exec_queue_init(q); - if (vm) - xe_vm_unlock(vm); if (err) goto err_post_alloc; @@ -161,7 +168,8 @@ err_post_alloc: struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, - enum xe_engine_class class, u32 flags) + enum xe_engine_class class, + u32 flags, u64 extensions) { struct xe_hw_engine *hwe, *hwe0 = NULL; enum xe_hw_engine_id id; @@ -181,7 +189,54 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe if (!logical_mask) return ERR_PTR(-ENODEV); - return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, 0); + return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions); +} + +/** + * xe_exec_queue_create_bind() - Create bind exec queue. + * @xe: Xe device. + * @tile: tile which bind exec queue belongs to. + * @flags: exec queue creation flags + * @extensions: exec queue creation extensions + * + * Normalize bind exec queue creation. Bind exec queue is tied to migration VM + * for access to physical memory required for page table programming. On a + * faulting devices the reserved copy engine instance must be used to avoid + * deadlocking (user binds cannot get stuck behind faults as kernel binds which + * resolve faults depend on user binds). On non-faulting devices any copy engine + * can be used. + * + * Returns exec queue on success, ERR_PTR on failure + */ +struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, + struct xe_tile *tile, + u32 flags, u64 extensions) +{ + struct xe_gt *gt = tile->primary_gt; + struct xe_exec_queue *q; + struct xe_vm *migrate_vm; + + migrate_vm = xe_migrate_get_vm(tile->migrate); + if (xe->info.has_usm) { + struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, + XE_ENGINE_CLASS_COPY, + gt->usm.reserved_bcs_instance, + false); + + if (!hwe) + return ERR_PTR(-EINVAL); + + q = xe_exec_queue_create(xe, migrate_vm, + BIT(hwe->logical_instance), 1, hwe, + flags, extensions); + } else { + q = xe_exec_queue_create_class(xe, gt, migrate_vm, + XE_ENGINE_CLASS_COPY, flags, + extensions); + } + xe_vm_put(migrate_vm); + + return q; } void xe_exec_queue_destroy(struct kref *ref) @@ -413,63 +468,6 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue return 0; } -static const enum xe_engine_class user_to_xe_engine_class[] = { - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, -}; - -static struct xe_hw_engine * -find_hw_engine(struct xe_device *xe, - struct drm_xe_engine_class_instance eci) -{ - u32 idx; - - if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) - return NULL; - - if (eci.gt_id >= xe->info.gt_count) - return NULL; - - idx = array_index_nospec(eci.engine_class, - ARRAY_SIZE(user_to_xe_engine_class)); - - return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), - user_to_xe_engine_class[idx], - eci.engine_instance, true); -} - -static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, - struct drm_xe_engine_class_instance *eci, - u16 width, u16 num_placements) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - u32 logical_mask = 0; - - if (XE_IOCTL_DBG(xe, width != 1)) - return 0; - if (XE_IOCTL_DBG(xe, num_placements != 1)) - return 0; - if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) - return 0; - - eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; - - for_each_hw_engine(hwe, gt, id) { - if (xe_hw_engine_is_reserved(hwe)) - continue; - - if (hwe->class == - user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY]) - logical_mask |= BIT(hwe->logical_instance); - } - - return logical_mask; -} - static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, struct drm_xe_engine_class_instance *eci, u16 width, u16 num_placements) @@ -492,7 +490,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, n = j * width + i; - hwe = find_hw_engine(xe, eci[n]); + hwe = xe_hw_engine_lookup(xe, eci[n]); if (XE_IOCTL_DBG(xe, !hwe)) return 0; @@ -531,8 +529,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, struct drm_xe_engine_class_instance __user *user_eci = u64_to_user_ptr(args->instances); struct xe_hw_engine *hwe; - struct xe_vm *vm, *migrate_vm; + struct xe_vm *vm; struct xe_gt *gt; + struct xe_tile *tile; struct xe_exec_queue *q = NULL; u32 logical_mask; u32 id; @@ -557,37 +556,20 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { - for_each_gt(gt, xe, id) { - struct xe_exec_queue *new; - u32 flags; - - if (xe_gt_is_media_type(gt)) - continue; - - eci[0].gt_id = gt->info.id; - logical_mask = bind_exec_queue_logical_mask(xe, gt, eci, - args->width, - args->num_placements); - if (XE_IOCTL_DBG(xe, !logical_mask)) - return -EINVAL; - - hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_DBG(xe, !hwe)) - return -EINVAL; - - /* The migration vm doesn't hold rpm ref */ - xe_pm_runtime_get_noresume(xe); - - flags = EXEC_QUEUE_FLAG_VM | (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0); + if (XE_IOCTL_DBG(xe, args->width != 1) || + XE_IOCTL_DBG(xe, args->num_placements != 1) || + XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) + return -EINVAL; - migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); - new = xe_exec_queue_create(xe, migrate_vm, logical_mask, - args->width, hwe, flags, - args->extensions); + for_each_tile(tile, xe, id) { + struct xe_exec_queue *new; + u32 flags = EXEC_QUEUE_FLAG_VM; - xe_pm_runtime_put(xe); /* now held by engine */ + if (id) + flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD; - xe_vm_put(migrate_vm); + new = xe_exec_queue_create_bind(xe, tile, flags, + args->extensions); if (IS_ERR(new)) { err = PTR_ERR(new); if (q) @@ -608,7 +590,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !logical_mask)) return -EINVAL; - hwe = find_hw_engine(xe, eci[0]); + hwe = xe_hw_engine_lookup(xe, eci[0]); if (XE_IOCTL_DBG(xe, !hwe)) return -EINVAL; @@ -638,12 +620,17 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (xe_vm_in_preempt_fence_mode(vm)) { q->lr.context = dma_fence_context_alloc(1); - spin_lock_init(&q->lr.lock); err = xe_vm_add_compute_exec_queue(vm, q); if (XE_IOCTL_DBG(xe, err)) goto put_exec_queue; } + + if (q->vm && q->hwe->hw_engine_group) { + err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q); + if (err) + goto put_exec_queue; + } } mutex_lock(&xef->exec_queue.lock); @@ -794,6 +781,15 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; } +/** + * xe_exec_queue_kill - permanently stop all execution from an exec queue + * @q: The exec queue + * + * This function permanently stops all activity on an exec queue. If the queue + * is actively executing on the HW, it will be kicked off the engine; any + * pending jobs are discarded and all future submissions are rejected. + * This function is safe to call multiple times. + */ void xe_exec_queue_kill(struct xe_exec_queue *q) { struct xe_exec_queue *eq = q, *next; @@ -826,6 +822,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; + if (q->vm && q->hwe->hw_engine_group) + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); + xe_exec_queue_kill(q); trace_xe_exec_queue_close(q); @@ -837,10 +836,12 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, struct xe_vm *vm) { - if (q->flags & EXEC_QUEUE_FLAG_VM) + if (q->flags & EXEC_QUEUE_FLAG_VM) { lockdep_assert_held(&vm->lock); - else + } else { xe_vm_assert_held(vm); + lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem); + } } /** @@ -852,10 +853,7 @@ void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm) { xe_exec_queue_last_fence_lockdep_assert(q, vm); - if (q->last_fence) { - dma_fence_put(q->last_fence); - q->last_fence = NULL; - } + xe_exec_queue_last_fence_put_unlocked(q); } /** @@ -898,6 +896,33 @@ struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q, } /** + * xe_exec_queue_last_fence_get_for_resume() - Get last fence + * @q: The exec queue + * @vm: The VM the engine does a bind or exec for + * + * Get last fence, takes a ref. Only safe to be called in the context of + * resuming the hw engine group's long-running exec queue, when the group + * semaphore is held. + * + * Returns: last fence if not signaled, dma fence stub if signaled + */ +struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q, + struct xe_vm *vm) +{ + struct dma_fence *fence; + + lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem); + + if (q->last_fence && + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) + xe_exec_queue_last_fence_put_unlocked(q); + + fence = q->last_fence ? q->last_fence : dma_fence_get_stub(); + dma_fence_get(fence); + return fence; +} + +/** * xe_exec_queue_last_fence_set() - Set last fence * @q: The exec queue * @vm: The VM the engine does a bind or exec for diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index ded77b0f3b90..90c7f73eab88 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -20,7 +20,11 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v u64 extensions); struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, - enum xe_engine_class class, u32 flags); + enum xe_engine_class class, + u32 flags, u64 extensions); +struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, + struct xe_tile *tile, + u32 flags, u64 extensions); void xe_exec_queue_fini(struct xe_exec_queue *q); void xe_exec_queue_destroy(struct kref *ref); @@ -73,6 +77,8 @@ void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm); void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *e); struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e, struct xe_vm *vm); +struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *e, + struct xe_vm *vm); void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm, struct dma_fence *fence); int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 1408b02eea53..7deb480e26af 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -126,8 +126,6 @@ struct xe_exec_queue { u32 seqno; /** @lr.link: link into VM's list of exec queues */ struct list_head link; - /** @lr.lock: preemption fences lock */ - spinlock_t lock; } lr; /** @ops: submission backend exec queue operations */ @@ -142,6 +140,8 @@ struct xe_exec_queue { * Protected by @vm's resv. Unused if @vm == NULL. */ u64 tlb_flush_seqno; + /** @hw_engine_group_link: link into exec queues in the same hw engine group */ + struct list_head hw_engine_group_link; /** @lrc: logical ring context for this exec queue */ struct xe_lrc *lrc[]; }; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 0cdbc1296e88..f3fca5565d32 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -30,6 +30,39 @@ #include "xe_wa.h" #include "xe_wopcm.h" +/** + * DOC: Global Graphics Translation Table (GGTT) + * + * Xe GGTT implements the support for a Global Virtual Address space that is used + * for resources that are accessible to privileged (i.e. kernel-mode) processes, + * and not tied to a specific user-level process. For example, the Graphics + * micro-Controller (GuC) and Display Engine (if present) utilize this Global + * address space. + * + * The Global GTT (GGTT) translates from the Global virtual address to a physical + * address that can be accessed by HW. The GGTT is a flat, single-level table. + * + * Xe implements a simplified version of the GGTT specifically managing only a + * certain range of it that goes from the Write Once Protected Content Memory (WOPCM) + * Layout to a predefined GUC_GGTT_TOP. This approach avoids complications related to + * the GuC (Graphics Microcontroller) hardware limitations. The GuC address space + * is limited on both ends of the GGTT, because the GuC shim HW redirects + * accesses to those addresses to other HW areas instead of going through the + * GGTT. On the bottom end, the GuC can't access offsets below the WOPCM size, + * while on the top side the limit is fixed at GUC_GGTT_TOP. To keep things + * simple, instead of checking each object to see if they are accessed by GuC or + * not, we just exclude those areas from the allocator. Additionally, to simplify + * the driver load, we use the maximum WOPCM size in this logic instead of the + * programmed one, so we don't need to wait until the actual size to be + * programmed is determined (which requires FW fetch) before initializing the + * GGTT. These simplifications might waste space in the GGTT (about 20-25 MBs + * depending on the platform) but we can live with this. Another benefit of this + * is the GuC bootrom can't access anything below the WOPCM max size so anything + * the bootrom needs to access (e.g. a RSA key) needs to be placed in the GGTT + * above the WOPCM max size. Starting the GGTT allocations above the WOPCM max + * give us the correct placement for free. + */ + static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, u16 pat_index) { @@ -128,11 +161,12 @@ static void ggtt_fini_early(struct drm_device *drm, void *arg) { struct xe_ggtt *ggtt = arg; + destroy_workqueue(ggtt->wq); mutex_destroy(&ggtt->lock); drm_mm_takedown(&ggtt->mm); } -static void ggtt_fini(struct drm_device *drm, void *arg) +static void ggtt_fini(void *arg) { struct xe_ggtt *ggtt = arg; @@ -164,12 +198,16 @@ static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { .ggtt_set_pte = xe_ggtt_set_pte_and_flush, }; -/* - * Early GGTT initialization, which allows to create new mappings usable by the - * GuC. - * Mappings are not usable by the HW engines, as it doesn't have scratch / +/** + * xe_ggtt_init_early - Early GGTT initialization + * @ggtt: the &xe_ggtt to be initialized + * + * It allows to create new mappings usable by the GuC. + * Mappings are not usable by the HW engines, as it doesn't have scratch nor * initial clear done to it yet. That will happen in the regular, non-early - * GGTT init. + * GGTT initialization. + * + * Return: 0 on success or a negative error code on failure. */ int xe_ggtt_init_early(struct xe_ggtt *ggtt) { @@ -194,29 +232,6 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ggtt->flags |= XE_GGTT_FLAGS_64K; - /* - * 8B per entry, each points to a 4KB page. - * - * The GuC address space is limited on both ends of the GGTT, because - * the GuC shim HW redirects accesses to those addresses to other HW - * areas instead of going through the GGTT. On the bottom end, the GuC - * can't access offsets below the WOPCM size, while on the top side the - * limit is fixed at GUC_GGTT_TOP. To keep things simple, instead of - * checking each object to see if they are accessed by GuC or not, we - * just exclude those areas from the allocator. Additionally, to - * simplify the driver load, we use the maximum WOPCM size in this logic - * instead of the programmed one, so we don't need to wait until the - * actual size to be programmed is determined (which requires FW fetch) - * before initializing the GGTT. These simplifications might waste space - * in the GGTT (about 20-25 MBs depending on the platform) but we can - * live with this. - * - * Another benifit of this is the GuC bootrom can't access anything - * below the WOPCM max size so anything the bootom needs to access (e.g. - * a RSA key) needs to be placed in the GGTT above the WOPCM max size. - * Starting the GGTT allocations above the WOPCM max give us the correct - * placement for free. - */ if (ggtt->size > GUC_GGTT_TOP) ggtt->size = GUC_GGTT_TOP; @@ -228,6 +243,8 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) else ggtt->pt_ops = &xelp_pt_ops; + ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, 0); + drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), ggtt->size - xe_wopcm_size(xe)); mutex_init(&ggtt->lock); @@ -262,6 +279,77 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) mutex_unlock(&ggtt->lock); } +static void ggtt_node_remove(struct xe_ggtt_node *node) +{ + struct xe_ggtt *ggtt = node->ggtt; + struct xe_device *xe = tile_to_xe(ggtt->tile); + bool bound; + int idx; + + bound = drm_dev_enter(&xe->drm, &idx); + + mutex_lock(&ggtt->lock); + if (bound) + xe_ggtt_clear(ggtt, node->base.start, node->base.size); + drm_mm_remove_node(&node->base); + node->base.size = 0; + mutex_unlock(&ggtt->lock); + + if (!bound) + goto free_node; + + if (node->invalidate_on_remove) + xe_ggtt_invalidate(ggtt); + + drm_dev_exit(idx); + +free_node: + xe_ggtt_node_fini(node); +} + +static void ggtt_node_remove_work_func(struct work_struct *work) +{ + struct xe_ggtt_node *node = container_of(work, typeof(*node), + delayed_removal_work); + struct xe_device *xe = tile_to_xe(node->ggtt->tile); + + xe_pm_runtime_get(xe); + ggtt_node_remove(node); + xe_pm_runtime_put(xe); +} + +/** + * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT + * @node: the &xe_ggtt_node to be removed + * @invalidate: if node needs invalidation upon removal + */ +void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate) +{ + struct xe_ggtt *ggtt; + struct xe_device *xe; + + if (!node || !node->ggtt) + return; + + ggtt = node->ggtt; + xe = tile_to_xe(ggtt->tile); + + node->invalidate_on_remove = invalidate; + + if (xe_pm_runtime_get_if_active(xe)) { + ggtt_node_remove(node); + xe_pm_runtime_put(xe); + } else { + queue_work(ggtt->wq, &node->delayed_removal_work); + } +} + +/** + * xe_ggtt_init - Regular non-early GGTT initialization + * @ggtt: the &xe_ggtt to be initialized + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_init(struct xe_ggtt *ggtt) { struct xe_device *xe = tile_to_xe(ggtt->tile); @@ -289,7 +377,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) xe_ggtt_initial_clear(ggtt); - return drmm_add_action_or_reset(&xe->drm, ggtt_fini, ggtt); + return devm_add_action_or_reset(xe->drm.dev, ggtt_fini, ggtt); err: ggtt->scratch = NULL; return err; @@ -314,26 +402,6 @@ static void xe_ggtt_invalidate(struct xe_ggtt *ggtt) ggtt_invalidate_gt_tlb(ggtt->tile->media_gt); } -void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) -{ - u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; - u64 addr, scratch_pte; - - scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, pat_index); - - printk("%sGlobal GTT:", prefix); - for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) { - unsigned int i = addr / XE_PAGE_SIZE; - - xe_tile_assert(ggtt->tile, addr <= U32_MAX); - if (ggtt->gsm[i] == scratch_pte) - continue; - - printk("%s ggtt[0x%08x] = 0x%016llx", - prefix, (u32)addr, ggtt->gsm[i]); - } -} - static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, const struct drm_mm_node *node, const char *description) { @@ -347,88 +415,180 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, } /** - * xe_ggtt_balloon - prevent allocation of specified GGTT addresses - * @ggtt: the &xe_ggtt where we want to make reservation + * xe_ggtt_node_insert_balloon - prevent allocation of specified GGTT addresses + * @node: the &xe_ggtt_node to hold reserved GGTT node * @start: the starting GGTT address of the reserved region * @end: then end GGTT address of the reserved region - * @node: the &drm_mm_node to hold reserved GGTT node * - * Use xe_ggtt_deballoon() to release a reserved GGTT node. + * Use xe_ggtt_node_remove_balloon() to release a reserved GGTT node. * * Return: 0 on success or a negative error code on failure. */ -int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct drm_mm_node *node) +int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end) { + struct xe_ggtt *ggtt = node->ggtt; int err; xe_tile_assert(ggtt->tile, start < end); xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE)); xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE)); - xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(node)); + xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base)); - node->color = 0; - node->start = start; - node->size = end - start; + node->base.color = 0; + node->base.start = start; + node->base.size = end - start; mutex_lock(&ggtt->lock); - err = drm_mm_reserve_node(&ggtt->mm, node); + err = drm_mm_reserve_node(&ggtt->mm, &node->base); mutex_unlock(&ggtt->lock); if (xe_gt_WARN(ggtt->tile->primary_gt, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n", - node->start, node->start + node->size, ERR_PTR(err))) + node->base.start, node->base.start + node->base.size, ERR_PTR(err))) return err; - xe_ggtt_dump_node(ggtt, node, "balloon"); + xe_ggtt_dump_node(ggtt, &node->base, "balloon"); return 0; } /** - * xe_ggtt_deballoon - release a reserved GGTT region - * @ggtt: the &xe_ggtt where reserved node belongs - * @node: the &drm_mm_node with reserved GGTT region + * xe_ggtt_node_remove_balloon - release a reserved GGTT region + * @node: the &xe_ggtt_node with reserved GGTT region * - * See xe_ggtt_balloon() for details. + * See xe_ggtt_node_insert_balloon() for details. */ -void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node) +void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node) { - if (!drm_mm_node_allocated(node)) + if (!node || !node->ggtt) return; - xe_ggtt_dump_node(ggtt, node, "deballoon"); + if (!drm_mm_node_allocated(&node->base)) + goto free_node; - mutex_lock(&ggtt->lock); - drm_mm_remove_node(node); - mutex_unlock(&ggtt->lock); + xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon"); + + mutex_lock(&node->ggtt->lock); + drm_mm_remove_node(&node->base); + mutex_unlock(&node->ggtt->lock); + +free_node: + xe_ggtt_node_fini(node); } -int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, - u32 size, u32 align, u32 mm_flags) +/** + * xe_ggtt_node_insert_locked - Locked version to insert a &xe_ggtt_node into the GGTT + * @node: the &xe_ggtt_node to be inserted + * @size: size of the node + * @align: alignment constrain of the node + * @mm_flags: flags to control the node behavior + * + * It cannot be called without first having called xe_ggtt_init() once. + * To be used in cases where ggtt->lock is already taken. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, + u32 size, u32 align, u32 mm_flags) { - return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0, + return drm_mm_insert_node_generic(&node->ggtt->mm, &node->base, size, align, 0, mm_flags); } -int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, - u32 size, u32 align) +/** + * xe_ggtt_node_insert - Insert a &xe_ggtt_node into the GGTT + * @node: the &xe_ggtt_node to be inserted + * @size: size of the node + * @align: alignment constrain of the node + * + * It cannot be called without first having called xe_ggtt_init() once. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align) { int ret; - mutex_lock(&ggtt->lock); - ret = xe_ggtt_insert_special_node_locked(ggtt, node, size, - align, DRM_MM_INSERT_HIGH); - mutex_unlock(&ggtt->lock); + if (!node || !node->ggtt) + return -ENOENT; + + mutex_lock(&node->ggtt->lock); + ret = xe_ggtt_node_insert_locked(node, size, align, + DRM_MM_INSERT_HIGH); + mutex_unlock(&node->ggtt->lock); return ret; } +/** + * xe_ggtt_node_init - Initialize %xe_ggtt_node struct + * @ggtt: the &xe_ggtt where the new node will later be inserted/reserved. + * + * This function will allocated the struct %xe_ggtt_node and return it's pointer. + * This struct will then be freed after the node removal upon xe_ggtt_node_remove() + * or xe_ggtt_node_remove_balloon(). + * Having %xe_ggtt_node struct allocated doesn't mean that the node is already allocated + * in GGTT. Only the xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), + * xe_ggtt_node_insert_balloon() will ensure the node is inserted or reserved in GGTT. + * + * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. + **/ +struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt) +{ + struct xe_ggtt_node *node = kzalloc(sizeof(*node), GFP_NOFS); + + if (!node) + return ERR_PTR(-ENOMEM); + + INIT_WORK(&node->delayed_removal_work, ggtt_node_remove_work_func); + node->ggtt = ggtt; + + return node; +} + +/** + * xe_ggtt_node_fini - Forcebly finalize %xe_ggtt_node struct + * @node: the &xe_ggtt_node to be freed + * + * If anything went wrong with either xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(), + * or xe_ggtt_node_insert_balloon(); and this @node is not going to be reused, then, + * this function needs to be called to free the %xe_ggtt_node struct + **/ +void xe_ggtt_node_fini(struct xe_ggtt_node *node) +{ + kfree(node); +} + +/** + * xe_ggtt_node_allocated - Check if node is allocated in GGTT + * @node: the &xe_ggtt_node to be inspected + * + * Return: True if allocated, False otherwise. + */ +bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node) +{ + if (!node || !node->ggtt) + return false; + + return drm_mm_node_allocated(&node->base); +} + +/** + * xe_ggtt_map_bo - Map the BO into GGTT + * @ggtt: the &xe_ggtt where node will be mapped + * @bo: the &xe_bo to be mapped + */ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; - u64 start = bo->ggtt_node.start; + u64 start; u64 offset, pte; + if (XE_WARN_ON(!bo->ggtt_node)) + return; + + start = bo->ggtt_node->base.start; + for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); ggtt->pt_ops->ggtt_set_pte(ggtt, start + offset, pte); @@ -444,9 +604,9 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) alignment = SZ_64K; - if (XE_WARN_ON(bo->ggtt_node.size)) { + if (XE_WARN_ON(bo->ggtt_node)) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); return 0; } @@ -455,69 +615,108 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, return err; xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); + + bo->ggtt_node = xe_ggtt_node_init(ggtt); + if (IS_ERR(bo->ggtt_node)) { + err = PTR_ERR(bo->ggtt_node); + goto out; + } + mutex_lock(&ggtt->lock); - err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size, alignment, 0, start, end, 0); - if (!err) + if (err) + xe_ggtt_node_fini(bo->ggtt_node); + else xe_ggtt_map_bo(ggtt, bo); mutex_unlock(&ggtt->lock); if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE) xe_ggtt_invalidate(ggtt); + +out: xe_pm_runtime_put(tile_to_xe(ggtt->tile)); return err; } +/** + * xe_ggtt_insert_bo_at - Insert BO at a specific GGTT space + * @ggtt: the &xe_ggtt where bo will be inserted + * @bo: the &xe_bo to be inserted + * @start: address where it will be inserted + * @end: end of the range where it will be inserted + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end) { return __xe_ggtt_insert_bo_at(ggtt, bo, start, end); } +/** + * xe_ggtt_insert_bo - Insert BO into GGTT + * @ggtt: the &xe_ggtt where bo will be inserted + * @bo: the &xe_bo to be inserted + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); } -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, - bool invalidate) +/** + * xe_ggtt_remove_bo - Remove a BO from the GGTT + * @ggtt: the &xe_ggtt where node will be removed + * @bo: the &xe_bo to be removed + */ +void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - struct xe_device *xe = tile_to_xe(ggtt->tile); - bool bound; - int idx; - - bound = drm_dev_enter(&xe->drm, &idx); - if (bound) - xe_pm_runtime_get_noresume(xe); - - mutex_lock(&ggtt->lock); - if (bound) - xe_ggtt_clear(ggtt, node->start, node->size); - drm_mm_remove_node(node); - node->size = 0; - mutex_unlock(&ggtt->lock); - - if (!bound) + if (XE_WARN_ON(!bo->ggtt_node)) return; - if (invalidate) - xe_ggtt_invalidate(ggtt); + /* This BO is not currently in the GGTT */ + xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); - xe_pm_runtime_put(xe); - drm_dev_exit(idx); + xe_ggtt_node_remove(bo->ggtt_node, + bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); } -void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) +/** + * xe_ggtt_largest_hole - Largest GGTT hole + * @ggtt: the &xe_ggtt that will be inspected + * @alignment: minimum alignment + * @spare: If not NULL: in: desired memory size to be spared / out: Adjusted possible spare + * + * Return: size of the largest continuous GGTT region + */ +u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare) { - if (XE_WARN_ON(!bo->ggtt_node.size)) - return; + const struct drm_mm *mm = &ggtt->mm; + const struct drm_mm_node *entry; + u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile)); + u64 hole_start, hole_end, hole_size; + u64 max_hole = 0; - /* This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); + mutex_lock(&ggtt->lock); - xe_ggtt_remove_node(ggtt, &bo->ggtt_node, - bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { + hole_start = max(hole_start, hole_min_start); + hole_start = ALIGN(hole_start, alignment); + hole_end = ALIGN_DOWN(hole_end, alignment); + if (hole_start >= hole_end) + continue; + hole_size = hole_end - hole_start; + if (spare) + *spare -= min3(*spare, hole_size, max_hole); + max_hole = max(max_hole, hole_size); + } + + mutex_unlock(&ggtt->lock); + + return max_hole; } #ifdef CONFIG_PCI_IOV @@ -548,22 +747,28 @@ static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node /** * xe_ggtt_assign - assign a GGTT region to the VF - * @ggtt: the &xe_ggtt where the node belongs - * @node: the &drm_mm_node to update + * @node: the &xe_ggtt_node to update * @vfid: the VF identifier * * This function is used by the PF driver to assign a GGTT region to the VF. * In addition to PTE's VFID bits 11:2 also PRESENT bit 0 is set as on some * platforms VFs can't modify that either. */ -void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid) +void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid) { - mutex_lock(&ggtt->lock); - xe_ggtt_assign_locked(ggtt, node, vfid); - mutex_unlock(&ggtt->lock); + mutex_lock(&node->ggtt->lock); + xe_ggtt_assign_locked(node->ggtt, &node->base, vfid); + mutex_unlock(&node->ggtt->lock); } #endif +/** + * xe_ggtt_dump - Dump GGTT for debug + * @ggtt: the &xe_ggtt to be dumped + * @p: the &drm_mm_printer helper handle to be used to dump the information + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) { int err; @@ -576,3 +781,43 @@ int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) mutex_unlock(&ggtt->lock); return err; } + +/** + * xe_ggtt_print_holes - Print holes + * @ggtt: the &xe_ggtt to be inspected + * @alignment: min alignment + * @p: the &drm_printer + * + * Print GGTT ranges that are available and return total size available. + * + * Return: Total available size. + */ +u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p) +{ + const struct drm_mm *mm = &ggtt->mm; + const struct drm_mm_node *entry; + u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile)); + u64 hole_start, hole_end, hole_size; + u64 total = 0; + char buf[10]; + + mutex_lock(&ggtt->lock); + + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { + hole_start = max(hole_start, hole_min_start); + hole_start = ALIGN(hole_start, alignment); + hole_end = ALIGN_DOWN(hole_end, alignment); + if (hole_start >= hole_end) + continue; + hole_size = hole_end - hole_start; + total += hole_size; + + string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n", + hole_start, hole_end - 1, buf); + } + + mutex_unlock(&ggtt->lock); + + return total; +} diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 6a96fd54bf60..27e7d67de004 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -12,28 +12,30 @@ struct drm_printer; int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); -void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); - -int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct drm_mm_node *node); -void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node); - -int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, - u32 size, u32 align); -int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, - struct drm_mm_node *node, - u32 size, u32 align, u32 mm_flags); -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, - bool invalidate); + +struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt); +void xe_ggtt_node_fini(struct xe_ggtt_node *node); +int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, + u64 start, u64 size); +void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node); + +int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align); +int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node, + u32 size, u32 align, u32 mm_flags); +void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate); +bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end); void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); +u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare); int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); +u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p); #ifdef CONFIG_PCI_IOV -void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid); +void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid); #endif #endif diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index 2245d88d8f39..cb02b7994a9a 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -13,30 +13,70 @@ struct xe_bo; struct xe_gt; +/** + * struct xe_ggtt - Main GGTT struct + * + * In general, each tile can contains its own Global Graphics Translation Table + * (GGTT) instance. + */ struct xe_ggtt { + /** @tile: Back pointer to tile where this GGTT belongs */ struct xe_tile *tile; - + /** @size: Total size of this GGTT */ u64 size; #define XE_GGTT_FLAGS_64K BIT(0) + /** + * @flags: Flags for this GGTT + * Acceptable flags: + * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. + */ unsigned int flags; - + /** @scratch: Internal object allocation used as a scratch page */ struct xe_bo *scratch; - + /** @lock: Mutex lock to protect GGTT data */ struct mutex lock; - + /** + * @gsm: The iomem pointer to the actual location of the translation + * table located in the GSM for easy PTE manipulation + */ u64 __iomem *gsm; - + /** @pt_ops: Page Table operations per platform */ const struct xe_ggtt_pt_ops *pt_ops; - + /** @mm: The memory manager used to manage individual GGTT allocations */ struct drm_mm mm; - /** @access_count: counts GGTT writes */ unsigned int access_count; + /** @wq: Dedicated unordered work queue to process node removals */ + struct workqueue_struct *wq; +}; + +/** + * struct xe_ggtt_node - A node in GGTT. + * + * This struct needs to be initialized (only-once) with xe_ggtt_node_init() before any node + * insertion, reservation, or 'ballooning'. + * It will, then, be finalized by either xe_ggtt_node_remove() or xe_ggtt_node_deballoon(). + */ +struct xe_ggtt_node { + /** @ggtt: Back pointer to xe_ggtt where this region will be inserted at */ + struct xe_ggtt *ggtt; + /** @base: A drm_mm_node */ + struct drm_mm_node base; + /** @delayed_removal_work: The work struct for the delayed removal */ + struct work_struct delayed_removal_work; + /** @invalidate_on_remove: If it needs invalidation upon removal */ + bool invalidate_on_remove; }; +/** + * struct xe_ggtt_pt_ops - GGTT Page table operations + * Which can vary from platform to platform. + */ struct xe_ggtt_pt_ops { + /** @pte_encode_bo: Encode PTE address for a given BO */ u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); + /** @ggtt_set_pte: Directly write into GGTT's PTE */ void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte); }; diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index e4ad1d6ce1d5..c518d1d16d82 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -15,11 +15,11 @@ static void xe_sched_process_msg_queue_if_ready(struct xe_gpu_scheduler *sched) { struct xe_sched_msg *msg; - spin_lock(&sched->base.job_list_lock); + xe_sched_msg_lock(sched); msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); if (msg) xe_sched_process_msg_queue(sched); - spin_unlock(&sched->base.job_list_lock); + xe_sched_msg_unlock(sched); } static struct xe_sched_msg * @@ -27,12 +27,12 @@ xe_sched_get_msg(struct xe_gpu_scheduler *sched) { struct xe_sched_msg *msg; - spin_lock(&sched->base.job_list_lock); + xe_sched_msg_lock(sched); msg = list_first_entry_or_null(&sched->msgs, struct xe_sched_msg, link); if (msg) - list_del(&msg->link); - spin_unlock(&sched->base.job_list_lock); + list_del_init(&msg->link); + xe_sched_msg_unlock(sched); return msg; } @@ -93,9 +93,16 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg) { - spin_lock(&sched->base.job_list_lock); - list_add_tail(&msg->link, &sched->msgs); - spin_unlock(&sched->base.job_list_lock); + xe_sched_msg_lock(sched); + xe_sched_add_msg_locked(sched, msg); + xe_sched_msg_unlock(sched); +} +void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg) +{ + lockdep_assert_held(&sched->base.job_list_lock); + + list_add_tail(&msg->link, &sched->msgs); xe_sched_process_msg_queue(sched); } diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 10c6bb9c9386..cee9c6809fc0 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -24,6 +24,18 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg); +void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, + struct xe_sched_msg *msg); + +static inline void xe_sched_msg_lock(struct xe_gpu_scheduler *sched) +{ + spin_lock(&sched->base.job_list_lock); +} + +static inline void xe_sched_msg_unlock(struct xe_gpu_scheduler *sched) +{ + spin_unlock(&sched->base.job_list_lock); +} static inline void xe_sched_stop(struct xe_gpu_scheduler *sched) { diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index f8239a13fa2b..8a137cb83318 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -260,7 +260,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) struct xe_tile *tile = gt_to_tile(gt); int ret; - if (XE_WA(gt, 14018094691)) { + if (XE_WA(tile->primary_gt, 14018094691)) { ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); /* @@ -278,7 +278,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) ret = gsc_upload(gsc); - if (XE_WA(gt, 14018094691)) + if (XE_WA(tile->primary_gt, 14018094691)) xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); if (ret) @@ -437,7 +437,7 @@ out: return ret; } -static void free_resources(struct drm_device *drm, void *arg) +static void free_resources(void *arg) { struct xe_gsc *gsc = arg; @@ -450,11 +450,6 @@ static void free_resources(struct drm_device *drm, void *arg) xe_exec_queue_put(gsc->q); gsc->q = NULL; } - - if (gsc->private) { - xe_bo_unpin_map_no_vm(gsc->private); - gsc->private = NULL; - } } int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) @@ -474,10 +469,9 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) if (!hwe) return -ENODEV; - bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M, - ttm_bo_type_kernel, - XE_BO_FLAG_STOLEN | - XE_BO_FLAG_GGTT); + bo = xe_managed_bo_create_pin_map(xe, tile, SZ_4M, + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -501,7 +495,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) gsc->q = q; gsc->wq = wq; - err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc); + err = devm_add_action_or_reset(xe->drm.dev, free_resources, gsc); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index aa812a2bc3ed..2d6ea8c01445 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -62,11 +62,6 @@ gsc_to_gt(struct xe_gsc *gsc) return container_of(gsc, struct xe_gt, uc.gsc); } -static inline struct xe_device *kdev_to_xe(struct device *kdev) -{ - return dev_get_drvdata(kdev); -} - bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); @@ -345,7 +340,7 @@ void xe_gsc_proxy_irq_handler(struct xe_gsc *gsc, u32 iir) static int xe_gsc_proxy_component_bind(struct device *xe_kdev, struct device *mei_kdev, void *data) { - struct xe_device *xe = kdev_to_xe(xe_kdev); + struct xe_device *xe = kdev_to_xe_device(xe_kdev); struct xe_gt *gt = xe->tiles[0].media_gt; struct xe_gsc *gsc = >->uc.gsc; @@ -360,7 +355,7 @@ static int xe_gsc_proxy_component_bind(struct device *xe_kdev, static void xe_gsc_proxy_component_unbind(struct device *xe_kdev, struct device *mei_kdev, void *data) { - struct xe_device *xe = kdev_to_xe(xe_kdev); + struct xe_device *xe = kdev_to_xe_device(xe_kdev); struct xe_gt *gt = xe->tiles[0].media_gt; struct xe_gsc *gsc = >->uc.gsc; @@ -376,27 +371,6 @@ static const struct component_ops xe_gsc_proxy_component_ops = { .unbind = xe_gsc_proxy_component_unbind, }; -static void proxy_channel_free(struct drm_device *drm, void *arg) -{ - struct xe_gsc *gsc = arg; - - if (!gsc->proxy.bo) - return; - - if (gsc->proxy.to_csme) { - kfree(gsc->proxy.to_csme); - gsc->proxy.to_csme = NULL; - gsc->proxy.from_csme = NULL; - } - - if (gsc->proxy.bo) { - iosys_map_clear(&gsc->proxy.to_gsc); - iosys_map_clear(&gsc->proxy.from_gsc); - xe_bo_unpin_map_no_vm(gsc->proxy.bo); - gsc->proxy.bo = NULL; - } -} - static int proxy_channel_alloc(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); @@ -405,18 +379,15 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) struct xe_bo *bo; void *csme; - csme = kzalloc(GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); + csme = drmm_kzalloc(&xe->drm, GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); if (!csme) return -ENOMEM; - bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_PROXY_CHANNEL_SIZE, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); - if (IS_ERR(bo)) { - kfree(csme); + bo = xe_managed_bo_create_pin_map(xe, tile, GSC_PROXY_CHANNEL_SIZE, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); + if (IS_ERR(bo)) return PTR_ERR(bo); - } gsc->proxy.bo = bo; gsc->proxy.to_gsc = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0); @@ -424,7 +395,7 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) gsc->proxy.to_csme = csme; gsc->proxy.from_csme = csme + GSC_PROXY_BUFFER_SIZE; - return drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc); + return 0; } /** diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 58895ed22f6e..08a004d698d4 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -12,6 +12,8 @@ #include <generated/xe_wa_oob.h> +#include <generated/xe_wa_oob.h> + #include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_mi_commands.h" #include "regs/xe_gt_regs.h" @@ -110,9 +112,9 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) if (!xe_gt_is_media_type(gt)) { xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH); - reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg |= CG_DIS_CNTLBUS; - xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); } xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); @@ -134,9 +136,9 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) if (WARN_ON(err)) return; - reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg &= ~CG_DIS_CNTLBUS; - xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); } @@ -557,7 +559,6 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) xe_gt_mcr_init_early(gt); xe_pat_init(gt); - xe_gt_enable_host_l2_vram(gt); err = xe_uc_init(>->uc); if (err) @@ -569,6 +570,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) xe_gt_topology_init(gt); xe_gt_mcr_init(gt); + xe_gt_enable_host_l2_vram(gt); out_fw: xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 5e7fd937917a..8f95d3a5949b 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -17,7 +17,9 @@ #include "xe_gt_mcr.h" #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_vf_debugfs.h" +#include "xe_gt_stats.h" #include "xe_gt_topology.h" +#include "xe_guc_hwconfig.h" #include "xe_hw_engine.h" #include "xe_lrc.h" #include "xe_macros.h" @@ -269,6 +271,15 @@ static int vecs_default_lrc(struct xe_gt *gt, struct drm_printer *p) return 0; } +static int hwconfig(struct xe_gt *gt, struct drm_printer *p) +{ + xe_pm_runtime_get(gt_to_xe(gt)); + xe_guc_hwconfig_dump(>->uc.guc, p); + xe_pm_runtime_put(gt_to_xe(gt)); + + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset}, @@ -286,6 +297,8 @@ static const struct drm_info_list debugfs_list[] = { {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc}, {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc}, + {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info}, + {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig}, }; void xe_gt_debugfs_register(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 6d948a469126..7d7bd0be6233 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -8,8 +8,10 @@ #include "regs/xe_gt_regs.h" #include "xe_assert.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_gt_topology.h" #include "xe_gt_types.h" +#include "xe_guc_hwconfig.h" #include "xe_mmio.h" #include "xe_sriov.h" @@ -297,6 +299,36 @@ static void init_steering_mslice(struct xe_gt *gt) static unsigned int dss_per_group(struct xe_gt *gt) { + struct xe_guc *guc = >->uc.guc; + u32 max_slices = 0, max_subslices = 0; + int ret; + + /* + * Try to query the GuC's hwconfig table for the maximum number of + * slices and subslices. These don't reflect the platform's actual + * slice/DSS counts, just the physical layout by which we should + * determine the steering targets. On older platforms with older GuC + * firmware releases it's possible that these attributes may not be + * included in the table, so we can always fall back to the old + * hardcoded layouts. + */ +#define HWCONFIG_ATTR_MAX_SLICES 1 +#define HWCONFIG_ATTR_MAX_SUBSLICES 70 + + ret = xe_guc_hwconfig_lookup_u32(guc, HWCONFIG_ATTR_MAX_SLICES, + &max_slices); + if (ret < 0 || max_slices == 0) + goto fallback; + + ret = xe_guc_hwconfig_lookup_u32(guc, HWCONFIG_ATTR_MAX_SUBSLICES, + &max_subslices); + if (ret < 0 || max_subslices == 0) + goto fallback; + + return DIV_ROUND_UP(max_subslices, max_slices); + +fallback: + xe_gt_dbg(gt, "GuC hwconfig cannot provide dss/slice; using typical fallback values\n"); if (gt_to_xe(gt)->info.platform == XE_PVC) return 8; else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) @@ -314,16 +346,16 @@ static unsigned int dss_per_group(struct xe_gt *gt) */ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) { - int dss_per_grp = dss_per_group(gt); - xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS); - *group = dss / dss_per_grp; - *instance = dss % dss_per_grp; + *group = dss / gt->steering_dss_per_grp; + *instance = dss % gt->steering_dss_per_grp; } static void init_steering_dss(struct xe_gt *gt) { + gt->steering_dss_per_grp = dss_per_group(gt); + xe_gt_mcr_get_dss_steering(gt, min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0), xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)), diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 9292d5468868..0be4687bfc20 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -287,7 +287,7 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) PFD_VIRTUAL_ADDR_LO_SHIFT; pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) % - PF_QUEUE_NUM_DW; + pf_queue->num_dw; ret = true; } spin_unlock_irq(&pf_queue->lock); @@ -299,7 +299,8 @@ static bool pf_queue_full(struct pf_queue *pf_queue) { lockdep_assert_held(&pf_queue->lock); - return CIRC_SPACE(pf_queue->head, pf_queue->tail, PF_QUEUE_NUM_DW) <= + return CIRC_SPACE(pf_queue->head, pf_queue->tail, + pf_queue->num_dw) <= PF_MSG_LEN_DW; } @@ -312,22 +313,23 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) u32 asid; bool full; - /* - * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 - */ - BUILD_BUG_ON(PF_QUEUE_NUM_DW % PF_MSG_LEN_DW); - if (unlikely(len != PF_MSG_LEN_DW)) return -EPROTO; asid = FIELD_GET(PFD_ASID, msg[1]); pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE); + /* + * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 + */ + xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW)); + spin_lock_irqsave(&pf_queue->lock, flags); full = pf_queue_full(pf_queue); if (!full) { memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32)); - pf_queue->head = (pf_queue->head + len) % PF_QUEUE_NUM_DW; + pf_queue->head = (pf_queue->head + len) % + pf_queue->num_dw; queue_work(gt->usm.pf_wq, &pf_queue->worker); } else { drm_warn(&xe->drm, "PF Queue full, shouldn't be possible"); @@ -382,18 +384,61 @@ static void pf_queue_work_func(struct work_struct *w) static void acc_queue_work_func(struct work_struct *w); -int xe_gt_pagefault_init(struct xe_gt *gt) +static void pagefault_fini(void *arg) { + struct xe_gt *gt = arg; struct xe_device *xe = gt_to_xe(gt); int i; if (!xe->info.has_usm) + return; + + destroy_workqueue(gt->usm.acc_wq); + destroy_workqueue(gt->usm.pf_wq); + + for (i = 0; i < NUM_PF_QUEUE; ++i) + kfree(gt->usm.pf_queue[i].data); +} + +static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) +{ + xe_dss_mask_t all_dss; + int num_dss, num_eus; + + bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, + XE_MAX_DSS_FUSE_BITS); + + num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS); + num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, + XE_MAX_EU_FUSE_BITS) * num_dss; + + /* user can issue separate page faults per EU and per CS */ + pf_queue->num_dw = + (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW; + + pf_queue->gt = gt; + pf_queue->data = kcalloc(pf_queue->num_dw, sizeof(u32), GFP_KERNEL); + if (!pf_queue->data) + return -ENOMEM; + + spin_lock_init(&pf_queue->lock); + INIT_WORK(&pf_queue->worker, pf_queue_work_func); + + return 0; +} + +int xe_gt_pagefault_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + int i, ret = 0; + + if (!xe->info.has_usm) return 0; for (i = 0; i < NUM_PF_QUEUE; ++i) { - gt->usm.pf_queue[i].gt = gt; - spin_lock_init(>->usm.pf_queue[i].lock); - INIT_WORK(>->usm.pf_queue[i].worker, pf_queue_work_func); + ret = xe_alloc_pf_queue(gt, >->usm.pf_queue[i]); + if (ret) + return ret; } for (i = 0; i < NUM_ACC_QUEUE; ++i) { gt->usm.acc_queue[i].gt = gt; @@ -409,10 +454,12 @@ int xe_gt_pagefault_init(struct xe_gt *gt) gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", WQ_UNBOUND | WQ_HIGHPRI, NUM_ACC_QUEUE); - if (!gt->usm.acc_wq) + if (!gt->usm.acc_wq) { + destroy_workqueue(gt->usm.pf_wq); return -ENOMEM; + } - return 0; + return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt); } void xe_gt_pagefault_reset(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 52c7277d243d..41ed07b153b5 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -232,14 +232,14 @@ static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config) { u32 n = 0; - if (drm_mm_node_allocated(&config->ggtt_region)) { + if (xe_ggtt_node_allocated(config->ggtt_region)) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); - cfg[n++] = lower_32_bits(config->ggtt_region.start); - cfg[n++] = upper_32_bits(config->ggtt_region.start); + cfg[n++] = lower_32_bits(config->ggtt_region->base.start); + cfg[n++] = upper_32_bits(config->ggtt_region->base.start); cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); - cfg[n++] = lower_32_bits(config->ggtt_region.size); - cfg[n++] = upper_32_bits(config->ggtt_region.size); + cfg[n++] = lower_32_bits(config->ggtt_region->base.size); + cfg[n++] = upper_32_bits(config->ggtt_region->base.size); } return n; @@ -369,29 +369,28 @@ static int pf_distribute_config_ggtt(struct xe_tile *tile, unsigned int vfid, u6 return err ?: err2; } -static void pf_release_ggtt(struct xe_tile *tile, struct drm_mm_node *node) +static void pf_release_ggtt(struct xe_tile *tile, struct xe_ggtt_node *node) { - struct xe_ggtt *ggtt = tile->mem.ggtt; - - if (drm_mm_node_allocated(node)) { + if (xe_ggtt_node_allocated(node)) { /* * explicit GGTT PTE assignment to the PF using xe_ggtt_assign() * is redundant, as PTE will be implicitly re-assigned to PF by * the xe_ggtt_clear() called by below xe_ggtt_remove_node(). */ - xe_ggtt_remove_node(ggtt, node, false); + xe_ggtt_node_remove(node, false); } } static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_config *config) { - pf_release_ggtt(gt_to_tile(gt), &config->ggtt_region); + pf_release_ggtt(gt_to_tile(gt), config->ggtt_region); + config->ggtt_region = NULL; } static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct drm_mm_node *node = &config->ggtt_region; + struct xe_ggtt_node *node = config->ggtt_region; struct xe_tile *tile = gt_to_tile(gt); struct xe_ggtt *ggtt = tile->mem.ggtt; u64 alignment = pf_get_ggtt_alignment(gt); @@ -403,40 +402,48 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) size = round_up(size, alignment); - if (drm_mm_node_allocated(node)) { + if (xe_ggtt_node_allocated(node)) { err = pf_distribute_config_ggtt(tile, vfid, 0, 0); if (unlikely(err)) return err; pf_release_ggtt(tile, node); } - xe_gt_assert(gt, !drm_mm_node_allocated(node)); + xe_gt_assert(gt, !xe_ggtt_node_allocated(node)); if (!size) return 0; - err = xe_ggtt_insert_special_node(ggtt, node, size, alignment); + node = xe_ggtt_node_init(ggtt); + if (IS_ERR(node)) + return PTR_ERR(node); + + err = xe_ggtt_node_insert(node, size, alignment); if (unlikely(err)) - return err; + goto err; - xe_ggtt_assign(ggtt, node, vfid); + xe_ggtt_assign(node, vfid); xe_gt_sriov_dbg_verbose(gt, "VF%u assigned GGTT %llx-%llx\n", - vfid, node->start, node->start + node->size - 1); + vfid, node->base.start, node->base.start + node->base.size - 1); - err = pf_distribute_config_ggtt(gt->tile, vfid, node->start, node->size); + err = pf_distribute_config_ggtt(gt->tile, vfid, node->base.start, node->base.size); if (unlikely(err)) - return err; + goto err; + config->ggtt_region = node; return 0; +err: + xe_ggtt_node_fini(node); + return err; } static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) { struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); - struct drm_mm_node *node = &config->ggtt_region; + struct xe_ggtt_node *node = config->ggtt_region; xe_gt_assert(gt, !xe_gt_is_media_type(gt)); - return drm_mm_node_allocated(node) ? node->size : 0; + return xe_ggtt_node_allocated(node) ? node->base.size : 0; } /** @@ -587,30 +594,11 @@ int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid, static u64 pf_get_max_ggtt(struct xe_gt *gt) { struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; - const struct drm_mm *mm = &ggtt->mm; - const struct drm_mm_node *entry; u64 alignment = pf_get_ggtt_alignment(gt); u64 spare = pf_get_spare_ggtt(gt); - u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); - u64 hole_start, hole_end, hole_size; - u64 max_hole = 0; - - mutex_lock(&ggtt->lock); - - drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { - hole_start = max(hole_start, hole_min_start); - hole_start = ALIGN(hole_start, alignment); - hole_end = ALIGN_DOWN(hole_end, alignment); - if (hole_start >= hole_end) - continue; - hole_size = hole_end - hole_start; - xe_gt_sriov_dbg_verbose(gt, "HOLE start %llx size %lluK\n", - hole_start, hole_size / SZ_1K); - spare -= min3(spare, hole_size, max_hole); - max_hole = max(max_hole, hole_size); - } + u64 max_hole; - mutex_unlock(&ggtt->lock); + max_hole = xe_ggtt_largest_hole(ggtt, alignment, &spare); xe_gt_sriov_dbg_verbose(gt, "HOLE max %lluK reserved %lluK\n", max_hole / SZ_1K, spare / SZ_1K); @@ -1928,6 +1916,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; struct xe_device *xe = gt_to_xe(gt); + bool is_primary = !xe_gt_is_media_type(gt); bool valid_ggtt, valid_ctxs, valid_dbs; bool valid_any, valid_all; @@ -1936,13 +1925,17 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) valid_dbs = pf_get_vf_config_dbs(gt, vfid); /* note that GuC doorbells are optional */ - valid_any = valid_ggtt || valid_ctxs || valid_dbs; - valid_all = valid_ggtt && valid_ctxs; + valid_any = valid_ctxs || valid_dbs; + valid_all = valid_ctxs; + + /* and GGTT/LMEM is configured on primary GT only */ + valid_all = valid_all && valid_ggtt; + valid_any = valid_any || (valid_ggtt && is_primary); if (IS_DGFX(xe)) { bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid); - valid_any = valid_any || valid_lmem; + valid_any = valid_any || (valid_lmem && is_primary); valid_all = valid_all && valid_lmem; } @@ -2020,13 +2013,15 @@ int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p) for (n = 1; n <= total_vfs; n++) { config = >->sriov.pf.vfs[n].config; - if (!drm_mm_node_allocated(&config->ggtt_region)) + if (!xe_ggtt_node_allocated(config->ggtt_region)) continue; - string_get_size(config->ggtt_region.size, 1, STRING_UNITS_2, buf, sizeof(buf)); + string_get_size(config->ggtt_region->base.size, 1, STRING_UNITS_2, + buf, sizeof(buf)); drm_printf(p, "VF%u:\t%#0llx-%#llx\t(%s)\n", - n, config->ggtt_region.start, - config->ggtt_region.start + config->ggtt_region.size - 1, buf); + n, config->ggtt_region->base.start, + config->ggtt_region->base.start + config->ggtt_region->base.size - 1, + buf); } return 0; @@ -2114,12 +2109,8 @@ int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p) int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p) { struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; - const struct drm_mm *mm = &ggtt->mm; - const struct drm_mm_node *entry; u64 alignment = pf_get_ggtt_alignment(gt); - u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); - u64 hole_start, hole_end, hole_size; - u64 spare, avail, total = 0; + u64 spare, avail, total; char buf[10]; xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); @@ -2127,24 +2118,8 @@ int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_prin mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); spare = pf_get_spare_ggtt(gt); + total = xe_ggtt_print_holes(ggtt, alignment, p); - mutex_lock(&ggtt->lock); - - drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { - hole_start = max(hole_start, hole_min_start); - hole_start = ALIGN(hole_start, alignment); - hole_end = ALIGN_DOWN(hole_end, alignment); - if (hole_start >= hole_end) - continue; - hole_size = hole_end - hole_start; - total += hole_size; - - string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf)); - drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n", - hole_start, hole_end - 1, buf); - } - - mutex_unlock(&ggtt->lock); mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); string_get_size(total, 1, STRING_UNITS_2, buf, sizeof(buf)); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h index 7bc66656fcc7..2d3b73d78f14 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h @@ -6,8 +6,7 @@ #ifndef _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ #define _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ -#include <drm/drm_mm.h> - +#include "xe_ggtt_types.h" #include "xe_guc_klv_thresholds_set_types.h" struct xe_bo; @@ -19,7 +18,7 @@ struct xe_bo; */ struct xe_gt_sriov_config { /** @ggtt_region: GGTT region assigned to the VF. */ - struct drm_mm_node ggtt_region; + struct xe_ggtt_node *ggtt_region; /** @lmem_obj: LMEM allocation for use by the VF. */ struct xe_bo *lmem_obj; /** @num_ctxs: number of GuC contexts IDs. */ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 47222bd9988d..4ebc82e607af 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -495,6 +495,25 @@ u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt) return gt->sriov.vf.self_config.lmem_size; } +static struct xe_ggtt_node * +vf_balloon_ggtt_node(struct xe_ggtt *ggtt, u64 start, u64 end) +{ + struct xe_ggtt_node *node; + int err; + + node = xe_ggtt_node_init(ggtt); + if (IS_ERR(node)) + return node; + + err = xe_ggtt_node_insert_balloon(node, start, end); + if (err) { + xe_ggtt_node_fini(node); + return ERR_PTR(err); + } + + return node; +} + static int vf_balloon_ggtt(struct xe_gt *gt) { struct xe_gt_sriov_vf_selfconfig *config = >->sriov.vf.self_config; @@ -502,7 +521,6 @@ static int vf_balloon_ggtt(struct xe_gt *gt) struct xe_ggtt *ggtt = tile->mem.ggtt; struct xe_device *xe = gt_to_xe(gt); u64 start, end; - int err; xe_gt_assert(gt, IS_SRIOV_VF(xe)); xe_gt_assert(gt, !xe_gt_is_media_type(gt)); @@ -528,35 +546,31 @@ static int vf_balloon_ggtt(struct xe_gt *gt) start = xe_wopcm_size(xe); end = config->ggtt_base; if (end != start) { - err = xe_ggtt_balloon(ggtt, start, end, &tile->sriov.vf.ggtt_balloon[0]); - if (err) - goto failed; + tile->sriov.vf.ggtt_balloon[0] = vf_balloon_ggtt_node(ggtt, start, end); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[0])) + return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]); } start = config->ggtt_base + config->ggtt_size; end = GUC_GGTT_TOP; if (end != start) { - err = xe_ggtt_balloon(ggtt, start, end, &tile->sriov.vf.ggtt_balloon[1]); - if (err) - goto deballoon; + tile->sriov.vf.ggtt_balloon[1] = vf_balloon_ggtt_node(ggtt, start, end); + if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) { + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); + return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]); + } } return 0; - -deballoon: - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); -failed: - return err; } static void deballoon_ggtt(struct drm_device *drm, void *arg) { struct xe_tile *tile = arg; - struct xe_ggtt *ggtt = tile->mem.ggtt; xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile))); - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[1]); - xe_ggtt_deballoon(ggtt, &tile->sriov.vf.ggtt_balloon[0]); + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[1]); + xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]); } /** diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c new file mode 100644 index 000000000000..c7364a5aef8f --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_stats.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include <linux/atomic.h> + +#include <drm/drm_print.h> + +#include "xe_gt.h" +#include "xe_gt_stats.h" + +/** + * xe_gt_stats_incr - Increments the specified stats counter + * @gt: graphics tile + * @id: xe_gt_stats_id type id that needs to be incremented + * @incr: value to be incremented with + * + * Increments the specified stats counter. + */ +void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr) +{ + if (id >= __XE_GT_STATS_NUM_IDS) + return; + + atomic_add(incr, >->stats.counters[id]); +} + +static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = { + "tlb_inval_count", +}; + +/** + * xe_gt_stats_print_info - Print the GT stats + * @gt: graphics tile + * @p: drm_printer where it will be printed out. + * + * This prints out all the available GT stats. + */ +int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p) +{ + enum xe_gt_stats_id id; + + for (id = 0; id < __XE_GT_STATS_NUM_IDS; ++id) + drm_printf(p, "%s: %d\n", stat_description[id], + atomic_read(>->stats.counters[id])); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h new file mode 100644 index 000000000000..91d944f6c4e4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GT_STATS_H_ +#define _XE_GT_STATS_H_ + +struct xe_gt; +struct drm_printer; + +enum xe_gt_stats_id { + XE_GT_STATS_ID_TLB_INVAL, + /* must be the last entry */ + __XE_GT_STATS_NUM_IDS, +}; + +#ifdef CONFIG_DEBUG_FS +int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr); +#else +static inline void +xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, + int incr) +{ +} + +#endif +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 87cb76a8718c..cca9cf536f76 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -12,6 +12,7 @@ #include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_guc_ct.h" +#include "xe_gt_stats.h" #include "xe_mmio.h" #include "xe_pm.h" #include "xe_sriov.h" @@ -213,6 +214,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, gt->tlb_invalidation.seqno = 1; } mutex_unlock(&guc->ct.lock); + xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1); return ret; } diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 631928258d71..31946d7fe701 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -10,6 +10,7 @@ #include "xe_gt_idle_types.h" #include "xe_gt_sriov_pf_types.h" #include "xe_gt_sriov_vf_types.h" +#include "xe_gt_stats.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" #include "xe_oa.h" @@ -133,6 +134,14 @@ struct xe_gt { u8 has_indirect_ring_state:1; } info; +#if IS_ENABLED(CONFIG_DEBUG_FS) + /** @stats: GT stats */ + struct { + /** @stats.counters: counters for various GT stats */ + atomic_t counters[__XE_GT_STATS_NUM_IDS]; + } stats; +#endif + /** * @mmio: mmio info for GT. All GTs within a tile share the same * register space, but have their own copy of GSI registers at a @@ -238,9 +247,14 @@ struct xe_gt { struct pf_queue { /** @usm.pf_queue.gt: back pointer to GT */ struct xe_gt *gt; -#define PF_QUEUE_NUM_DW 128 /** @usm.pf_queue.data: data in the page fault queue */ - u32 data[PF_QUEUE_NUM_DW]; + u32 *data; + /** + * @usm.pf_queue.num_dw: number of DWORDS in the page + * fault queue. Dynamically calculated based on the number + * of compute resources available. + */ + u32 num_dw; /** * @usm.pf_queue.tail: tail pointer in DWs for page fault queue, * moved by worker which processes faults (consumer). @@ -368,6 +382,12 @@ struct xe_gt { } steering[NUM_STEERING_TYPES]; /** + * @steering_dss_per_grp: number of DSS per steering group (gslice, + * cslice, etc.). + */ + unsigned int steering_dss_per_grp; + + /** * @mcr_lock: protects the MCR_SELECTOR register for the duration * of a steered operation */ diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index de0fe9e65746..52df28032a6f 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -350,6 +350,8 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; + xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); + ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc); if (ret) goto out; @@ -358,8 +360,6 @@ int xe_guc_init(struct xe_guc *guc) xe_guc_comm_init_early(guc); - xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); - return 0; out: diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index e0bbf98f849d..c3e6b51f7a09 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -11,6 +11,16 @@ #include "xe_hw_engine_types.h" #include "xe_macros.h" +/* + * GuC version number components are defined to be only 8-bit size, + * so converting to a 32bit 8.8.8 integer allows simple (and safe) + * numerical comparisons. + */ +#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) +#define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch) +#define GUC_SUBMIT_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]) +#define GUC_FIRMWARE_VER(guc) MAKE_VER_STRUCT((guc)->fw.versions.found[XE_UC_FW_VER_RELEASE]) + struct drm_printer; void xe_guc_comm_init_early(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 1c60b685dbc6..d1902a8581ca 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -24,6 +24,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_uc_fw.h" #include "xe_wa.h" /* Slack of a few additional entries per engine */ @@ -367,6 +368,11 @@ static void guc_waklv_init(struct xe_guc_ads *ads) 0xC40, &offset, &remain); + if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406)) + guc_waklv_enable_simple(ads, + GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET, + &offset, &remain); + size = guc_ads_waklv_size(ads) - remain; if (!size) return; diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index beeeb120d1fc..f24dd5223926 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -105,12 +105,20 @@ ct_to_xe(struct xe_guc_ct *ct) * enough space to avoid backpressure on the driver. We increase the size * of the receive buffer (relative to the send) to ensure a G2H response * CTB has a landing spot. + * + * In addition to submissions, the G2H buffer needs to be able to hold + * enough space for recoverable page fault notifications. The number of + * page faults is interrupt driven and can be as much as the number of + * compute resources available. However, most of the actual work for these + * is in a separate page fault worker thread. Therefore we only need to + * make sure the queue has enough space to handle all of the submissions + * and responses and an extra buffer for incoming page faults. */ #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) #define CTB_H2G_BUFFER_SIZE (SZ_4K) -#define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) -#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) +#define CTB_G2H_BUFFER_SIZE (SZ_128K) +#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2) /** * xe_guc_ct_queue_proc_time_jiffies - Return maximum time to process a full diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index d9b570a154a2..af2c817d552c 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -6,6 +6,7 @@ #include "xe_guc_hwconfig.h" #include <drm/drm_managed.h> +#include <drm/drm_print.h> #include "abi/guc_actions_abi.h" #include "xe_bo.h" @@ -103,3 +104,99 @@ void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst) xe_map_memcpy_from(xe, dst, &guc->hwconfig.bo->vmap, 0, guc->hwconfig.size); } + +void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p) +{ + size_t size = xe_guc_hwconfig_size(guc); + u32 *hwconfig; + u64 num_dw; + u32 extra_bytes; + int i = 0; + + if (size == 0) { + drm_printf(p, "No hwconfig available\n"); + return; + } + + num_dw = div_u64_rem(size, sizeof(u32), &extra_bytes); + + hwconfig = kzalloc(size, GFP_KERNEL); + if (!hwconfig) { + drm_printf(p, "Error: could not allocate hwconfig memory\n"); + return; + } + + xe_guc_hwconfig_copy(guc, hwconfig); + + /* An entry requires at least three dwords for key, length, value */ + while (i + 3 <= num_dw) { + u32 attribute = hwconfig[i++]; + u32 len_dw = hwconfig[i++]; + + if (i + len_dw > num_dw) { + drm_printf(p, "Error: Attribute %u is %u dwords, but only %llu remain\n", + attribute, len_dw, num_dw - i); + len_dw = num_dw - i; + } + + /* + * If it's a single dword (as most hwconfig attributes are), + * then it's probably a number that makes sense to display + * in decimal form. In the rare cases where it's more than + * one dword, just print it in hex form and let the user + * figure out how to interpret it. + */ + if (len_dw == 1) + drm_printf(p, "[%2u] = %u\n", attribute, hwconfig[i]); + else + drm_printf(p, "[%2u] = { %*ph }\n", attribute, + (int)(len_dw * sizeof(u32)), &hwconfig[i]); + i += len_dw; + } + + if (i < num_dw || extra_bytes) + drm_printf(p, "Error: %llu extra bytes at end of hwconfig\n", + (num_dw - i) * sizeof(u32) + extra_bytes); + + kfree(hwconfig); +} + +/* + * Lookup a specific 32-bit attribute value in the GuC's hwconfig table. + */ +int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val) +{ + size_t size = xe_guc_hwconfig_size(guc); + u64 num_dw = div_u64(size, sizeof(u32)); + u32 *hwconfig; + bool found = false; + int i = 0; + + if (num_dw == 0) + return -EINVAL; + + hwconfig = kzalloc(size, GFP_KERNEL); + if (!hwconfig) + return -ENOMEM; + + xe_guc_hwconfig_copy(guc, hwconfig); + + /* An entry requires at least three dwords for key, length, value */ + while (i + 3 <= num_dw) { + u32 key = hwconfig[i++]; + u32 len_dw = hwconfig[i++]; + + if (key != attribute) { + i += len_dw; + continue; + } + + *val = hwconfig[i]; + found = true; + break; + } + + kfree(hwconfig); + + return found ? 0 : -ENOENT; +} diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.h b/drivers/gpu/drm/xe/xe_guc_hwconfig.h index b5794d641900..ab4e5038236e 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.h +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.h @@ -8,10 +8,13 @@ #include <linux/types.h> +struct drm_printer; struct xe_guc; int xe_guc_hwconfig_init(struct xe_guc *guc); u32 xe_guc_hwconfig_size(struct xe_guc *guc); void xe_guc_hwconfig_copy(struct xe_guc *guc, void *dst); +void xe_guc_hwconfig_dump(struct xe_guc *guc, struct drm_printer *p); +int xe_guc_hwconfig_lookup_u32(struct xe_guc *guc, u32 attribute, u32 *val); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 32e93a8127d4..def503abeed5 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -1042,7 +1042,7 @@ static void xe_guc_pc_fini_hw(void *arg) return; XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); - XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); + xe_guc_pc_gucrc_disable(pc); XE_WARN_ON(xe_guc_pc_stop(pc)); /* Bind requested freq to mert_freq_cap before unload */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 460808507947..fbbe6a487bbb 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -284,7 +284,7 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) free_submit_wq(guc); } -static void guc_submit_wedged_fini(struct drm_device *drm, void *arg) +static void guc_submit_wedged_fini(void *arg) { struct xe_guc *guc = arg; struct xe_exec_queue *q; @@ -877,7 +877,7 @@ void xe_guc_submit_wedge(struct xe_guc *guc) xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); - err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, guc_submit_wedged_fini, guc); if (err) { drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); @@ -1374,9 +1374,11 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) struct xe_exec_queue *q = msg->private_data; if (guc_exec_queue_allowed_to_change_state(q)) { - q->guc->resume_time = RESUME_PENDING; clear_exec_queue_suspended(q); - enable_scheduling(q); + if (!exec_queue_enabled(q)) { + q->guc->resume_time = RESUME_PENDING; + enable_scheduling(q); + } } else { clear_exec_queue_suspended(q); } @@ -1386,6 +1388,8 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) #define SET_SCHED_PROPS 2 #define SUSPEND 3 #define RESUME 4 +#define OPCODE_MASK 0xf +#define MSG_LOCKED BIT(8) static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) { @@ -1430,7 +1434,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) struct xe_device *xe = guc_to_xe(guc); struct xe_guc_exec_queue *ge; long timeout; - int err; + int err, i; xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); @@ -1442,6 +1446,9 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) ge->q = q; init_waitqueue_head(&ge->suspend_wait); + for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) + INIT_LIST_HEAD(&ge->static_msgs[i].link); + timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(q->sched_props.job_timeout_ms); err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, @@ -1504,11 +1511,26 @@ static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); INIT_LIST_HEAD(&msg->link); - msg->opcode = opcode; + msg->opcode = opcode & OPCODE_MASK; msg->private_data = q; trace_xe_sched_msg_add(msg); - xe_sched_add_msg(&q->guc->sched, msg); + if (opcode & MSG_LOCKED) + xe_sched_add_msg_locked(&q->guc->sched, msg); + else + xe_sched_add_msg(&q->guc->sched, msg); +} + +static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, + struct xe_sched_msg *msg, + u32 opcode) +{ + if (!list_empty(&msg->link)) + return false; + + guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED); + + return true; } #define STATIC_MSG_CLEANUP 0 @@ -1582,13 +1604,16 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, static int guc_exec_queue_suspend(struct xe_exec_queue *q) { + struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; - if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending) + if (exec_queue_killed_or_banned_or_wedged(q)) return -EINVAL; - q->guc->suspend_pending = true; - guc_exec_queue_add_msg(q, msg, SUSPEND); + xe_sched_msg_lock(sched); + if (guc_exec_queue_try_add_msg(q, msg, SUSPEND)) + q->guc->suspend_pending = true; + xe_sched_msg_unlock(sched); return 0; } @@ -1603,11 +1628,11 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) * suspend_pending upon kill but to be paranoid but races in which * suspend_pending is set after kill also check kill here. */ - ret = wait_event_timeout(q->guc->suspend_wait, - !READ_ONCE(q->guc->suspend_pending) || - exec_queue_killed(q) || - guc_read_stopped(guc), - HZ * 5); + ret = wait_event_interruptible_timeout(q->guc->suspend_wait, + !READ_ONCE(q->guc->suspend_pending) || + exec_queue_killed(q) || + guc_read_stopped(guc), + HZ * 5); if (!ret) { xe_gt_warn(guc_to_gt(guc), @@ -1617,18 +1642,21 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) return -ETIME; } - return 0; + return ret < 0 ? ret : 0; } static void guc_exec_queue_resume(struct xe_exec_queue *q) { + struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_device *xe = guc_to_xe(guc); xe_assert(xe, !q->guc->suspend_pending); - guc_exec_queue_add_msg(q, msg, RESUME); + xe_sched_msg_lock(sched); + guc_exec_queue_try_add_msg(q, msg, RESUME); + xe_sched_msg_unlock(sched); } static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index bec4366e5513..f5459f97af23 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -43,14 +43,6 @@ huc_to_guc(struct xe_huc *huc) return &container_of(huc, struct xe_uc, huc)->guc; } -static void free_gsc_pkt(struct drm_device *drm, void *arg) -{ - struct xe_huc *huc = arg; - - xe_bo_unpin_map_no_vm(huc->gsc_pkt); - huc->gsc_pkt = NULL; -} - #define PXP43_HUC_AUTH_INOUT_SIZE SZ_4K static int huc_alloc_gsc_pkt(struct xe_huc *huc) { @@ -59,17 +51,16 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc) struct xe_bo *bo; /* we use a single object for both input and output */ - bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL, - PXP43_HUC_AUTH_INOUT_SIZE * 2, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT); + bo = xe_managed_bo_create_pin_map(xe, gt_to_tile(gt), + PXP43_HUC_AUTH_INOUT_SIZE * 2, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); huc->gsc_pkt = bo; - return drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc); + return 0; } int xe_huc_init(struct xe_huc *huc) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 07ed9fd28f19..18980238a2ea 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -5,7 +5,10 @@ #include "xe_hw_engine.h" +#include <linux/nospec.h> + #include <drm/drm_managed.h> +#include <drm/xe_drm.h> #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" @@ -20,6 +23,7 @@ #include "xe_gt_printk.h" #include "xe_gt_mcr.h" #include "xe_gt_topology.h" +#include "xe_hw_engine_group.h" #include "xe_hw_fence.h" #include "xe_irq.h" #include "xe_lrc.h" @@ -263,7 +267,7 @@ static const struct engine_info engine_infos[] = { }, }; -static void hw_engine_fini(struct drm_device *drm, void *arg) +static void hw_engine_fini(void *arg) { struct xe_hw_engine *hwe = arg; @@ -274,8 +278,18 @@ static void hw_engine_fini(struct drm_device *drm, void *arg) hwe->gt = NULL; } -static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, - u32 val) +/** + * xe_hw_engine_mmio_write32() - Write engine register + * @hwe: engine + * @reg: register to write into + * @val: desired 32-bit value to write + * + * This function will write val into an engine specific register. + * Forcewake must be held by the caller. + * + */ +void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe, + struct xe_reg reg, u32 val) { xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); @@ -285,7 +299,17 @@ static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, xe_mmio_write32(hwe->gt, reg, val); } -static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) +/** + * xe_hw_engine_mmio_read32() - Read engine register + * @hwe: engine + * @reg: register to read from + * + * This function will read from an engine specific register. + * Forcewake must be held by the caller. + * + * Return: value of the 32-bit register. + */ +u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg) { xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base)); xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain); @@ -304,14 +328,14 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe) xe_mmio_write32(hwe->gt, RCU_MODE, _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE)); - hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); - hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), - xe_bo_ggtt_addr(hwe->hwsp)); - hw_engine_mmio_write32(hwe, RING_MODE(0), - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); - hw_engine_mmio_write32(hwe, RING_MI_MODE(0), - _MASKED_BIT_DISABLE(STOP_RING)); - hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); + xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); + xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), + xe_bo_ggtt_addr(hwe->hwsp)); + xe_hw_engine_mmio_write32(hwe, RING_MODE(0), + _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); + xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), + _MASKED_BIT_DISABLE(STOP_RING)); + xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); } static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt, @@ -425,6 +449,12 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) 0xA, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + /* Enable Priority Mem Read */ + { XE_RTP_NAME("Priority_Mem_Read"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, {} }; @@ -555,7 +585,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY) gt->usm.reserved_bcs_instance = hwe->instance; - return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe); + return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe); err_kernel_lrc: xe_lrc_put(hwe->kernel_lrc); @@ -761,6 +791,9 @@ int xe_hw_engines_init(struct xe_gt *gt) } hw_engine_setup_logical_mapping(gt); + err = xe_hw_engine_setup_groups(gt); + if (err) + return err; return 0; } @@ -791,7 +824,7 @@ xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe, unsigned int dss; u16 group, instance; - snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); + snapshot->reg.instdone.ring = xe_hw_engine_mmio_read32(hwe, RING_INSTDONE(0)); if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER) return; @@ -887,53 +920,53 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) return snapshot; snapshot->reg.ring_execlist_status = - hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); - val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); + xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); snapshot->reg.ring_execlist_status |= val << 32; snapshot->reg.ring_execlist_sq_contents = - hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); - val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); + xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); snapshot->reg.ring_execlist_sq_contents |= val << 32; - snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0)); - val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); + snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); snapshot->reg.ring_acthd |= val << 32; - snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0)); - val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); + snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); snapshot->reg.ring_bbaddr |= val << 32; snapshot->reg.ring_dma_fadd = - hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); - val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); + xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); snapshot->reg.ring_dma_fadd |= val << 32; - snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); - snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); - snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); + snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); + snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); + snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0)); if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) { - val = hw_engine_mmio_read32(hwe, RING_START_UDW(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0)); snapshot->reg.ring_start |= val << 32; } if (xe_gt_has_indirect_ring_state(hwe->gt)) { snapshot->reg.indirect_ring_state = - hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); + xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); } snapshot->reg.ring_head = - hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; + xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; snapshot->reg.ring_tail = - hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; - snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0)); + xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; + snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0)); snapshot->reg.ring_mi_mode = - hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); - snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0)); - snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0)); - snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0)); - snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0)); - snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0)); - snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); + xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); + snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0)); + snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0)); + snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0)); + snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0)); + snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0)); + snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0)); xe_hw_engine_snapshot_instdone_capture(hwe, snapshot); if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) @@ -1135,3 +1168,41 @@ enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe) { return engine_infos[hwe->engine_id].domain; } + +static const enum xe_engine_class user_to_xe_engine_class[] = { + [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, + [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, + [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, + [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, + [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, +}; + +/** + * xe_hw_engine_lookup() - Lookup hardware engine for class:instance + * @xe: xe device + * @eci: engine class and instance + * + * This function will find a hardware engine for given engine + * class and instance. + * + * Return: If found xe_hw_engine pointer, NULL otherwise. + */ +struct xe_hw_engine * +xe_hw_engine_lookup(struct xe_device *xe, + struct drm_xe_engine_class_instance eci) +{ + unsigned int idx; + + if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) + return NULL; + + if (eci.gt_id >= xe->info.gt_count) + return NULL; + + idx = array_index_nospec(eci.engine_class, + ARRAY_SIZE(user_to_xe_engine_class)); + + return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), + user_to_xe_engine_class[idx], + eci.engine_instance, true); +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index 900c8c991430..022819a4a8eb 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -9,6 +9,8 @@ #include "xe_hw_engine_types.h" struct drm_printer; +struct drm_xe_engine_class_instance; +struct xe_device; #ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN #define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN @@ -62,6 +64,11 @@ void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p); void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe); bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe); + +struct xe_hw_engine * +xe_hw_engine_lookup(struct xe_device *xe, + struct drm_xe_engine_class_instance eci); + static inline bool xe_hw_engine_is_valid(struct xe_hw_engine *hwe) { return hwe->name; @@ -71,4 +78,7 @@ const char *xe_hw_engine_class_to_str(enum xe_engine_class class); u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe); enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe); +void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg, u32 val); +u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg); + #endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c new file mode 100644 index 000000000000..82750520a90a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_exec_queue.h" +#include "xe_gt.h" +#include "xe_hw_engine_group.h" +#include "xe_vm.h" + +static void +hw_engine_group_free(struct drm_device *drm, void *arg) +{ + struct xe_hw_engine_group *group = arg; + + destroy_workqueue(group->resume_wq); + kfree(group); +} + +static void +hw_engine_group_resume_lr_jobs_func(struct work_struct *w) +{ + struct xe_exec_queue *q; + struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work); + int err; + enum xe_hw_engine_group_execution_mode previous_mode; + + err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode); + if (err) + return; + + if (previous_mode == EXEC_MODE_LR) + goto put; + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (!xe_vm_in_fault_mode(q->vm)) + continue; + + q->ops->resume(q); + } + +put: + xe_hw_engine_group_put(group); +} + +static struct xe_hw_engine_group * +hw_engine_group_alloc(struct xe_device *xe) +{ + struct xe_hw_engine_group *group; + int err; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0); + if (!group->resume_wq) + return ERR_PTR(-ENOMEM); + + init_rwsem(&group->mode_sem); + INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); + INIT_LIST_HEAD(&group->exec_queue_list); + + err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); + if (err) + return ERR_PTR(err); + + return group; +} + +/** + * xe_hw_engine_setup_groups() - Setup the hw engine groups for the gt + * @gt: The gt for which groups are setup + * + * Return: 0 on success, negative error code on error. + */ +int xe_hw_engine_setup_groups(struct xe_gt *gt) +{ + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs; + struct xe_device *xe = gt_to_xe(gt); + int err; + + group_rcs_ccs = hw_engine_group_alloc(xe); + if (IS_ERR(group_rcs_ccs)) { + err = PTR_ERR(group_rcs_ccs); + goto err_group_rcs_ccs; + } + + group_bcs = hw_engine_group_alloc(xe); + if (IS_ERR(group_bcs)) { + err = PTR_ERR(group_bcs); + goto err_group_bcs; + } + + group_vcs_vecs = hw_engine_group_alloc(xe); + if (IS_ERR(group_vcs_vecs)) { + err = PTR_ERR(group_vcs_vecs); + goto err_group_vcs_vecs; + } + + for_each_hw_engine(hwe, gt, id) { + switch (hwe->class) { + case XE_ENGINE_CLASS_COPY: + hwe->hw_engine_group = group_bcs; + break; + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + hwe->hw_engine_group = group_rcs_ccs; + break; + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + hwe->hw_engine_group = group_vcs_vecs; + break; + case XE_ENGINE_CLASS_OTHER: + break; + default: + drm_warn(&xe->drm, "NOT POSSIBLE"); + } + } + + return 0; + +err_group_vcs_vecs: + kfree(group_vcs_vecs); +err_group_bcs: + kfree(group_bcs); +err_group_rcs_ccs: + kfree(group_rcs_ccs); + + return err; +} + +/** + * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group + * @group: The hw engine group + * @q: The exec_queue + * + * Return: 0 on success, + * -EINTR if the lock could not be acquired + */ +int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q) +{ + int err; + struct xe_device *xe = gt_to_xe(q->gt); + + xe_assert(xe, group); + xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM)); + xe_assert(xe, q->vm); + + if (xe_vm_in_preempt_fence_mode(q->vm)) + return 0; + + err = down_write_killable(&group->mode_sem); + if (err) + return err; + + if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) { + q->ops->suspend(q); + err = q->ops->suspend_wait(q); + if (err) + goto err_suspend; + + xe_hw_engine_group_resume_faulting_lr_jobs(group); + } + + list_add(&q->hw_engine_group_link, &group->exec_queue_list); + up_write(&group->mode_sem); + + return 0; + +err_suspend: + up_write(&group->mode_sem); + return err; +} + +/** + * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group + * @group: The hw engine group + * @q: The exec_queue + */ +void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q) +{ + struct xe_device *xe = gt_to_xe(q->gt); + + xe_assert(xe, group); + xe_assert(xe, q->vm); + + down_write(&group->mode_sem); + + if (!list_empty(&q->hw_engine_group_link)) + list_del(&q->hw_engine_group_link); + + up_write(&group->mode_sem); +} + +/** + * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's + * faulting LR jobs + * @group: The hw engine group + */ +void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group) +{ + queue_work(group->resume_wq, &group->resume_work); +} + +/** + * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group + * @group: The hw engine group + * + * Return: 0 on success, negative error code on error. + */ +static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group) +{ + int err; + struct xe_exec_queue *q; + bool need_resume = false; + + lockdep_assert_held_write(&group->mode_sem); + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (!xe_vm_in_fault_mode(q->vm)) + continue; + + need_resume = true; + q->ops->suspend(q); + } + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (!xe_vm_in_fault_mode(q->vm)) + continue; + + err = q->ops->suspend_wait(q); + if (err) + goto err_suspend; + } + + if (need_resume) + xe_hw_engine_group_resume_faulting_lr_jobs(group); + + return 0; + +err_suspend: + up_write(&group->mode_sem); + return err; +} + +/** + * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete + * @group: The hw engine group + * + * This function is not meant to be called directly from a user IOCTL as dma_fence_wait() + * is not interruptible. + * + * Return: 0 on success, + * -ETIME if waiting for one job failed + */ +static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group) +{ + long timeout; + struct xe_exec_queue *q; + struct dma_fence *fence; + + lockdep_assert_held_write(&group->mode_sem); + + list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { + if (xe_vm_in_lr_mode(q->vm)) + continue; + + fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm); + timeout = dma_fence_wait(fence, false); + dma_fence_put(fence); + + if (timeout < 0) + return -ETIME; + } + + return 0; +} + +static int switch_mode(struct xe_hw_engine_group *group) +{ + int err = 0; + enum xe_hw_engine_group_execution_mode new_mode; + + lockdep_assert_held_write(&group->mode_sem); + + switch (group->cur_mode) { + case EXEC_MODE_LR: + new_mode = EXEC_MODE_DMA_FENCE; + err = xe_hw_engine_group_suspend_faulting_lr_jobs(group); + break; + case EXEC_MODE_DMA_FENCE: + new_mode = EXEC_MODE_LR; + err = xe_hw_engine_group_wait_for_dma_fence_jobs(group); + break; + } + + if (err) + return err; + + group->cur_mode = new_mode; + + return 0; +} + +/** + * xe_hw_engine_group_get_mode() - Get the group to execute in the new mode + * @group: The hw engine group + * @new_mode: The new execution mode + * @previous_mode: Pointer to the previous mode provided for use by caller + * + * Return: 0 if successful, -EINTR if locking failed. + */ +int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, + enum xe_hw_engine_group_execution_mode new_mode, + enum xe_hw_engine_group_execution_mode *previous_mode) +__acquires(&group->mode_sem) +{ + int err = down_read_interruptible(&group->mode_sem); + + if (err) + return err; + + *previous_mode = group->cur_mode; + + if (new_mode != group->cur_mode) { + up_read(&group->mode_sem); + err = down_write_killable(&group->mode_sem); + if (err) + return err; + + if (new_mode != group->cur_mode) { + err = switch_mode(group); + if (err) { + up_write(&group->mode_sem); + return err; + } + } + downgrade_write(&group->mode_sem); + } + + return err; +} + +/** + * xe_hw_engine_group_put() - Put the group + * @group: The hw engine group + */ +void xe_hw_engine_group_put(struct xe_hw_engine_group *group) +__releases(&group->mode_sem) +{ + up_read(&group->mode_sem); +} + +/** + * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue + * @q: The exec_queue + */ +enum xe_hw_engine_group_execution_mode +xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q) +{ + if (xe_vm_in_fault_mode(q->vm)) + return EXEC_MODE_LR; + else + return EXEC_MODE_DMA_FENCE; +} diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h new file mode 100644 index 000000000000..797ee81acbf2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_GROUP_H_ +#define _XE_HW_ENGINE_GROUP_H_ + +#include "xe_hw_engine_group_types.h" + +struct drm_device; +struct xe_exec_queue; +struct xe_gt; + +int xe_hw_engine_setup_groups(struct xe_gt *gt); + +int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q); +void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q); + +int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, + enum xe_hw_engine_group_execution_mode new_mode, + enum xe_hw_engine_group_execution_mode *previous_mode); +void xe_hw_engine_group_put(struct xe_hw_engine_group *group); + +enum xe_hw_engine_group_execution_mode +xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q); +void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group); + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group_types.h b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h new file mode 100644 index 000000000000..92b6e0712c03 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_HW_ENGINE_GROUP_TYPES_H_ +#define _XE_HW_ENGINE_GROUP_TYPES_H_ + +#include "xe_force_wake_types.h" +#include "xe_lrc_types.h" +#include "xe_reg_sr_types.h" + +/** + * enum xe_hw_engine_group_execution_mode - possible execution modes of a hw + * engine group + * + * @EXEC_MODE_LR: execution in long-running mode + * @EXEC_MODE_DMA_FENCE: execution in dma fence mode + */ +enum xe_hw_engine_group_execution_mode { + EXEC_MODE_LR, + EXEC_MODE_DMA_FENCE, +}; + +/** + * struct xe_hw_engine_group - Hardware engine group + * + * hw engines belong to the same group if they share hardware resources in a way + * that prevents them from making progress when one is stuck on a page fault. + */ +struct xe_hw_engine_group { + /** + * @exec_queue_list: list of exec queues attached to this + * xe_hw_engine_group + */ + struct list_head exec_queue_list; + /** @resume_work: worker to resume faulting LR exec queues */ + struct work_struct resume_work; + /** @resume_wq: workqueue to resume faulting LR exec queues */ + struct workqueue_struct *resume_wq; + /** + * @mode_sem: used to protect this group's hardware resources and ensure + * mutual exclusion between execution only in faulting LR mode and + * execution only in DMA_FENCE mode + */ + struct rw_semaphore mode_sem; + /** @cur_mode: current execution mode of this hw engine group */ + enum xe_hw_engine_group_execution_mode cur_mode; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 70e6434f150d..39f24012d0f4 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -150,6 +150,8 @@ struct xe_hw_engine { struct xe_hw_engine_class_intf *eclass; /** @oa_unit: oa unit for this hw engine */ struct xe_oa_unit *oa_unit; + /** @hw_engine_group: the group of hw engines this one belongs to */ + struct xe_hw_engine_group *hw_engine_group; }; /** diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 45a9789cf501..0b4f12be3692 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -148,20 +148,20 @@ static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev); + return dev_name(fence->xe->drm.dev); } static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return fence->ctx->name; + return fence->name; } static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - struct xe_device *xe = gt_to_xe(fence->ctx->gt); + struct xe_device *xe = fence->xe; u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); return dma_fence->error || @@ -253,7 +253,8 @@ void xe_hw_fence_init(struct dma_fence *fence, struct xe_hw_fence_ctx *ctx, struct xe_hw_fence *hw_fence = container_of(fence, typeof(*hw_fence), dma); - hw_fence->ctx = ctx; + hw_fence->xe = gt_to_xe(ctx->gt); + snprintf(hw_fence->name, sizeof(hw_fence->name), "%s", ctx->name); hw_fence->seqno_map = seqno_map; INIT_LIST_HEAD(&hw_fence->irq_link); diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h index b33c4956e8ea..364a61f4bfda 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence_types.h +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -12,6 +12,7 @@ #include <linux/list.h> #include <linux/spinlock.h> +struct xe_device; struct xe_gt; /** @@ -61,8 +62,10 @@ struct xe_hw_fence_ctx { struct xe_hw_fence { /** @dma: base dma fence for hardware fence context */ struct dma_fence dma; - /** @ctx: hardware fence context */ - struct xe_hw_fence_ctx *ctx; + /** @xe: Xe device for hw fence driver name */ + struct xe_device *xe; + /** @name: name of hardware fence context */ + char name[MAX_FENCE_NAME_LEN]; /** @seqno_map: I/O map for seqno */ struct iosys_map seqno_map; /** @irq_link: Link in struct xe_hw_fence_irq.pending */ diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 0c8ce09e5025..832ea81faeee 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -203,9 +203,10 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va reg_val = xe_mmio_rmw32(hwmon->gt, rapl_limit, PKG_PWR_LIM_1_EN, 0); reg_val = xe_mmio_read32(hwmon->gt, rapl_limit); if (reg_val & PKG_PWR_LIM_1_EN) { + drm_warn(>_to_xe(hwmon->gt)->drm, "PL1 disable is not supported!\n"); ret = -EOPNOTSUPP; - goto unlock; } + goto unlock; } /* Computation in 64-bits to avoid overflow. Round to nearest. */ diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 94ff62e1d95e..aec7db39c061 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -5,6 +5,8 @@ #include "xe_lrc.h" +#include <generated/xe_wa_oob.h> + #include <linux/ascii85.h> #include "instructions/xe_mi_commands.h" @@ -24,6 +26,7 @@ #include "xe_memirq.h" #include "xe_sriov.h" #include "xe_vm.h" +#include "xe_wa.h" #define LRC_VALID BIT_ULL(0) #define LRC_PRIVILEGE BIT_ULL(8) @@ -1581,19 +1584,31 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b int state_table_size = 0; /* - * At the moment we only need to emit non-register state for the RCS - * engine. + * Wa_14019789679 + * + * If the driver doesn't explicitly emit the SVG instructions while + * setting up the default LRC, the context switch will write 0's + * (noops) into the LRC memory rather than the expected instruction + * headers. Application contexts start out as a copy of the default + * LRC, and if they also do not emit specific settings for some SVG + * state, then on context restore they'll unintentionally inherit + * whatever state setting the previous context had programmed into the + * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will + * prevent the hardware from resetting that state back to any specific + * value). + * + * The official workaround only requires emitting 3DSTATE_MESH_CONTROL + * since that's a specific state setting that can easily cause GPU + * hangs if unintentionally inherited. However to be safe we'll + * continue to emit all of the SVG state since it's best not to leak + * any of the state between contexts, even if that leakage is harmless. */ - if (q->hwe->class != XE_ENGINE_CLASS_RENDER) - return; - - switch (GRAPHICS_VERx100(xe)) { - case 1255: - case 1270 ... 2004: + if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { state_table = xe_hpg_svg_state; state_table_size = ARRAY_SIZE(xe_hpg_svg_state); - break; - default: + } + + if (!state_table) { xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); return; @@ -1634,6 +1649,9 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; + if (lrc->bo->vm) + xe_vm_get(lrc->bo->vm); + snapshot->context_desc = xe_lrc_ggtt_addr(lrc); snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); @@ -1653,12 +1671,14 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) { struct xe_bo *bo; + struct xe_vm *vm; struct iosys_map src; if (!snapshot) return; bo = snapshot->lrc_bo; + vm = bo->vm; snapshot->lrc_bo = NULL; snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); @@ -1678,6 +1698,8 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) xe_bo_unlock(bo); put_bo: xe_bo_put(bo); + if (vm) + xe_vm_put(vm); } void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) @@ -1727,8 +1749,14 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) return; kvfree(snapshot->lrc_snapshot); - if (snapshot->lrc_bo) + if (snapshot->lrc_bo) { + struct xe_vm *vm; + + vm = snapshot->lrc_bo->vm; xe_bo_put(snapshot->lrc_bo); + if (vm) + xe_vm_put(vm); + } kfree(snapshot); } diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 6f24aaf58252..cbf54be224c9 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -442,7 +442,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, EXEC_QUEUE_FLAG_KERNEL | - EXEC_QUEUE_FLAG_PERMANENT); + EXEC_QUEUE_FLAG_PERMANENT, 0); } if (IS_ERR(m->q)) { xe_vm_close_and_put(vm); @@ -1037,9 +1037,11 @@ static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, * @m: The migration context. * @bo: The buffer object @dst is currently bound to. * @dst: The dst TTM resource to be cleared. + * @clear_flags: flags to specify which data to clear: CCS, BO, or both. * - * Clear the contents of @dst to zero. On flat CCS devices, - * the CCS metadata is cleared to zero as well on VRAM destinations. + * Clear the contents of @dst to zero when XE_MIGRATE_CLEAR_FLAG_BO_DATA is set. + * On flat CCS devices, the CCS metadata is cleared to zero with XE_MIGRATE_CLEAR_FLAG_CCS_DATA. + * Set XE_MIGRATE_CLEAR_FLAG_FULL to clear bo as well as CCS metadata. * TODO: Eliminate the @bo argument. * * Return: Pointer to a dma_fence representing the last clear batch, or @@ -1048,18 +1050,27 @@ static void emit_clear(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs, */ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst) + struct ttm_resource *dst, + u32 clear_flags) { bool clear_vram = mem_type_is_vram(dst->mem_type); + bool clear_bo_data = XE_MIGRATE_CLEAR_FLAG_BO_DATA & clear_flags; + bool clear_ccs = XE_MIGRATE_CLEAR_FLAG_CCS_DATA & clear_flags; struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); - bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? true : false; + bool clear_only_system_ccs = false; struct dma_fence *fence = NULL; u64 size = bo->size; struct xe_res_cursor src_it; struct ttm_resource *src = dst; int err; + if (WARN_ON(!clear_bo_data && !clear_ccs)) + return NULL; + + if (!clear_bo_data && clear_ccs && !IS_DGFX(xe)) + clear_only_system_ccs = true; + if (!clear_vram) xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); else @@ -1085,7 +1096,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, batch_size = 2 + pte_update_size(m, pte_flags, src, &src_it, &clear_L0, &clear_L0_ofs, &clear_L0_pt, - clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0, + clear_bo_data ? emit_clear_cmd_len(gt) : 0, 0, avail_pts); if (xe_migrate_needs_ccs_emit(xe)) @@ -1107,13 +1118,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) xe_res_next(&src_it, clear_L0); else - emit_pte(m, bb, clear_L0_pt, clear_vram, clear_system_ccs, + emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs, &src_it, clear_L0, dst); bb->cs[bb->len++] = MI_BATCH_BUFFER_END; update_idx = bb->len; - if (!clear_system_ccs) + if (clear_bo_data) emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram); if (xe_migrate_needs_ccs_emit(xe)) { @@ -1172,7 +1183,7 @@ err_sync: return ERR_PTR(err); } - if (clear_system_ccs) + if (clear_ccs) bo->ccs_cleared = true; return fence; diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h index 453e0ecf5034..0109866e398a 100644 --- a/drivers/gpu/drm/xe/xe_migrate.h +++ b/drivers/gpu/drm/xe/xe_migrate.h @@ -6,7 +6,7 @@ #ifndef _XE_MIGRATE_ #define _XE_MIGRATE_ -#include <drm/drm_mm.h> +#include <linux/types.h> struct dma_fence; struct iosys_map; @@ -102,9 +102,14 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct ttm_resource *dst, bool copy_only_ccs); +#define XE_MIGRATE_CLEAR_FLAG_BO_DATA BIT(0) +#define XE_MIGRATE_CLEAR_FLAG_CCS_DATA BIT(1) +#define XE_MIGRATE_CLEAR_FLAG_FULL (XE_MIGRATE_CLEAR_FLAG_BO_DATA | \ + XE_MIGRATE_CLEAR_FLAG_CCS_DATA) struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_bo *bo, - struct ttm_resource *dst); + struct ttm_resource *dst, + u32 clear_flags); struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m); diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index bdcc7282385c..3fd462fda625 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -29,7 +29,7 @@ static void tiles_fini(void *arg) struct xe_tile *tile; int id; - for_each_tile(tile, xe, id) + for_each_remote_tile(tile, xe, id) tile->mmio.regs = NULL; } @@ -146,9 +146,11 @@ int xe_mmio_probe_tiles(struct xe_device *xe) static void mmio_fini(void *arg) { struct xe_device *xe = arg; + struct xe_tile *root_tile = xe_device_get_root_tile(xe); pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); xe->mmio.regs = NULL; + root_tile->mmio.regs = NULL; } int xe_mmio_init(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 499540add465..bfc3deebdaa2 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -8,14 +8,17 @@ #include <linux/init.h> #include <linux/module.h> +#include <drm/drm_module.h> + #include "xe_drv.h" #include "xe_hw_fence.h" #include "xe_pci.h" +#include "xe_pm.h" #include "xe_observation.h" #include "xe_sched_job.h" struct xe_modparam xe_modparam = { - .enable_display = true, + .probe_display = true, .guc_log_level = 5, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, .wedged_mode = 1, @@ -25,8 +28,8 @@ struct xe_modparam xe_modparam = { module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); -module_param_named(enable_display, xe_modparam.enable_display, bool, 0444); -MODULE_PARM_DESC(enable_display, "Enable display"); +module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); +MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)"); module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, uint, 0600); MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size(in MiB)"); @@ -61,13 +64,28 @@ module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); MODULE_PARM_DESC(wedged_mode, "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); +static int xe_check_nomodeset(void) +{ + if (drm_firmware_drivers_only()) + return -ENODEV; + + return 0; +} + struct init_funcs { int (*init)(void); void (*exit)(void); }; +static void xe_dummy_exit(void) +{ +} + static const struct init_funcs init_funcs[] = { { + .init = xe_check_nomodeset, + }, + { .init = xe_hw_fence_module_init, .exit = xe_hw_fence_module_exit, }, @@ -83,17 +101,41 @@ static const struct init_funcs init_funcs[] = { .init = xe_observation_sysctl_register, .exit = xe_observation_sysctl_unregister, }, + { + .init = xe_pm_module_init, + .exit = xe_dummy_exit, + }, }; +static int __init xe_call_init_func(unsigned int i) +{ + if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) + return 0; + if (!init_funcs[i].init) + return 0; + + return init_funcs[i].init(); +} + +static void xe_call_exit_func(unsigned int i) +{ + if (WARN_ON(i >= ARRAY_SIZE(init_funcs))) + return; + if (!init_funcs[i].exit) + return; + + init_funcs[i].exit(); +} + static int __init xe_init(void) { int err, i; for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { - err = init_funcs[i].init(); + err = xe_call_init_func(i); if (err) { while (i--) - init_funcs[i].exit(); + xe_call_exit_func(i); return err; } } @@ -106,7 +148,7 @@ static void __exit xe_exit(void) int i; for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--) - init_funcs[i].exit(); + xe_call_exit_func(i); } module_init(xe_init); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 61a0d28a28c8..161a5e6f717f 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -11,7 +11,7 @@ /* Module modprobe variables */ struct xe_modparam { bool force_execlist; - bool enable_display; + bool probe_display; u32 force_vram_bar_size; int guc_log_level; char *guc_firmware_path; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 3ef92eb8fbb1..4d4541e0b24c 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1244,8 +1244,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY, VM_MAYWRITE | VM_MAYEXEC); - xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == - (vma->vm_end - vma->vm_start) >> PAGE_SHIFT); + xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma)); for (i = 0; i < bo->ttm.ttm->num_pages; i++) { ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]), PAGE_SIZE, vma->vm_page_prot); diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c index fcb584b42a7d..a78c92a44ec2 100644 --- a/drivers/gpu/drm/xe/xe_observation.c +++ b/drivers/gpu/drm/xe/xe_observation.c @@ -66,7 +66,6 @@ static struct ctl_table observation_ctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - {} }; /** diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 3c4a3c91377a..f276194d9c4e 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -338,14 +338,12 @@ static const struct xe_device_desc mtl_desc = { static const struct xe_device_desc lnl_desc = { PLATFORM(LUNARLAKE), .has_display = true, - .require_force_probe = true, }; static const struct xe_device_desc bmg_desc = { DGFX_FEATURES, PLATFORM(BATTLEMAGE), .has_display = true, - .require_force_probe = true, .has_heci_cscfi = 1, }; @@ -616,9 +614,9 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.skip_mtcfg = desc->skip_mtcfg; xe->info.skip_pcode = desc->skip_pcode; - xe->info.enable_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && - xe_modparam.enable_display && - desc->has_display; + xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && + xe_modparam.probe_display && + desc->has_display; err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); if (err) @@ -747,7 +745,7 @@ static void xe_pci_remove(struct pci_dev *pdev) { struct xe_device *xe; - xe = pci_get_drvdata(pdev); + xe = pdev_to_xe_device(pdev); if (!xe) /* driver load aborted, nothing to cleanup */ return; @@ -829,14 +827,13 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) xe->info.media_name, xe->info.media_verx100 / 100, xe->info.media_verx100 % 100, - str_yes_no(xe->info.enable_display), + str_yes_no(xe->info.probe_display), xe->info.dma_mask_size, xe->info.tile_count, xe->info.has_heci_gscfi, xe->info.has_heci_cscfi); - drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n", + drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, B:%s)\n", xe_step_name(xe->info.step.graphics), xe_step_name(xe->info.step.media), - xe_step_name(xe->info.step.display), xe_step_name(xe->info.step.basedie)); drm_dbg(&xe->drm, "SR-IOV support: %s (mode: %s)\n", diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 9f3c14fd9f33..2e2accd76fb2 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -70,11 +70,34 @@ */ #ifdef CONFIG_LOCKDEP -static struct lockdep_map xe_pm_runtime_lockdep_map = { - .name = "xe_pm_runtime_lockdep_map" +static struct lockdep_map xe_pm_runtime_d3cold_map = { + .name = "xe_rpm_d3cold_map" +}; + +static struct lockdep_map xe_pm_runtime_nod3cold_map = { + .name = "xe_rpm_nod3cold_map" }; #endif +static bool __maybe_unused xe_rpm_reclaim_safe(const struct xe_device *xe) +{ + return !xe->d3cold.capable && !xe->info.has_sriov; +} + +static void xe_rpm_lockmap_acquire(const struct xe_device *xe) +{ + lock_map_acquire(xe_rpm_reclaim_safe(xe) ? + &xe_pm_runtime_nod3cold_map : + &xe_pm_runtime_d3cold_map); +} + +static void xe_rpm_lockmap_release(const struct xe_device *xe) +{ + lock_map_release(xe_rpm_reclaim_safe(xe) ? + &xe_pm_runtime_nod3cold_map : + &xe_pm_runtime_d3cold_map); +} + /** * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle * @xe: xe device instance @@ -93,13 +116,13 @@ int xe_pm_suspend(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); + xe_display_pm_suspend(xe, false); + /* FIXME: Super racey... */ err = xe_bo_evict_all(xe); if (err) goto err; - xe_display_pm_suspend(xe, false); - for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); if (err) { @@ -154,11 +177,11 @@ int xe_pm_resume(struct xe_device *xe) xe_irq_resume(xe); - xe_display_pm_resume(xe, false); - for_each_gt(gt, xe, id) xe_gt_resume(gt); + xe_display_pm_resume(xe, false); + err = xe_bo_restore_user(xe); if (err) goto err; @@ -354,7 +377,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) * annotation here and in xe_pm_runtime_get() lockdep will see * the potential lock inversion and give us a nice splat. */ - lock_map_acquire(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_acquire(xe); /* * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify @@ -366,11 +389,14 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_bo_runtime_pm_release_mmap_offset(bo); mutex_unlock(&xe->mem_access.vram_userfault.lock); + xe_display_pm_runtime_suspend(xe); + if (xe->d3cold.allowed) { + xe_display_pm_suspend(xe, true); + err = xe_bo_evict_all(xe); if (err) goto out; - xe_display_pm_suspend(xe, true); } for_each_gt(gt, xe, id) { @@ -386,7 +412,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) out: if (err) xe_display_pm_resume(xe, true); - lock_map_release(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return err; } @@ -407,7 +433,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) /* Disable access_ongoing asserts and prevent recursive pm calls */ xe_pm_write_callback_task(xe, current); - lock_map_acquire(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_acquire(xe); if (xe->d3cold.allowed) { err = xe_pcode_ready(xe, true); @@ -430,14 +456,16 @@ int xe_pm_runtime_resume(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_resume(gt); + xe_display_pm_runtime_resume(xe); + if (xe->d3cold.allowed) { - xe_display_pm_resume(xe, true); err = xe_bo_restore_user(xe); if (err) goto out; } + out: - lock_map_release(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return err; } @@ -451,15 +479,37 @@ out: * stuff that can happen inside the runtime_resume callback by acquiring * a dummy lock (it doesn't protect anything and gets compiled out on * non-debug builds). Lockdep then only needs to see the - * xe_pm_runtime_lockdep_map -> runtime_resume callback once, and then can - * hopefully validate all the (callers_locks) -> xe_pm_runtime_lockdep_map. + * xe_pm_runtime_xxx_map -> runtime_resume callback once, and then can + * hopefully validate all the (callers_locks) -> xe_pm_runtime_xxx_map. * For example if the (callers_locks) are ever grabbed in the * runtime_resume callback, lockdep should give us a nice splat. */ -static void pm_runtime_lockdep_prime(void) +static void xe_rpm_might_enter_cb(const struct xe_device *xe) { - lock_map_acquire(&xe_pm_runtime_lockdep_map); - lock_map_release(&xe_pm_runtime_lockdep_map); + xe_rpm_lockmap_acquire(xe); + xe_rpm_lockmap_release(xe); +} + +/* + * Prime the lockdep maps for known locking orders that need to + * be supported but that may not always occur on all systems. + */ +static void xe_pm_runtime_lockdep_prime(void) +{ + struct dma_resv lockdep_resv; + + dma_resv_init(&lockdep_resv); + lock_map_acquire(&xe_pm_runtime_d3cold_map); + /* D3Cold takes the dma_resv locks to evict bos */ + dma_resv_lock(&lockdep_resv, NULL); + dma_resv_unlock(&lockdep_resv); + lock_map_release(&xe_pm_runtime_d3cold_map); + + /* Shrinkers might like to wake up the device under reclaim. */ + fs_reclaim_acquire(GFP_KERNEL); + lock_map_acquire(&xe_pm_runtime_nod3cold_map); + lock_map_release(&xe_pm_runtime_nod3cold_map); + fs_reclaim_release(GFP_KERNEL); } /** @@ -474,7 +524,7 @@ void xe_pm_runtime_get(struct xe_device *xe) if (xe_pm_read_callback_task(xe) == current) return; - pm_runtime_lockdep_prime(); + xe_rpm_might_enter_cb(xe); pm_runtime_resume(xe->drm.dev); } @@ -506,7 +556,7 @@ int xe_pm_runtime_get_ioctl(struct xe_device *xe) if (WARN_ON(xe_pm_read_callback_task(xe) == current)) return -ELOOP; - pm_runtime_lockdep_prime(); + xe_rpm_might_enter_cb(xe); return pm_runtime_get_sync(xe->drm.dev); } @@ -574,7 +624,7 @@ bool xe_pm_runtime_resume_and_get(struct xe_device *xe) return true; } - pm_runtime_lockdep_prime(); + xe_rpm_might_enter_cb(xe); return pm_runtime_resume_and_get(xe->drm.dev) >= 0; } @@ -666,3 +716,14 @@ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) drm_dbg(&xe->drm, "d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed)); } + +/** + * xe_pm_module_init() - Perform xe_pm specific module initialization. + * + * Return: 0 on success. Currently doesn't fail. + */ +int __init xe_pm_module_init(void) +{ + xe_pm_runtime_lockdep_prime(); + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 104a21ae6dfd..9aef673b1c8a 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -32,5 +32,6 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe); int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); void xe_pm_d3cold_allowed_toggle(struct xe_device *xe); struct task_struct *xe_pm_read_callback_task(struct xe_device *xe); +int xe_pm_module_init(void); #endif diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 56e709d2fb30..83fbeea5aa20 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -134,8 +134,9 @@ xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q, { list_del_init(&pfence->link); pfence->q = xe_exec_queue_get(q); + spin_lock_init(&pfence->lock); dma_fence_init(&pfence->base, &preempt_fence_ops, - &q->lr.lock, context, seqno); + &pfence->lock, context, seqno); return &pfence->base; } diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h index b54b5c29b533..312c3372a49f 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h +++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h @@ -25,6 +25,8 @@ struct xe_preempt_fence { struct xe_exec_queue *q; /** @preempt_work: work struct which issues preemption */ struct work_struct preempt_work; + /** @lock: dma-fence fence lock */ + spinlock_t lock; /** @error: preempt fence is in error state */ int error; }; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 97a6a0b0b8ba..579ed31b46db 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1149,10 +1149,12 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job, return err; } - if (job) - err = xe_sched_job_last_fence_add_dep(job, vm); - else - err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); + if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) { + if (job) + err = xe_sched_job_last_fence_add_dep(job, vm); + else + err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); + } for (i = 0; job && !err && i < vops->num_syncs; i++) err = xe_sync_entry_add_deps(&vops->syncs[i], job); diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h index 655af89b31a9..dca374b6521c 100644 --- a/drivers/gpu/drm/xe/xe_res_cursor.h +++ b/drivers/gpu/drm/xe/xe_res_cursor.h @@ -26,7 +26,6 @@ #include <linux/scatterlist.h> -#include <drm/drm_mm.h> #include <drm/ttm/ttm_placement.h> #include <drm/ttm/ttm_range_manager.h> #include <drm/ttm/ttm_resource.h> diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index f3060979e63f..fe2cb2a96f78 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -25,10 +25,9 @@ static void xe_sa_bo_manager_fini(struct drm_device *drm, void *arg) drm_suballoc_manager_fini(&sa_manager->base); - if (bo->vmap.is_iomem) + if (sa_manager->is_iomem) kvfree(sa_manager->cpu_ptr); - xe_bo_unpin_map_no_vm(bo); sa_manager->bo = NULL; } @@ -47,16 +46,17 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 sa_manager->bo = NULL; - bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + bo = xe_managed_bo_create_pin_map(xe, tile, size, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) { drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", PTR_ERR(bo)); return (struct xe_sa_manager *)bo; } sa_manager->bo = bo; + sa_manager->is_iomem = bo->vmap.is_iomem; drm_suballoc_manager_init(&sa_manager->base, managed_size, align); sa_manager->gpu_addr = xe_bo_ggtt_addr(bo); @@ -64,7 +64,6 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 if (bo->vmap.is_iomem) { sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL); if (!sa_manager->cpu_ptr) { - xe_bo_unpin_map_no_vm(sa_manager->bo); sa_manager->bo = NULL; return ERR_PTR(-ENOMEM); } diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h index 2ef896aeca1d..2b070ff1292e 100644 --- a/drivers/gpu/drm/xe/xe_sa_types.h +++ b/drivers/gpu/drm/xe/xe_sa_types.h @@ -14,6 +14,7 @@ struct xe_sa_manager { struct xe_bo *bo; u64 gpu_addr; void *cpu_ptr; + bool is_iomem; }; #endif diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 44d534e362cd..55d47450b2c6 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -89,8 +89,7 @@ static void xe_sched_job_free_fences(struct xe_sched_job *job) if (ptrs->lrc_fence) xe_lrc_free_seqno_fence(ptrs->lrc_fence); - if (ptrs->chain_fence) - dma_fence_chain_free(ptrs->chain_fence); + dma_fence_chain_free(ptrs->chain_fence); } } @@ -171,12 +170,13 @@ void xe_sched_job_destroy(struct kref *ref) struct xe_sched_job *job = container_of(ref, struct xe_sched_job, refcount); struct xe_device *xe = job_to_xe(job); + struct xe_exec_queue *q = job->q; xe_sched_job_free_fences(job); - xe_exec_queue_put(job->q); dma_fence_put(job->fence); drm_sched_job_cleanup(&job->drm); job_free(job); + xe_exec_queue_put(q); xe_pm_runtime_put(xe); } diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index eaf1b718f26c..c77b5c317fa0 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -28,23 +28,17 @@ * use a macro to define these to make it easier to identify the platforms * where the two steppings can deviate. */ -#define COMMON_GT_MEDIA_STEP(x_) \ - .graphics = STEP_##x_, \ - .media = STEP_##x_ - #define COMMON_STEP(x_) \ - COMMON_GT_MEDIA_STEP(x_), \ .graphics = STEP_##x_, \ - .media = STEP_##x_, \ - .display = STEP_##x_ + .media = STEP_##x_ __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); /* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ static const struct xe_step_info tgl_revids[] = { - [0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 }, - [1] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_D0 }, + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, }; static const struct xe_step_info dg1_revids[] = { @@ -53,49 +47,49 @@ static const struct xe_step_info dg1_revids[] = { }; static const struct xe_step_info adls_revids[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, - [0x1] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A2 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, - [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_B0 }, - [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x1] = { COMMON_STEP(A0) }, + [0x4] = { COMMON_STEP(B0) }, + [0x8] = { COMMON_STEP(C0) }, + [0xC] = { COMMON_STEP(D0) }, }; static const struct xe_step_info adls_rpls_revids[] = { - [0x4] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_D0 }, - [0xC] = { COMMON_GT_MEDIA_STEP(D0), .display = STEP_C0 }, + [0x4] = { COMMON_STEP(D0) }, + [0xC] = { COMMON_STEP(D0) }, }; static const struct xe_step_info adlp_revids[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, - [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 }, - [0xC] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_D0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x4] = { COMMON_STEP(B0) }, + [0x8] = { COMMON_STEP(C0) }, + [0xC] = { COMMON_STEP(C0) }, }; static const struct xe_step_info adlp_rpl_revids[] = { - [0x4] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_E0 }, + [0x4] = { COMMON_STEP(C0) }, }; static const struct xe_step_info adln_revids[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_D0 }, + [0x0] = { COMMON_STEP(A0) }, }; static const struct xe_step_info dg2_g10_revid_step_tbl[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_A0 }, - [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_A0 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_B0 }, - [0x8] = { COMMON_GT_MEDIA_STEP(C0), .display = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x1] = { COMMON_STEP(A1) }, + [0x4] = { COMMON_STEP(B0) }, + [0x8] = { COMMON_STEP(C0) }, }; static const struct xe_step_info dg2_g11_revid_step_tbl[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_B0 }, - [0x4] = { COMMON_GT_MEDIA_STEP(B0), .display = STEP_C0 }, - [0x5] = { COMMON_GT_MEDIA_STEP(B1), .display = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x4] = { COMMON_STEP(B0) }, + [0x5] = { COMMON_STEP(B1) }, }; static const struct xe_step_info dg2_g12_revid_step_tbl[] = { - [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display = STEP_C0 }, - [0x1] = { COMMON_GT_MEDIA_STEP(A1), .display = STEP_C0 }, + [0x0] = { COMMON_STEP(A0) }, + [0x1] = { COMMON_STEP(A1) }, }; static const struct xe_step_info pvc_revid_step_tbl[] = { @@ -195,7 +189,6 @@ struct xe_step_info xe_step_pre_gmdid_get(struct xe_device *xe) } else { drm_dbg(&xe->drm, "Using future steppings\n"); step.graphics = STEP_FUTURE; - step.display = STEP_FUTURE; } } diff --git a/drivers/gpu/drm/xe/xe_step_types.h b/drivers/gpu/drm/xe/xe_step_types.h index ccc9b4795e95..d978cc2512f2 100644 --- a/drivers/gpu/drm/xe/xe_step_types.h +++ b/drivers/gpu/drm/xe/xe_step_types.h @@ -11,12 +11,15 @@ struct xe_step_info { u8 graphics; u8 media; - u8 display; u8 basedie; }; #define STEP_ENUM_VAL(name) STEP_##name, +/* + * Always define four minor steppings 0-3 for each stepping to match GMD ID + * spacing of values. See xe_step_gmdid_get(). + */ #define STEP_NAME_LIST(func) \ func(A0) \ func(A1) \ @@ -34,7 +37,30 @@ struct xe_step_info { func(D1) \ func(D2) \ func(D3) \ - func(E0) + func(E0) \ + func(E1) \ + func(E2) \ + func(E3) \ + func(F0) \ + func(F1) \ + func(F2) \ + func(F3) \ + func(G0) \ + func(G1) \ + func(G2) \ + func(G3) \ + func(H0) \ + func(H1) \ + func(H2) \ + func(H3) \ + func(I0) \ + func(I1) \ + func(I2) \ + func(I3) \ + func(J0) \ + func(J1) \ + func(J2) \ + func(J3) /* * Symbolic steppings that do not match the hardware. These are valid both as gt diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 533246f42256..436faff09bac 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -55,7 +55,7 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, struct xe_user_fence *ufence; u64 __user *ptr = u64_to_user_ptr(addr); - if (!access_ok(ptr, sizeof(ptr))) + if (!access_ok(ptr, sizeof(*ptr))) return ERR_PTR(-EFAULT); ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); @@ -206,16 +206,9 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) { - int err; - - if (sync->fence) { - err = drm_sched_job_add_dependency(&job->drm, - dma_fence_get(sync->fence)); - if (err) { - dma_fence_put(sync->fence); - return err; - } - } + if (sync->fence) + return drm_sched_job_add_dependency(&job->drm, + dma_fence_get(sync->fence)); return 0; } @@ -256,10 +249,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) { if (sync->syncobj) drm_syncobj_put(sync->syncobj); - if (sync->fence) - dma_fence_put(sync->fence); - if (sync->chain_fence) - dma_fence_put(&sync->chain_fence->base); + dma_fence_put(sync->fence); + dma_fence_chain_free(sync->chain_fence); if (sync->ufence) user_fence_put(sync->ufence); } diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 1abdb30cb7ad..8573d7a87d84 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_ARGS(fence), TP_STRUCT__entry( - __string(dev, __dev_name_gt(fence->ctx->gt)) + __string(dev, __dev_name_xe(fence->xe)) __field(u64, ctx) __field(u32, seqno) __field(struct xe_hw_fence *, fence) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index f46fd2df84de..f7113cf6109d 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -5,7 +5,6 @@ */ #include <drm/drm_managed.h> -#include <drm/drm_mm.h> #include <drm/ttm/ttm_device.h> #include <drm/ttm/ttm_placement.h> diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index fe3779fdba2c..423b261ea743 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -150,7 +150,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, } while (remaining_size); if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - if (!drm_buddy_block_trim(mm, vres->base.size, &vres->blocks)) + if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks)) size = vres->base.size; } diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 77d4eec0118d..faa1bf42e50e 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -39,12 +39,23 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { }, { XE_RTP_NAME("Tuning: Compression Overfetch"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), - XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX)), + XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), + SET(CCCHKNREG1, L3CMPCTRL)) }, { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) }, + { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(L3SQCREG2, + COMPMEMRD256BOVRFETCHEN)) + }, + { XE_RTP_NAME("Tuning: Stateless compression control"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, + REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) + }, {} }; diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 5b70d23724c4..4bb2a4a80ddc 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -15,6 +15,7 @@ #include "xe_gsc.h" #include "xe_gt.h" #include "xe_gt_printk.h" +#include "xe_guc.h" #include "xe_map.h" #include "xe_mmio.h" #include "xe_module.h" @@ -105,15 +106,16 @@ struct fw_blobs_by_type { }; #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 19, 2)) \ - fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 19, 2)) \ - fw_def(DG2, major_ver(i915, guc, dg2, 70, 19, 2)) \ - fw_def(DG1, major_ver(i915, guc, dg1, 70, 19, 2)) \ - fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 19, 2)) \ - fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 19, 2)) \ - fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 19, 2)) \ - fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) \ - fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) + fw_def(BATTLEMAGE, major_ver(xe, guc, bmg, 70, 29, 2)) \ + fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 29, 2)) \ + fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 29, 2)) \ + fw_def(DG2, major_ver(i915, guc, dg2, 70, 29, 2)) \ + fw_def(DG1, major_ver(i915, guc, dg1, 70, 29, 2)) \ + fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 29, 2)) \ + fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 29, 2)) \ + fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 29, 2)) \ + fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) \ + fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 29, 2)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ fw_def(BATTLEMAGE, no_ver(xe, huc, bmg)) \ @@ -309,10 +311,10 @@ static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC); - /* We don't support GuC releases older than 70.19 */ - if (release->major < 70 || (release->major == 70 && release->minor < 19)) { - xe_gt_err(gt, "Unsupported GuC v%u.%u! v70.19 or newer is required\n", - release->major, release->minor); + /* We don't support GuC releases older than 70.29.2 */ + if (MAKE_GUC_VER_STRUCT(*release) < MAKE_GUC_VER(70, 29, 2)) { + xe_gt_err(gt, "Unsupported GuC v%u.%u.%u! v70.29.2 or newer is required\n", + release->major, release->minor, release->patch); return -EINVAL; } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index b715883f40d8..3eb76d874eb2 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -275,6 +275,8 @@ out_up_write: * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM * @vm: The VM. * @q: The exec_queue + * + * Note that this function might be called multiple times on the same queue. */ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) { @@ -282,8 +284,10 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) return; down_write(&vm->lock); - list_del(&q->lr.link); - --vm->preempt.num_exec_queues; + if (!list_empty(&q->lr.link)) { + list_del_init(&q->lr.link); + --vm->preempt.num_exec_queues; + } if (q->lr.pfence) { dma_fence_enable_sw_signaling(q->lr.pfence); dma_fence_put(q->lr.pfence); @@ -1191,7 +1195,7 @@ static const struct drm_gpuvm_ops gpuvm_ops = { .vm_free = xe_vm_free, }; -static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index) +static u64 pde_encode_pat_index(u16 pat_index) { u64 pte = 0; @@ -1204,8 +1208,7 @@ static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index) return pte; } -static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index, - u32 pt_level) +static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) { u64 pte = 0; @@ -1246,12 +1249,11 @@ static u64 pte_encode_ps(u32 pt_level) static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, const u16 pat_index) { - struct xe_device *xe = xe_bo_device(bo); u64 pde; pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pde |= XE_PAGE_PRESENT | XE_PAGE_RW; - pde |= pde_encode_pat_index(xe, pat_index); + pde |= pde_encode_pat_index(pat_index); return pde; } @@ -1259,12 +1261,11 @@ static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, u16 pat_index, u32 pt_level) { - struct xe_device *xe = xe_bo_device(bo); u64 pte; pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); + pte |= pte_encode_pat_index(pat_index, pt_level); pte |= pte_encode_ps(pt_level); if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) @@ -1276,14 +1277,12 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, u16 pat_index, u32 pt_level) { - struct xe_device *xe = xe_vma_vm(vma)->xe; - pte |= XE_PAGE_PRESENT; if (likely(!xe_vma_read_only(vma))) pte |= XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); + pte |= pte_encode_pat_index(pat_index, pt_level); pte |= pte_encode_ps(pt_level); if (unlikely(xe_vma_is_null(vma))) @@ -1303,7 +1302,7 @@ static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, pte = addr; pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); + pte |= pte_encode_pat_index(pat_index, pt_level); pte |= pte_encode_ps(pt_level); if (devmem) @@ -1483,19 +1482,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) /* Kernel migration VM shouldn't have a circular loop.. */ if (!(flags & XE_VM_FLAG_MIGRATION)) { for_each_tile(tile, xe, id) { - struct xe_gt *gt = tile->primary_gt; - struct xe_vm *migrate_vm; struct xe_exec_queue *q; u32 create_flags = EXEC_QUEUE_FLAG_VM; if (!vm->pt_root[id]) continue; - migrate_vm = xe_migrate_get_vm(tile->migrate); - q = xe_exec_queue_create_class(xe, gt, migrate_vm, - XE_ENGINE_CLASS_COPY, - create_flags); - xe_vm_put(migrate_vm); + q = xe_exec_queue_create_bind(xe, tile, create_flags, 0); if (IS_ERR(q)) { err = PTR_ERR(q); goto err_close; @@ -1508,13 +1501,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) if (number_tiles > 1) vm->composite_fence_ctx = dma_fence_context_alloc(1); - mutex_lock(&xe->usm.lock); - if (flags & XE_VM_FLAG_FAULT_MODE) - xe->usm.num_vm_in_fault_mode++; - else if (!(flags & XE_VM_FLAG_MIGRATION)) - xe->usm.num_vm_in_non_fault_mode++; - mutex_unlock(&xe->usm.lock); - trace_xe_vm_create(vm); return vm; @@ -1628,11 +1614,6 @@ void xe_vm_close_and_put(struct xe_vm *vm) up_write(&vm->lock); mutex_lock(&xe->usm.lock); - if (vm->flags & XE_VM_FLAG_FAULT_MODE) - xe->usm.num_vm_in_fault_mode--; - else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) - xe->usm.num_vm_in_non_fault_mode--; - if (vm->usm.asid) { void *lookup; @@ -1673,6 +1654,7 @@ static void vm_destroy_work_func(struct work_struct *w) XE_WARN_ON(vm->pt_root[id]); trace_xe_vm_free(vm); + ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); if (vm->xef) @@ -1769,14 +1751,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && - xe_device_in_non_fault_mode(xe))) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) && - xe_device_in_fault_mode(xe))) - return -EINVAL; - if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; @@ -2904,7 +2878,16 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, return -EINVAL; } - if (bo->flags & XE_BO_FLAG_INTERNAL_64K) { + /* + * Some platforms require 64k VM_BIND alignment, + * specifically those with XE_VRAM_FLAGS_NEED64K. + * + * Other platforms may have BO's set to 64k physical placement, + * but can be mapped at 4k offsets anyway. This check is only + * there for the former case. + */ + if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && + (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { if (XE_IOCTL_DBG(xe, obj_offset & XE_64K_PAGE_MASK) || XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || @@ -3184,9 +3167,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = xe_vma_vm(vma)->xe; struct xe_tile *tile; - struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE]; - u32 tile_needs_invalidate = 0; + struct xe_gt_tlb_invalidation_fence + fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; u8 id; + u32 fence_id = 0; int ret = 0; xe_assert(xe, !xe_vma_is_null(vma)); @@ -3214,27 +3198,37 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) if (xe_pt_zap_ptes(tile, vma)) { xe_device_wmb(xe); xe_gt_tlb_invalidation_fence_init(tile->primary_gt, - &fence[id], true); + &fence[fence_id], + true); - /* - * FIXME: We potentially need to invalidate multiple - * GTs within the tile - */ ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, - &fence[id], vma); + &fence[fence_id], vma); if (ret < 0) { - xe_gt_tlb_invalidation_fence_fini(&fence[id]); + xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); goto wait; } + ++fence_id; + + if (!tile->media_gt) + continue; - tile_needs_invalidate |= BIT(id); + xe_gt_tlb_invalidation_fence_init(tile->media_gt, + &fence[fence_id], + true); + + ret = xe_gt_tlb_invalidation_vma(tile->media_gt, + &fence[fence_id], vma); + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); + goto wait; + } + ++fence_id; } } wait: - for_each_tile(tile, xe, id) - if (tile_needs_invalidate & BIT(id)) - xe_gt_tlb_invalidation_fence_wait(&fence[id]); + for (id = 0; id < fence_id; ++id) + xe_gt_tlb_invalidation_fence_wait(&fence[id]); vma->tile_invalidated = vma->tile_mask; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 564e32e44e3b..28b7f95b6c2f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -542,6 +542,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, + { XE_RTP_NAME("14021821874"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) + }, /* Xe2_LPM */ diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 540d38603f32..920ca5060146 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -27,7 +27,13 @@ 16022287689 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) 13011645652 GRAPHICS_VERSION(2004) +14022293748 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2004) +22019794406 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2004) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) 22019338487_display PLATFORM(LUNARLAKE) 16023588340 GRAPHICS_VERSION(2001) +14019789679 GRAPHICS_VERSION(1255) + GRAPHICS_VERSION_RANGE(1270, 2004) |
